from datetime import (
|
date,
|
datetime,
|
timedelta,
|
)
|
|
import numpy as np
|
import pytest
|
|
from pandas.core.dtypes.cast import (
|
infer_dtype_from,
|
infer_dtype_from_array,
|
infer_dtype_from_scalar,
|
)
|
from pandas.core.dtypes.common import is_dtype_equal
|
|
from pandas import (
|
Categorical,
|
Interval,
|
Period,
|
Series,
|
Timedelta,
|
Timestamp,
|
date_range,
|
)
|
|
|
@pytest.fixture(params=[True, False])
def pandas_dtype(request):
    # Parametrized flag: every test requesting this fixture runs once with
    # True and once with False; the tests forward it as ``pandas_dtype``.
    flag = request.param
    return flag
|
|
|
def test_infer_dtype_from_int_scalar(any_int_numpy_dtype):
    # A NumPy integer scalar must be inferred as its own scalar type.
    scalar_type = np.dtype(any_int_numpy_dtype).type
    data = scalar_type(12)

    inferred, _ = infer_dtype_from_scalar(data)

    assert inferred == type(data)
|
|
|
def test_infer_dtype_from_float_scalar(float_numpy_dtype):
    # A NumPy float scalar must be inferred as its own scalar type.
    scalar_type = np.dtype(float_numpy_dtype).type

    inferred, _ = infer_dtype_from_scalar(scalar_type(12))

    assert inferred == scalar_type
|
|
|
@pytest.mark.parametrize(
    "data,exp_dtype",
    [
        (12, np.int64),
        # Spelled as np.float64: the np.float_ alias was removed in NumPy 2.0.
        (np.float64(12), np.float64),
    ],
)
def test_infer_dtype_from_python_scalar(data, exp_dtype):
    # The dtype inferred for each scalar must equal the expected NumPy type.
    dtype, _ = infer_dtype_from_scalar(data)
    assert dtype == exp_dtype
|
|
|
@pytest.mark.parametrize("bool_val", [True, False])
def test_infer_dtype_from_boolean(bool_val):
    # Both Python booleans must be inferred as np.bool_.
    inferred, _ = infer_dtype_from_scalar(bool_val)

    assert inferred == np.bool_
|
|
|
def test_infer_dtype_from_complex(complex_dtype):
    # A complex scalar built from ``complex_dtype`` is expected to infer as
    # complex128.  np.complex128 is used instead of the np.complex_ alias,
    # which was removed in NumPy 2.0.
    data = np.dtype(complex_dtype).type(1)
    dtype, _ = infer_dtype_from_scalar(data)
    assert dtype == np.complex128
|
|
|
@pytest.mark.parametrize(
    "data",
    [
        np.datetime64(1, "ns"),
        Timestamp(1),
        datetime(2000, 1, 1, 0, 0),
    ],
)
def test_infer_dtype_from_datetime(data):
    # tz-naive datetime-like scalars must infer as nanosecond datetime64.
    inferred, _ = infer_dtype_from_scalar(data)

    assert inferred == "M8[ns]"
|
|
|
@pytest.mark.parametrize(
    "data",
    [
        np.timedelta64(1, "ns"),
        Timedelta(1),
        timedelta(1),
    ],
)
def test_infer_dtype_from_timedelta(data):
    # timedelta-like scalars must infer as nanosecond timedelta64.
    inferred, _ = infer_dtype_from_scalar(data)

    assert inferred == "m8[ns]"
|
|
|
@pytest.mark.parametrize("freq", ["M", "D"])
def test_infer_dtype_from_period(freq, pandas_dtype):
    # With pandas_dtype=True a Period infers as "period[freq]"; otherwise it
    # is expected to fall back to object.  The value must compare equal to
    # the input either way.
    period = Period("2011-01-01", freq=freq)
    expected = f"period[{freq}]" if pandas_dtype else np.object_

    inferred, value = infer_dtype_from_scalar(period, pandas_dtype=pandas_dtype)

    assert inferred == expected
    assert value == period
|
|
|
@pytest.mark.parametrize(
    "data",
    [
        date(2000, 1, 1),
        "foo",
        Timestamp(1, tz="US/Eastern"),
    ],
)
def test_infer_dtype_misc(data):
    # With the default arguments, each of these scalars must infer as object.
    inferred, _ = infer_dtype_from_scalar(data)

    assert inferred == np.object_
|
|
|
@pytest.mark.parametrize("tz", ["UTC", "US/Eastern", "Asia/Tokyo"])
def test_infer_from_scalar_tz(tz, pandas_dtype):
    # With pandas_dtype=True a tz-aware Timestamp infers as
    # "datetime64[ns, tz]"; otherwise it is expected to be object.  The value
    # must compare equal to the input either way.
    stamp = Timestamp(1, tz=tz)
    expected = f"datetime64[ns, {tz}]" if pandas_dtype else np.object_

    inferred, value = infer_dtype_from_scalar(stamp, pandas_dtype=pandas_dtype)

    assert inferred == expected
    assert value == stamp
|
|
|
@pytest.mark.parametrize(
    "left, right, subtype",
    [
        (0, 1, "int64"),
        (0.0, 1.0, "float64"),
        (Timestamp(0), Timestamp(1), "datetime64[ns]"),
        (Timestamp(0, tz="UTC"), Timestamp(1, tz="UTC"), "datetime64[ns, UTC]"),
        (Timedelta(0), Timedelta(1), "timedelta64[ns]"),
    ],
)
def test_infer_from_interval(left, right, subtype, closed, pandas_dtype):
    # GH 30337
    # With pandas_dtype=True an Interval infers as "interval[subtype, closed]";
    # otherwise it is expected to be object.  The value must compare equal to
    # the input either way.
    interval = Interval(left, right, closed)

    if pandas_dtype:
        expected_dtype = f"interval[{subtype}, {closed}]"
    else:
        expected_dtype = np.object_

    result_dtype, result_value = infer_dtype_from_scalar(
        interval, pandas_dtype=pandas_dtype
    )

    assert result_dtype == expected_dtype
    assert result_value == interval
|
|
|
def test_infer_dtype_from_scalar_errors():
    # Passing a 1-d ndarray instead of a scalar must raise ValueError with
    # the message below.
    with pytest.raises(
        ValueError, match="invalid ndarray passed to infer_dtype_from_scalar"
    ):
        infer_dtype_from_scalar(np.array([1]))
|
|
|
@pytest.mark.parametrize(
    "value, expected, pandas_dtype",
    [
        ("foo", np.object_, False),
        (b"foo", np.object_, False),
        (1, np.int64, False),
        # np.float64 rather than np.float_: the alias was removed in NumPy 2.0.
        (1.5, np.float64, False),
        (np.datetime64("2016-01-01"), np.dtype("M8[ns]"), False),
        (Timestamp("20160101"), np.dtype("M8[ns]"), False),
        (Timestamp("20160101", tz="UTC"), np.object_, False),
        (Timestamp("20160101", tz="UTC"), "datetime64[ns, UTC]", True),
    ],
)
def test_infer_dtype_from_scalar(value, expected, pandas_dtype):
    # Each scalar must infer to a dtype equal to ``expected``.
    dtype, _ = infer_dtype_from_scalar(value, pandas_dtype=pandas_dtype)
    assert is_dtype_equal(dtype, expected)

    # The same scalars are not list-like, so the array variant must reject
    # them with TypeError.
    with pytest.raises(TypeError, match="must be list-like"):
        infer_dtype_from_array(value, pandas_dtype=pandas_dtype)
|
|
|
@pytest.mark.parametrize(
    "arr, expected, pandas_dtype",
    [
        ([1], np.int_, False),
        (np.array([1], dtype=np.int64), np.int64, False),
        ([np.nan, 1, ""], np.object_, False),
        # np.float64 rather than np.float_: the alias was removed in NumPy 2.0.
        (np.array([[1.0, 2.0]]), np.float64, False),
        (Categorical(list("aabc")), np.object_, False),
        (Categorical([1, 2, 3]), np.int64, False),
        (Categorical(list("aabc")), "category", True),
        (Categorical([1, 2, 3]), "category", True),
        (date_range("20160101", periods=3), np.dtype("=M8[ns]"), False),
        (
            date_range("20160101", periods=3, tz="US/Eastern"),
            "datetime64[ns, US/Eastern]",
            True,
        ),
        (Series([1.0, 2, 3]), np.float64, False),
        (Series(list("abc")), np.object_, False),
        (
            Series(date_range("20160101", periods=3, tz="US/Eastern")),
            "datetime64[ns, US/Eastern]",
            True,
        ),
    ],
)
def test_infer_dtype_from_array(arr, expected, pandas_dtype):
    # Each list-like input must infer to a dtype equal to ``expected``.
    dtype, _ = infer_dtype_from_array(arr, pandas_dtype=pandas_dtype)
    assert is_dtype_equal(dtype, expected)
|
|
|
@pytest.mark.parametrize("cls", [np.datetime64, np.timedelta64])
def test_infer_dtype_from_scalar_zerodim_datetimelike(cls):
    # ndarray.item() can incorrectly return int instead of td64/dt64
    zero_dim = np.array(cls(1234, "ns"))

    # Scalar inference on the 0-dim array must keep the datetimelike type for
    # both the dtype and the returned value.
    scalar_dtype, scalar_res = infer_dtype_from_scalar(zero_dim)
    assert scalar_dtype.type is cls
    assert isinstance(scalar_res, cls)

    # The generic entry point must report the same dtype type.
    generic_dtype, _ = infer_dtype_from(zero_dim)
    assert generic_dtype.type is cls
|