from datetime import timedelta
|
|
import numpy as np
|
import pytest
|
|
from pandas.core.dtypes.dtypes import DatetimeTZDtype
|
|
import pandas as pd
|
from pandas import (
|
DataFrame,
|
Series,
|
date_range,
|
option_context,
|
)
|
import pandas._testing as tm
|
|
|
class TestDataFrameDataTypes:
|
def test_empty_frame_dtypes(self):
|
empty_df = DataFrame()
|
tm.assert_series_equal(empty_df.dtypes, Series(dtype=object))
|
|
nocols_df = DataFrame(index=[1, 2, 3])
|
tm.assert_series_equal(nocols_df.dtypes, Series(dtype=object))
|
|
norows_df = DataFrame(columns=list("abc"))
|
tm.assert_series_equal(norows_df.dtypes, Series(object, index=list("abc")))
|
|
norows_int_df = DataFrame(columns=list("abc")).astype(np.int32)
|
tm.assert_series_equal(
|
norows_int_df.dtypes, Series(np.dtype("int32"), index=list("abc"))
|
)
|
|
df = DataFrame({"a": 1, "b": True, "c": 1.0}, index=[1, 2, 3])
|
ex_dtypes = Series({"a": np.int64, "b": np.bool_, "c": np.float64})
|
tm.assert_series_equal(df.dtypes, ex_dtypes)
|
|
# same but for empty slice of df
|
tm.assert_series_equal(df[:0].dtypes, ex_dtypes)
|
|
def test_datetime_with_tz_dtypes(self):
|
tzframe = DataFrame(
|
{
|
"A": date_range("20130101", periods=3),
|
"B": date_range("20130101", periods=3, tz="US/Eastern"),
|
"C": date_range("20130101", periods=3, tz="CET"),
|
}
|
)
|
tzframe.iloc[1, 1] = pd.NaT
|
tzframe.iloc[1, 2] = pd.NaT
|
result = tzframe.dtypes.sort_index()
|
expected = Series(
|
[
|
np.dtype("datetime64[ns]"),
|
DatetimeTZDtype("ns", "US/Eastern"),
|
DatetimeTZDtype("ns", "CET"),
|
],
|
["A", "B", "C"],
|
)
|
|
tm.assert_series_equal(result, expected)
|
|
def test_dtypes_are_correct_after_column_slice(self):
|
# GH6525
|
df = DataFrame(index=range(5), columns=list("abc"), dtype=np.float_)
|
tm.assert_series_equal(
|
df.dtypes,
|
Series({"a": np.float_, "b": np.float_, "c": np.float_}),
|
)
|
tm.assert_series_equal(df.iloc[:, 2:].dtypes, Series({"c": np.float_}))
|
tm.assert_series_equal(
|
df.dtypes,
|
Series({"a": np.float_, "b": np.float_, "c": np.float_}),
|
)
|
|
@pytest.mark.parametrize(
|
"data",
|
[pd.NA, True],
|
)
|
def test_dtypes_are_correct_after_groupby_last(self, data):
|
# GH46409
|
df = DataFrame(
|
{"id": [1, 2, 3, 4], "test": [True, pd.NA, data, False]}
|
).convert_dtypes()
|
result = df.groupby("id").last().test
|
expected = df.set_index("id").test
|
assert result.dtype == pd.BooleanDtype()
|
tm.assert_series_equal(expected, result)
|
|
def test_dtypes_gh8722(self, float_string_frame):
|
float_string_frame["bool"] = float_string_frame["A"] > 0
|
result = float_string_frame.dtypes
|
expected = Series(
|
{k: v.dtype for k, v in float_string_frame.items()}, index=result.index
|
)
|
tm.assert_series_equal(result, expected)
|
|
# compat, GH 8722
|
with option_context("use_inf_as_na", True):
|
df = DataFrame([[1]])
|
result = df.dtypes
|
tm.assert_series_equal(result, Series({0: np.dtype("int64")}))
|
|
def test_dtypes_timedeltas(self):
|
df = DataFrame(
|
{
|
"A": Series(date_range("2012-1-1", periods=3, freq="D")),
|
"B": Series([timedelta(days=i) for i in range(3)]),
|
}
|
)
|
result = df.dtypes
|
expected = Series(
|
[np.dtype("datetime64[ns]"), np.dtype("timedelta64[ns]")], index=list("AB")
|
)
|
tm.assert_series_equal(result, expected)
|
|
df["C"] = df["A"] + df["B"]
|
result = df.dtypes
|
expected = Series(
|
[
|
np.dtype("datetime64[ns]"),
|
np.dtype("timedelta64[ns]"),
|
np.dtype("datetime64[ns]"),
|
],
|
index=list("ABC"),
|
)
|
tm.assert_series_equal(result, expected)
|
|
# mixed int types
|
df["D"] = 1
|
result = df.dtypes
|
expected = Series(
|
[
|
np.dtype("datetime64[ns]"),
|
np.dtype("timedelta64[ns]"),
|
np.dtype("datetime64[ns]"),
|
np.dtype("int64"),
|
],
|
index=list("ABCD"),
|
)
|
tm.assert_series_equal(result, expected)
|
|
def test_frame_apply_np_array_return_type(self):
|
# GH 35517
|
df = DataFrame([["foo"]])
|
result = df.apply(lambda col: np.array("bar"))
|
expected = Series(["bar"])
|
tm.assert_series_equal(result, expected)
|