import numpy as np
|
import pytest
|
|
from pandas import (
|
Categorical,
|
CategoricalDtype,
|
NaT,
|
Timestamp,
|
array,
|
to_datetime,
|
)
|
import pandas._testing as tm
|
|
|
class TestAstype:
|
def test_astype_str_int_categories_to_nullable_int(self):
|
# GH#39616
|
dtype = CategoricalDtype([str(i) for i in range(5)])
|
codes = np.random.randint(5, size=20)
|
arr = Categorical.from_codes(codes, dtype=dtype)
|
|
res = arr.astype("Int64")
|
expected = array(codes, dtype="Int64")
|
tm.assert_extension_array_equal(res, expected)
|
|
def test_astype_str_int_categories_to_nullable_float(self):
|
# GH#39616
|
dtype = CategoricalDtype([str(i / 2) for i in range(5)])
|
codes = np.random.randint(5, size=20)
|
arr = Categorical.from_codes(codes, dtype=dtype)
|
|
res = arr.astype("Float64")
|
expected = array(codes, dtype="Float64") / 2
|
tm.assert_extension_array_equal(res, expected)
|
|
@pytest.mark.parametrize("ordered", [True, False])
|
def test_astype(self, ordered):
|
# string
|
cat = Categorical(list("abbaaccc"), ordered=ordered)
|
result = cat.astype(object)
|
expected = np.array(cat)
|
tm.assert_numpy_array_equal(result, expected)
|
|
msg = r"Cannot cast object dtype to float64"
|
with pytest.raises(ValueError, match=msg):
|
cat.astype(float)
|
|
# numeric
|
cat = Categorical([0, 1, 2, 2, 1, 0, 1, 0, 2], ordered=ordered)
|
result = cat.astype(object)
|
expected = np.array(cat, dtype=object)
|
tm.assert_numpy_array_equal(result, expected)
|
|
result = cat.astype(int)
|
expected = np.array(cat, dtype="int")
|
tm.assert_numpy_array_equal(result, expected)
|
|
result = cat.astype(float)
|
expected = np.array(cat, dtype=float)
|
tm.assert_numpy_array_equal(result, expected)
|
|
@pytest.mark.parametrize("dtype_ordered", [True, False])
|
@pytest.mark.parametrize("cat_ordered", [True, False])
|
def test_astype_category(self, dtype_ordered, cat_ordered):
|
# GH#10696/GH#18593
|
data = list("abcaacbab")
|
cat = Categorical(data, categories=list("bac"), ordered=cat_ordered)
|
|
# standard categories
|
dtype = CategoricalDtype(ordered=dtype_ordered)
|
result = cat.astype(dtype)
|
expected = Categorical(data, categories=cat.categories, ordered=dtype_ordered)
|
tm.assert_categorical_equal(result, expected)
|
|
# non-standard categories
|
dtype = CategoricalDtype(list("adc"), dtype_ordered)
|
result = cat.astype(dtype)
|
expected = Categorical(data, dtype=dtype)
|
tm.assert_categorical_equal(result, expected)
|
|
if dtype_ordered is False:
|
# dtype='category' can't specify ordered, so only test once
|
result = cat.astype("category")
|
expected = cat
|
tm.assert_categorical_equal(result, expected)
|
|
def test_astype_object_datetime_categories(self):
|
# GH#40754
|
cat = Categorical(to_datetime(["2021-03-27", NaT]))
|
result = cat.astype(object)
|
expected = np.array([Timestamp("2021-03-27 00:00:00"), NaT], dtype="object")
|
tm.assert_numpy_array_equal(result, expected)
|
|
def test_astype_object_timestamp_categories(self):
|
# GH#18024
|
cat = Categorical([Timestamp("2014-01-01")])
|
result = cat.astype(object)
|
expected = np.array([Timestamp("2014-01-01 00:00:00")], dtype="object")
|
tm.assert_numpy_array_equal(result, expected)
|