zmc
2023-12-22 9fdbf60165db0400c2e8e6be2dc6e88138ac719a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
import numpy as np
import pytest
 
from pandas import (
    Categorical,
    CategoricalDtype,
    NaT,
    Timestamp,
    array,
    to_datetime,
)
import pandas._testing as tm
 
 
class TestAstype:
    """Tests for ``Categorical.astype`` across target dtypes."""

    def test_astype_str_int_categories_to_nullable_int(self):
        """Categories that are integer-like strings cast to nullable ``Int64``.

        The categories are ``"0"`` .. ``"4"``, so each element's integer value
        equals its code and the expected array can be built from the codes.
        """
        # GH#39616
        dtype = CategoricalDtype([str(i) for i in range(5)])
        # Seeded generator: inputs are identical on every run, so a failure
        # can be reproduced and the test does not touch global RNG state.
        codes = np.random.default_rng(2).integers(5, size=20)
        arr = Categorical.from_codes(codes, dtype=dtype)

        res = arr.astype("Int64")
        expected = array(codes, dtype="Int64")
        tm.assert_extension_array_equal(res, expected)

    def test_astype_str_int_categories_to_nullable_float(self):
        """Categories that are float-like strings cast to nullable ``Float64``.

        The categories are ``str(i / 2)`` for ``i`` in ``range(5)``, so each
        element's float value is half of its code.
        """
        # GH#39616
        dtype = CategoricalDtype([str(i / 2) for i in range(5)])
        # Seeded generator for reproducible inputs (see the Int64 test).
        codes = np.random.default_rng(2).integers(5, size=20)
        arr = Categorical.from_codes(codes, dtype=dtype)

        res = arr.astype("Float64")
        expected = array(codes, dtype="Float64") / 2
        tm.assert_extension_array_equal(res, expected)

    @pytest.mark.parametrize("ordered", [True, False])
    def test_astype(self, ordered):
        """Cast string and numeric categoricals to object, int and float.

        Parameters
        ----------
        ordered : bool
            Passed as ``ordered`` when constructing each Categorical.
        """
        # string
        cat = Categorical(list("abbaaccc"), ordered=ordered)
        result = cat.astype(object)
        expected = np.array(cat)
        tm.assert_numpy_array_equal(result, expected)

        # string categories cannot be converted to float
        msg = r"Cannot cast object dtype to float64"
        with pytest.raises(ValueError, match=msg):
            cat.astype(float)

        # numeric
        cat = Categorical([0, 1, 2, 2, 1, 0, 1, 0, 2], ordered=ordered)
        result = cat.astype(object)
        expected = np.array(cat, dtype=object)
        tm.assert_numpy_array_equal(result, expected)

        result = cat.astype(int)
        expected = np.array(cat, dtype="int")
        tm.assert_numpy_array_equal(result, expected)

        result = cat.astype(float)
        expected = np.array(cat, dtype=float)
        tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize("dtype_ordered", [True, False])
    @pytest.mark.parametrize("cat_ordered", [True, False])
    def test_astype_category(self, dtype_ordered, cat_ordered):
        """Cast a Categorical to another categorical dtype.

        Parameters
        ----------
        dtype_ordered : bool
            ``ordered`` flag of the target ``CategoricalDtype``.
        cat_ordered : bool
            ``ordered`` flag of the source Categorical.
        """
        # GH#10696/GH#18593
        data = list("abcaacbab")
        cat = Categorical(data, categories=list("bac"), ordered=cat_ordered)

        # standard categories: the target dtype names no categories, so the
        # result is expected to keep the source categories and take the
        # target's ``ordered`` flag
        dtype = CategoricalDtype(ordered=dtype_ordered)
        result = cat.astype(dtype)
        expected = Categorical(data, categories=cat.categories, ordered=dtype_ordered)
        tm.assert_categorical_equal(result, expected)

        # non-standard categories: the result must match building a
        # Categorical from the raw data with the target dtype
        dtype = CategoricalDtype(list("adc"), dtype_ordered)
        result = cat.astype(dtype)
        expected = Categorical(data, dtype=dtype)
        tm.assert_categorical_equal(result, expected)

        if dtype_ordered is False:
            # dtype='category' can't specify ordered, so only test once
            result = cat.astype("category")
            expected = cat
            tm.assert_categorical_equal(result, expected)

    def test_astype_object_datetime_categories(self):
        """Datetime categories (including ``NaT``) cast to object as Timestamp/NaT."""
        # GH#40754
        cat = Categorical(to_datetime(["2021-03-27", NaT]))
        result = cat.astype(object)
        expected = np.array([Timestamp("2021-03-27 00:00:00"), NaT], dtype="object")
        tm.assert_numpy_array_equal(result, expected)

    def test_astype_object_timestamp_categories(self):
        """A Categorical built from a Timestamp casts to an object array of Timestamp."""
        # GH#18024
        cat = Categorical([Timestamp("2014-01-01")])
        result = cat.astype(object)
        expected = np.array([Timestamp("2014-01-01 00:00:00")], dtype="object")
        tm.assert_numpy_array_equal(result, expected)