import numpy as np
|
import pytest
|
|
from pandas._libs.sparse import IntIndex
|
|
from pandas import (
|
DataFrame,
|
Series,
|
Timestamp,
|
)
|
import pandas._testing as tm
|
from pandas.core.arrays.sparse import (
|
SparseArray,
|
SparseDtype,
|
)
|
|
|
class TestAstype:
    """Tests for ``SparseArray.astype`` with sparse and non-sparse targets."""

    def test_astype(self):
        """Cast a float sparse array to several sparse dtypes; NaN -> i8 raises."""
        # float -> float
        sparse = SparseArray([None, None, 0, 2])
        tm.assert_sp_array_equal(
            sparse.astype("Sparse[float32]"),
            SparseArray([None, None, 0, 2], dtype=np.dtype("float32")),
        )

        # With a fill value of 0, only positions 2 and 3 of the four are
        # listed in the expected sparse index.
        cases = [
            (SparseDtype("float64", fill_value=0), [0.0, 2.0]),
            (SparseDtype("int64", 0), [0, 2]),
        ]
        for target, stored in cases:
            expected = SparseArray._simple_new(
                np.array(stored, dtype=target.subtype), IntIndex(4, [2, 3]), target
            )
            tm.assert_sp_array_equal(sparse.astype(target), expected)

        with_nan = SparseArray([0, np.nan, 0, 1], fill_value=0)
        with pytest.raises(ValueError, match="NA"):
            with_nan.astype("Sparse[i8]")

    def test_astype_bool(self):
        """Cast an int sparse array to plain ``bool`` and to a sparse bool dtype."""
        int_sparse = SparseArray([1, 0, 0, 1], dtype=SparseDtype(int, 0))
        tm.assert_numpy_array_equal(
            int_sparse.astype(bool), np.array([1, 0, 0, 1], dtype=bool)
        )

        # update fill value
        bool_dtype = SparseDtype(bool, False)
        converted = int_sparse.astype(bool_dtype)
        tm.assert_sp_array_equal(
            converted, SparseArray([True, False, False, True], dtype=bool_dtype)
        )

    def test_astype_all(self, any_real_numpy_dtype):
        """Casting to each parametrized dtype matches the dense ndarray cast."""
        dense = np.array([1, 2, 3])
        target = np.dtype(any_real_numpy_dtype)
        converted = SparseArray(dense, fill_value=1).astype(target)
        tm.assert_numpy_array_equal(converted, dense.astype(any_real_numpy_dtype))

    @pytest.mark.parametrize(
        "arr, dtype, expected",
        [
            (
                SparseArray([0, 1]),
                "float",
                SparseArray([0.0, 1.0], dtype=SparseDtype(float, 0.0)),
            ),
            (SparseArray([0, 1]), bool, SparseArray([False, True])),
            (
                SparseArray([0, 1], fill_value=1),
                bool,
                SparseArray([False, True], dtype=SparseDtype(bool, True)),
            ),
            (
                SparseArray([0, 1]),
                "datetime64[ns]",
                SparseArray(
                    np.array([0, 1], dtype="datetime64[ns]"),
                    dtype=SparseDtype("datetime64[ns]", Timestamp("1970")),
                ),
            ),
            (
                SparseArray([0, 1, 10]),
                str,
                SparseArray(["0", "1", "10"], dtype=SparseDtype(str, "0")),
            ),
            (SparseArray(["10", "20"]), float, SparseArray([10.0, 20.0])),
            (
                SparseArray([0, 1, 0]),
                object,
                SparseArray([0, 1, 0], dtype=SparseDtype(object, 0)),
            ),
        ],
    )
    def test_astype_more(self, arr, dtype, expected):
        """Cast via the array's own dtype updated with ``update_dtype(dtype)``."""
        target = arr.dtype.update_dtype(dtype)
        tm.assert_sp_array_equal(arr.astype(target), expected)

    def test_astype_nan_raises(self):
        """Casting a float sparse array holding NaN to ``int`` raises."""
        with_nan = SparseArray([1.0, np.nan])
        with pytest.raises(ValueError, match="Cannot convert non-finite"):
            with_nan.astype(int)

    def test_astype_copy_false(self):
        """``copy=False`` still yields the float-converted values."""
        # GH#34456 bug caused by using .view instead of .astype in astype_nansafe
        ints = SparseArray([1, 2, 3])
        converted = ints.astype(SparseDtype(float, 0), copy=False)
        tm.assert_sp_array_equal(
            converted, SparseArray([1.0, 2.0, 3.0], fill_value=0.0)
        )

    def test_astype_dt64_to_int64(self):
        """datetime64 sparse arrays cast to int64 like the dense ndarray does."""
        # GH#49631 match non-sparse behavior
        stamps = np.array(["NaT", "2016-01-02", "2016-01-03"], dtype="M8[ns]")
        as_int = stamps.astype("int64")

        default_fill = SparseArray(stamps)
        tm.assert_numpy_array_equal(default_fill.astype("int64"), as_int)

        # we should also be able to cast to equivalent Sparse[int64]
        int64_min = np.iinfo(np.int64).min
        sparse_int = default_fill.astype(SparseDtype("int64", int64_min))
        tm.assert_numpy_array_equal(sparse_int.to_numpy(), as_int)

        # GH#50087 we should match the non-sparse behavior regardless of
        # if we have a fill_value other than NaT
        custom_fill = SparseArray(
            stamps, dtype=SparseDtype("datetime64[ns]", stamps[1])
        )
        tm.assert_numpy_array_equal(custom_fill.astype("int64"), as_int)
|
|
|
def test_dtype_sparse_with_fill_value_not_present_in_data():
    """Series.astype to a sparse dtype whose fill value is absent from the data."""
    # GH 49987
    # Expected result: same values, object-subtype sparse dtype, fill value "c".
    expected_dtype = SparseDtype("object", fill_value="c")
    expected = Series(["a", "b", "b"], name="A", dtype=expected_dtype)

    frame = DataFrame([["a", 0], ["b", 1], ["b", 2]], columns=["A", "B"])
    converted = frame["A"].astype(SparseDtype("category", fill_value="c"))

    tm.assert_series_equal(converted, expected)
|