import datetime import numpy as np import pytest import pandas as pd import pandas._testing as tm class TestConvertDtypes: @pytest.mark.parametrize( "convert_integer, expected", [(False, np.dtype("int32")), (True, "Int32")] ) def test_convert_dtypes(self, convert_integer, expected, string_storage): # Specific types are tested in tests/series/test_dtypes.py # Just check that it works for DataFrame here df = pd.DataFrame( { "a": pd.Series([1, 2, 3], dtype=np.dtype("int32")), "b": pd.Series(["x", "y", "z"], dtype=np.dtype("O")), } ) with pd.option_context("string_storage", string_storage): result = df.convert_dtypes(True, True, convert_integer, False) expected = pd.DataFrame( { "a": pd.Series([1, 2, 3], dtype=expected), "b": pd.Series(["x", "y", "z"], dtype=f"string[{string_storage}]"), } ) tm.assert_frame_equal(result, expected) def test_convert_empty(self): # Empty DataFrame can pass convert_dtypes, see GH#40393 empty_df = pd.DataFrame() tm.assert_frame_equal(empty_df, empty_df.convert_dtypes()) def test_convert_dtypes_retain_column_names(self): # GH#41435 df = pd.DataFrame({"a": [1, 2], "b": [3, 4]}) df.columns.name = "cols" result = df.convert_dtypes() tm.assert_index_equal(result.columns, df.columns) assert result.columns.name == "cols" def test_pyarrow_dtype_backend(self): pa = pytest.importorskip("pyarrow") df = pd.DataFrame( { "a": pd.Series([1, 2, 3], dtype=np.dtype("int32")), "b": pd.Series(["x", "y", None], dtype=np.dtype("O")), "c": pd.Series([True, False, None], dtype=np.dtype("O")), "d": pd.Series([np.nan, 100.5, 200], dtype=np.dtype("float")), "e": pd.Series(pd.date_range("2022", periods=3)), "f": pd.Series(pd.timedelta_range("1D", periods=3)), } ) result = df.convert_dtypes(dtype_backend="pyarrow") expected = pd.DataFrame( { "a": pd.arrays.ArrowExtensionArray( pa.array([1, 2, 3], type=pa.int32()) ), "b": pd.arrays.ArrowExtensionArray(pa.array(["x", "y", None])), "c": pd.arrays.ArrowExtensionArray(pa.array([True, False, None])), "d": pd.arrays.ArrowExtensionArray(pa.array([None, 100.5, 200.0])), "e": pd.arrays.ArrowExtensionArray( pa.array( [ datetime.datetime(2022, 1, 1), datetime.datetime(2022, 1, 2), datetime.datetime(2022, 1, 3), ], type=pa.timestamp(unit="ns"), ) ), "f": pd.arrays.ArrowExtensionArray( pa.array( [ datetime.timedelta(1), datetime.timedelta(2), datetime.timedelta(3), ], type=pa.duration("ns"), ) ), } ) tm.assert_frame_equal(result, expected) def test_pyarrow_dtype_backend_already_pyarrow(self): pytest.importorskip("pyarrow") expected = pd.DataFrame([1, 2, 3], dtype="int64[pyarrow]") result = expected.convert_dtypes(dtype_backend="pyarrow") tm.assert_frame_equal(result, expected) def test_pyarrow_dtype_backend_from_pandas_nullable(self): pa = pytest.importorskip("pyarrow") df = pd.DataFrame( { "a": pd.Series([1, 2, None], dtype="Int32"), "b": pd.Series(["x", "y", None], dtype="string[python]"), "c": pd.Series([True, False, None], dtype="boolean"), "d": pd.Series([None, 100.5, 200], dtype="Float64"), } ) result = df.convert_dtypes(dtype_backend="pyarrow") expected = pd.DataFrame( { "a": pd.arrays.ArrowExtensionArray( pa.array([1, 2, None], type=pa.int32()) ), "b": pd.arrays.ArrowExtensionArray(pa.array(["x", "y", None])), "c": pd.arrays.ArrowExtensionArray(pa.array([True, False, None])), "d": pd.arrays.ArrowExtensionArray(pa.array([None, 100.5, 200.0])), } ) tm.assert_frame_equal(result, expected) def test_pyarrow_dtype_empty_object(self): # GH 50970 pytest.importorskip("pyarrow") expected = pd.DataFrame(columns=[0]) result = expected.convert_dtypes(dtype_backend="pyarrow") tm.assert_frame_equal(result, expected) def test_pyarrow_engine_lines_false(self): # GH 48893 df = pd.DataFrame({"a": [1, 2, 3]}) msg = ( "dtype_backend numpy is invalid, only 'numpy_nullable' and " "'pyarrow' are allowed." ) with pytest.raises(ValueError, match=msg): df.convert_dtypes(dtype_backend="numpy")