"""
|
Common type operations.
|
"""
|
from __future__ import annotations
|
|
from typing import (
|
Any,
|
Callable,
|
)
|
import warnings
|
|
import numpy as np
|
|
from pandas._libs import (
|
Interval,
|
Period,
|
algos,
|
lib,
|
)
|
from pandas._libs.tslibs import conversion
|
from pandas._typing import (
|
ArrayLike,
|
DtypeObj,
|
)
|
|
from pandas.core.dtypes.base import _registry as registry
|
from pandas.core.dtypes.dtypes import (
|
CategoricalDtype,
|
DatetimeTZDtype,
|
ExtensionDtype,
|
IntervalDtype,
|
PeriodDtype,
|
)
|
from pandas.core.dtypes.generic import ABCIndex
|
from pandas.core.dtypes.inference import (
|
is_array_like,
|
is_bool,
|
is_complex,
|
is_dataclass,
|
is_decimal,
|
is_dict_like,
|
is_file_like,
|
is_float,
|
is_hashable,
|
is_integer,
|
is_interval,
|
is_iterator,
|
is_list_like,
|
is_named_tuple,
|
is_nested_list_like,
|
is_number,
|
is_re,
|
is_re_compilable,
|
is_scalar,
|
is_sequence,
|
)
|
|
# Dtype objects that are compared against throughout this module; bound once
# at import time so callers do not rebuild them.
DT64NS_DTYPE = conversion.DT64NS_DTYPE
TD64NS_DTYPE = conversion.TD64NS_DTYPE
INT64_DTYPE = np.dtype("int64")

# Import-time optimisation: scipy is not imported here.  ``is_scipy_sparse``
# replaces this placeholder with the real checker on its first call.
_is_scipy_sparse = None

ensure_float64 = algos.ensure_float64
|
|
|
def ensure_float(arr):
    """
    Return ``arr`` as a float-typed NumPy array when a conversion applies.

    Parameters
    ----------
    arr : array-like
        Array whose ``dtype`` decides which conversion, if any, is applied.

    Returns
    -------
    float_arr : array-like
        * Extension arrays are materialised through ``to_numpy`` with
          ``np.nan`` as the missing-value marker.  Float extension dtypes
          keep their own ``numpy_dtype``; every other extension dtype
          becomes ``float64``.
        * NumPy integer and boolean arrays are cast with ``astype(float)``.
        * Anything else is returned unchanged.
    """
    dtype = arr.dtype

    if not is_extension_array_dtype(dtype):
        if issubclass(dtype.type, (np.integer, np.bool_)):
            return arr.astype(float)
        return arr

    # Preserve the precision of float extension dtypes (e.g. Float32).
    target = dtype.numpy_dtype if is_float_dtype(dtype) else "float64"
    return arr.to_numpy(dtype=target, na_value=np.nan)
|
|
|
# Re-exports of the ``ensure_*`` helpers implemented in ``pandas._libs.algos``
# so that callers can import them from this module.
ensure_int64 = algos.ensure_int64
ensure_int32 = algos.ensure_int32
ensure_int16 = algos.ensure_int16
ensure_int8 = algos.ensure_int8
ensure_platform_int = algos.ensure_platform_int
ensure_object = algos.ensure_object
ensure_uint64 = algos.ensure_uint64
|
|
|
def ensure_str(value: bytes | Any) -> str:
    """
    Coerce ``value`` to ``str``.

    ``str`` instances are returned as they are, ``bytes`` are decoded as
    UTF-8, and every other object is passed through ``str()``.
    """
    if isinstance(value, str):
        return value
    if isinstance(value, bytes):
        return value.decode("utf-8")
    return str(value)
|
|
|
def ensure_python_int(value: int | np.integer) -> int:
    """
    Ensure that a value is a python int.

    Parameters
    ----------
    value: int or numpy.integer
        Integer scalar, or a float scalar holding an integral value
        (e.g. ``3.0``).

    Returns
    -------
    int

    Raises
    ------
    TypeError: if the value isn't an int or can't be converted to one
        without loss (non-scalars, non-numeric scalars, fractional floats,
        NaN and infinities).
    """
    if not (is_integer(value) or is_float(value)):
        if not is_scalar(value):
            raise TypeError(
                f"Value needs to be a scalar value, was type {type(value).__name__}"
            )
        raise TypeError(f"Wrong type {type(value)} for value {value}")

    try:
        new_value = int(value)
    except (TypeError, ValueError, OverflowError) as err:
        # int(nan) raises ValueError and int(+/-inf) raises OverflowError;
        # both are reported as the documented TypeError.
        raise TypeError(f"Wrong type {type(value)} for value {value}") from err

    # Explicit comparison instead of ``assert`` so that the check is still
    # performed when Python runs with -O (which strips assert statements).
    if new_value != value:
        raise TypeError(f"Wrong type {type(value)} for value {value}")
    return new_value
|
|
|
def classes(*klasses) -> Callable:
    """Build a predicate telling whether a type subclasses any of ``klasses``."""

    def _is_subclass(tipo):
        return issubclass(tipo, klasses)

    return _is_subclass
|
|
|
def classes_and_not_datetimelike(*klasses) -> Callable:
    """
    Build a predicate that accepts a type when it subclasses any of
    ``klasses`` but is neither ``np.datetime64`` nor ``np.timedelta64``
    (nor a subclass of those).
    """
    datetimelike = (np.datetime64, np.timedelta64)

    def _matches(tipo):
        if not issubclass(tipo, klasses):
            return False
        return not issubclass(tipo, datetimelike)

    return _matches
|
|
|
def is_object_dtype(arr_or_dtype) -> bool:
    """
    Tell whether an array-like or dtype has the object dtype.

    Parameters
    ----------
    arr_or_dtype : array-like or dtype
        The array-like or dtype to inspect.

    Returns
    -------
    boolean
        True when the scalar type behind ``arr_or_dtype`` is ``np.object_``.

    Examples
    --------
    >>> from pandas.api.types import is_object_dtype
    >>> is_object_dtype(object)
    True
    >>> is_object_dtype(int)
    False
    >>> is_object_dtype(np.array([], dtype=object))
    True
    >>> is_object_dtype(np.array([], dtype=int))
    False
    >>> is_object_dtype([1, 2, 3])
    False
    """
    is_object_type = classes(np.object_)
    return _is_dtype_type(arr_or_dtype, is_object_type)
|
|
|
def is_sparse(arr) -> bool:
    """
    Check whether an array-like holds pandas sparse data.

    The decision is based solely on the dtype: the ``dtype`` attribute of
    ``arr`` (or ``arr`` itself when it has none) must be a pandas
    ``SparseDtype``.  Sparse containers from other libraries therefore do
    not qualify.

    Parameters
    ----------
    arr : array-like
        Array-like to check.

    Returns
    -------
    bool
        Whether or not the array-like is a pandas sparse array.

    Examples
    --------
    Returns `True` if the parameter is a pandas sparse array.

    >>> is_sparse(pd.arrays.SparseArray([0, 0, 1, 0]))
    True
    >>> is_sparse(pd.Series(pd.arrays.SparseArray([0, 0, 1, 0])))
    True

    Returns `False` if the parameter is not sparse.

    >>> is_sparse(np.array([0, 0, 1, 0]))
    False
    >>> is_sparse(pd.Series([0, 1, 0, 0]))
    False

    Returns `False` if the parameter is not a pandas sparse array.

    >>> from scipy.sparse import bsr_matrix
    >>> is_sparse(bsr_matrix([0, 1, 0, 0]))
    False
    """
    # Imported lazily, as in the rest of this module, to keep import cheap.
    from pandas.core.arrays.sparse import SparseDtype

    return isinstance(getattr(arr, "dtype", arr), SparseDtype)
|
|
|
def is_scipy_sparse(arr) -> bool:
    """
    Check whether an array-like is a scipy.sparse.spmatrix instance.

    Parameters
    ----------
    arr : array-like
        The array-like to check.

    Returns
    -------
    boolean
        Whether or not the array-like is a scipy.sparse.spmatrix instance.

    Notes
    -----
    scipy is imported on the first call only, and the resolved checker is
    cached in the module-level ``_is_scipy_sparse`` slot.  If scipy is not
    installed, this function will always return False.

    Examples
    --------
    >>> from scipy.sparse import bsr_matrix
    >>> is_scipy_sparse(bsr_matrix([1, 2, 3]))
    True
    >>> is_scipy_sparse(pd.arrays.SparseArray([1, 2, 3]))
    False
    """
    global _is_scipy_sparse

    checker = _is_scipy_sparse
    if checker is None:
        try:
            from scipy.sparse import issparse
        except ImportError:
            # No scipy available: nothing can ever be a scipy sparse matrix.
            def checker(_):
                return False

        else:
            checker = issparse
        # Cache the resolved checker so later calls skip the import attempt.
        _is_scipy_sparse = checker

    return checker(arr)
|
|
|
def is_datetime64_dtype(arr_or_dtype) -> bool:
    """
    Tell whether an array-like or dtype has a (NumPy) datetime64 dtype.

    Parameters
    ----------
    arr_or_dtype : array-like or dtype
        The array-like or dtype to inspect.

    Returns
    -------
    boolean
        Whether or not the array-like or dtype is of the datetime64 dtype.

    Examples
    --------
    >>> from pandas.api.types import is_datetime64_dtype
    >>> is_datetime64_dtype(object)
    False
    >>> is_datetime64_dtype(np.datetime64)
    True
    >>> is_datetime64_dtype(np.array([], dtype=int))
    False
    >>> is_datetime64_dtype(np.array([], dtype=np.datetime64))
    True
    >>> is_datetime64_dtype([1, 2, 3])
    False
    """
    if not isinstance(arr_or_dtype, np.dtype):
        return _is_dtype_type(arr_or_dtype, classes(np.datetime64))

    # GH#33400 fastpath: a NumPy dtype object only needs its kind inspected.
    return arr_or_dtype.kind == "M"
|
|
|
def is_datetime64tz_dtype(arr_or_dtype) -> bool:
    """
    Tell whether an array-like or dtype has a DatetimeTZDtype dtype.

    Parameters
    ----------
    arr_or_dtype : array-like or dtype
        The array-like or dtype to inspect.

    Returns
    -------
    boolean
        Whether or not the array-like or dtype is of a DatetimeTZDtype dtype.

    Examples
    --------
    >>> is_datetime64tz_dtype(object)
    False
    >>> is_datetime64tz_dtype([1, 2, 3])
    False
    >>> is_datetime64tz_dtype(pd.DatetimeIndex([1, 2, 3]))  # tz-naive
    False
    >>> is_datetime64tz_dtype(pd.DatetimeIndex([1, 2, 3], tz="US/Eastern"))
    True

    >>> dtype = DatetimeTZDtype("ns", tz="US/Eastern")
    >>> s = pd.Series([], dtype=dtype)
    >>> is_datetime64tz_dtype(dtype)
    True
    >>> is_datetime64tz_dtype(s)
    True
    """
    if arr_or_dtype is None:
        return False

    # GH#33400, GH 34986: an actual dtype instance needs no further lookup.
    if isinstance(arr_or_dtype, DatetimeTZDtype):
        return True

    return DatetimeTZDtype.is_dtype(arr_or_dtype)
|
|
|
def is_timedelta64_dtype(arr_or_dtype) -> bool:
    """
    Tell whether an array-like or dtype has a timedelta64 dtype.

    Parameters
    ----------
    arr_or_dtype : array-like or dtype
        The array-like or dtype to inspect.

    Returns
    -------
    boolean
        Whether or not the array-like or dtype is of the timedelta64 dtype.

    Examples
    --------
    >>> from pandas.core.dtypes.common import is_timedelta64_dtype
    >>> is_timedelta64_dtype(object)
    False
    >>> is_timedelta64_dtype(np.timedelta64)
    True
    >>> is_timedelta64_dtype([1, 2, 3])
    False
    >>> is_timedelta64_dtype(pd.Series([], dtype="timedelta64[ns]"))
    True
    >>> is_timedelta64_dtype('0 days')
    False
    """
    if not isinstance(arr_or_dtype, np.dtype):
        return _is_dtype_type(arr_or_dtype, classes(np.timedelta64))

    # GH#33400 fastpath: a NumPy dtype object only needs its kind inspected.
    return arr_or_dtype.kind == "m"
|
|
|
def is_period_dtype(arr_or_dtype) -> bool:
    """
    Tell whether an array-like or dtype has the Period dtype.

    Parameters
    ----------
    arr_or_dtype : array-like or dtype
        The array-like or dtype to inspect.

    Returns
    -------
    boolean
        Whether or not the array-like or dtype is of the Period dtype.

    Examples
    --------
    >>> is_period_dtype(object)
    False
    >>> is_period_dtype(PeriodDtype(freq="D"))
    True
    >>> is_period_dtype([1, 2, 3])
    False
    >>> is_period_dtype(pd.Period("2017-01-01"))
    False
    >>> is_period_dtype(pd.PeriodIndex([], freq="A"))
    True
    """
    if arr_or_dtype is None:
        return False

    if isinstance(arr_or_dtype, ExtensionDtype):
        # GH#33400 fastpath: decide from the dtype's scalar type alone.
        return arr_or_dtype.type is Period

    return PeriodDtype.is_dtype(arr_or_dtype)
|
|
|
def is_interval_dtype(arr_or_dtype) -> bool:
    """
    Tell whether an array-like or dtype has the Interval dtype.

    Parameters
    ----------
    arr_or_dtype : array-like or dtype
        The array-like or dtype to inspect.

    Returns
    -------
    boolean
        Whether or not the array-like or dtype is of the Interval dtype.

    Examples
    --------
    >>> is_interval_dtype(object)
    False
    >>> is_interval_dtype(IntervalDtype())
    True
    >>> is_interval_dtype([1, 2, 3])
    False
    >>>
    >>> interval = pd.Interval(1, 2, closed="right")
    >>> is_interval_dtype(interval)
    False
    >>> is_interval_dtype(pd.IntervalIndex([interval]))
    True
    """
    if arr_or_dtype is None:
        return False

    if isinstance(arr_or_dtype, ExtensionDtype):
        # GH#33400 fastpath: decide from the dtype's scalar type alone.
        return arr_or_dtype.type is Interval

    return IntervalDtype.is_dtype(arr_or_dtype)
|
|
|
def is_categorical_dtype(arr_or_dtype) -> bool:
    """
    Tell whether an array-like or dtype has the Categorical dtype.

    Parameters
    ----------
    arr_or_dtype : array-like or dtype
        The array-like or dtype to inspect.

    Returns
    -------
    boolean
        Whether or not the array-like or dtype is of the Categorical dtype.

    Examples
    --------
    >>> from pandas.api.types import is_categorical_dtype
    >>> from pandas import CategoricalDtype
    >>> is_categorical_dtype(object)
    False
    >>> is_categorical_dtype(CategoricalDtype())
    True
    >>> is_categorical_dtype([1, 2, 3])
    False
    >>> is_categorical_dtype(pd.Categorical([1, 2, 3]))
    True
    >>> is_categorical_dtype(pd.CategoricalIndex([1, 2, 3]))
    True
    """
    if arr_or_dtype is None:
        return False

    if isinstance(arr_or_dtype, ExtensionDtype):
        # GH#33400 fastpath: extension dtypes are identified by their name.
        return arr_or_dtype.name == "category"

    return CategoricalDtype.is_dtype(arr_or_dtype)
|
|
|
def is_string_or_object_np_dtype(dtype: np.dtype) -> bool:
    """
    Cheap variant of is_string_dtype for callers that already hold a
    np.dtype: True for the object dtype and for kinds "S" and "U".
    """
    if dtype == object:
        return True
    return dtype.kind in "SU"
|
|
|
def is_string_dtype(arr_or_dtype) -> bool:
    """
    Tell whether the provided array or dtype is of the string dtype.

    When an object carrying an object-kind ``dtype`` is passed, the answer
    comes from inspecting its elements (``is_all_strings``) rather than from
    the dtype alone.

    Parameters
    ----------
    arr_or_dtype : array-like or dtype
        The array or dtype to inspect.

    Returns
    -------
    boolean
        Whether or not the array or dtype is of the string dtype.

    Examples
    --------
    >>> is_string_dtype(str)
    True
    >>> is_string_dtype(object)
    True
    >>> is_string_dtype(int)
    False
    >>> is_string_dtype(np.array(['a', 'b']))
    True
    >>> is_string_dtype(pd.Series([1, 2]))
    False
    >>> is_string_dtype(pd.Series([1, 2], dtype=object))
    False
    """
    if hasattr(arr_or_dtype, "dtype") and get_dtype(arr_or_dtype).kind == "O":
        return is_all_strings(arr_or_dtype)

    def _is_stringlike(dtype) -> bool:
        if not is_string_or_object_np_dtype(dtype):
            # Not a NumPy str/bytes/object dtype; it may still be an
            # extension dtype that compares equal to "string".
            try:
                return dtype == "string"
            except TypeError:
                return False
        return True

    return _is_dtype(arr_or_dtype, _is_stringlike)
|
|
|
def is_dtype_equal(source, target) -> bool:
    """
    Compare two dtype specifications for equality.

    Parameters
    ----------
    source : The first dtype to compare
    target : The second dtype to compare

    Returns
    -------
    boolean
        Whether or not the two dtypes are equal.  Specifications that
        cannot be resolved or compared are reported as not equal.

    Examples
    --------
    >>> is_dtype_equal(int, float)
    False
    >>> is_dtype_equal("int", int)
    True
    >>> is_dtype_equal(object, "category")
    False
    >>> is_dtype_equal(CategoricalDtype(), "category")
    True
    >>> is_dtype_equal(DatetimeTZDtype(tz="UTC"), "datetime64")
    False
    """
    # Errors that signal "these two cannot be compared".
    invalid_comparison = (TypeError, AttributeError, ImportError)
    source_is_str = isinstance(source, str)

    if isinstance(target, str):
        if not source_is_str:
            # GH#38516 ensure we get the same behavior from
            # is_dtype_equal(CDT, "category") and CDT == "category"
            try:
                resolved = get_dtype(source)
                if isinstance(resolved, ExtensionDtype):
                    return resolved == target
            except invalid_comparison:
                return False
    elif source_is_str:
        # Normalise so that the string is always the second argument.
        return is_dtype_equal(target, source)

    try:
        return get_dtype(source) == get_dtype(target)
    except invalid_comparison:
        # invalid comparison
        # object == category will hit this
        return False
|
|
|
def is_any_int_dtype(arr_or_dtype) -> bool:
    """
    Tell whether the provided array or dtype is of an integer dtype.

    For this check timedelta64 counts as an "any-integer" type and yields
    True.

    This function is internal and should not be exposed in the public API.

    The nullable Integer dtypes (e.g. pandas.Int64Dtype) are also considered
    as integer by this function.

    Parameters
    ----------
    arr_or_dtype : array-like or dtype
        The array or dtype to inspect.

    Returns
    -------
    boolean
        Whether or not the array or dtype is of an integer dtype.

    Examples
    --------
    >>> is_any_int_dtype(str)
    False
    >>> is_any_int_dtype(int)
    True
    >>> is_any_int_dtype(float)
    False
    >>> is_any_int_dtype(np.uint64)
    True
    >>> is_any_int_dtype(np.datetime64)
    False
    >>> is_any_int_dtype(np.timedelta64)
    True
    >>> is_any_int_dtype(np.array(['a', 'b']))
    False
    >>> is_any_int_dtype(pd.Series([1, 2]))
    True
    >>> is_any_int_dtype(np.array([], dtype=np.timedelta64))
    True
    >>> is_any_int_dtype(pd.Index([1, 2.]))  # float
    False
    """

    def _is_integer_extension_dtype(typ) -> bool:
        return isinstance(typ, ExtensionDtype) and typ.kind in "iu"

    if _is_dtype_type(arr_or_dtype, classes(np.integer, np.timedelta64)):
        return True
    return _is_dtype(arr_or_dtype, _is_integer_extension_dtype)
|
|
|
def is_integer_dtype(arr_or_dtype) -> bool:
    """
    Tell whether the provided array or dtype is of an integer dtype.

    In contrast to `is_any_int_dtype`, timedelta64 yields False here.

    The nullable Integer dtypes (e.g. pandas.Int64Dtype) are also considered
    as integer by this function.

    Parameters
    ----------
    arr_or_dtype : array-like or dtype
        The array or dtype to inspect.

    Returns
    -------
    boolean
        Whether or not the array or dtype is of an integer dtype and
        not an instance of timedelta64.

    Examples
    --------
    >>> is_integer_dtype(str)
    False
    >>> is_integer_dtype(int)
    True
    >>> is_integer_dtype(float)
    False
    >>> is_integer_dtype(np.uint64)
    True
    >>> is_integer_dtype('int8')
    True
    >>> is_integer_dtype('Int8')
    True
    >>> is_integer_dtype(pd.Int8Dtype)
    True
    >>> is_integer_dtype(np.datetime64)
    False
    >>> is_integer_dtype(np.timedelta64)
    False
    >>> is_integer_dtype(np.array(['a', 'b']))
    False
    >>> is_integer_dtype(pd.Series([1, 2]))
    True
    >>> is_integer_dtype(np.array([], dtype=np.timedelta64))
    False
    >>> is_integer_dtype(pd.Index([1, 2.]))  # float
    False
    """

    def _is_integer_extension_dtype(typ) -> bool:
        return isinstance(typ, ExtensionDtype) and typ.kind in "iu"

    if _is_dtype_type(arr_or_dtype, classes_and_not_datetimelike(np.integer)):
        return True
    return _is_dtype(arr_or_dtype, _is_integer_extension_dtype)
|
|
|
def is_signed_integer_dtype(arr_or_dtype) -> bool:
    """
    Tell whether the provided array or dtype is of a signed integer dtype.

    In contrast to `is_any_int_dtype`, timedelta64 yields False here.

    The nullable Integer dtypes (e.g. pandas.Int64Dtype) are also considered
    as integer by this function.

    Parameters
    ----------
    arr_or_dtype : array-like or dtype
        The array or dtype to inspect.

    Returns
    -------
    boolean
        Whether or not the array or dtype is of a signed integer dtype
        and not an instance of timedelta64.

    Examples
    --------
    >>> is_signed_integer_dtype(str)
    False
    >>> is_signed_integer_dtype(int)
    True
    >>> is_signed_integer_dtype(float)
    False
    >>> is_signed_integer_dtype(np.uint64)  # unsigned
    False
    >>> is_signed_integer_dtype('int8')
    True
    >>> is_signed_integer_dtype('Int8')
    True
    >>> is_signed_integer_dtype(pd.Int8Dtype)
    True
    >>> is_signed_integer_dtype(np.datetime64)
    False
    >>> is_signed_integer_dtype(np.timedelta64)
    False
    >>> is_signed_integer_dtype(np.array(['a', 'b']))
    False
    >>> is_signed_integer_dtype(pd.Series([1, 2]))
    True
    >>> is_signed_integer_dtype(np.array([], dtype=np.timedelta64))
    False
    >>> is_signed_integer_dtype(pd.Index([1, 2.]))  # float
    False
    >>> is_signed_integer_dtype(np.array([1, 2], dtype=np.uint32))  # unsigned
    False
    """

    def _is_signed_extension_dtype(typ) -> bool:
        return isinstance(typ, ExtensionDtype) and typ.kind == "i"

    signed_np_type = classes_and_not_datetimelike(np.signedinteger)
    if _is_dtype_type(arr_or_dtype, signed_np_type):
        return True
    return _is_dtype(arr_or_dtype, _is_signed_extension_dtype)
|
|
|
def is_unsigned_integer_dtype(arr_or_dtype) -> bool:
    """
    Tell whether the provided array or dtype is of an unsigned integer dtype.

    The nullable Integer dtypes (e.g. pandas.UInt64Dtype) are also
    considered as integer by this function.

    Parameters
    ----------
    arr_or_dtype : array-like or dtype
        The array or dtype to inspect.

    Returns
    -------
    boolean
        Whether or not the array or dtype is of an unsigned integer dtype.

    Examples
    --------
    >>> is_unsigned_integer_dtype(str)
    False
    >>> is_unsigned_integer_dtype(int)  # signed
    False
    >>> is_unsigned_integer_dtype(float)
    False
    >>> is_unsigned_integer_dtype(np.uint64)
    True
    >>> is_unsigned_integer_dtype('uint8')
    True
    >>> is_unsigned_integer_dtype('UInt8')
    True
    >>> is_unsigned_integer_dtype(pd.UInt8Dtype)
    True
    >>> is_unsigned_integer_dtype(np.array(['a', 'b']))
    False
    >>> is_unsigned_integer_dtype(pd.Series([1, 2]))  # signed
    False
    >>> is_unsigned_integer_dtype(pd.Index([1, 2.]))  # float
    False
    >>> is_unsigned_integer_dtype(np.array([1, 2], dtype=np.uint32))
    True
    """

    def _is_unsigned_extension_dtype(typ) -> bool:
        return isinstance(typ, ExtensionDtype) and typ.kind == "u"

    unsigned_np_type = classes_and_not_datetimelike(np.unsignedinteger)
    if _is_dtype_type(arr_or_dtype, unsigned_np_type):
        return True
    return _is_dtype(arr_or_dtype, _is_unsigned_extension_dtype)
|
|
|
def is_int64_dtype(arr_or_dtype) -> bool:
    """
    Tell whether the provided array or dtype is of the int64 dtype.

    Parameters
    ----------
    arr_or_dtype : array-like or dtype
        The array or dtype to inspect.

    Returns
    -------
    boolean
        Whether or not the array or dtype is of the int64 dtype.

    Notes
    -----
    Depending on system architecture, the return value of `is_int64_dtype(
    int)` will be True if the OS uses 64-bit integers and False if the OS
    uses 32-bit integers.

    Examples
    --------
    >>> from pandas.api.types import is_int64_dtype
    >>> is_int64_dtype(str)
    False
    >>> is_int64_dtype(np.int32)
    False
    >>> is_int64_dtype(np.int64)
    True
    >>> is_int64_dtype('int8')
    False
    >>> is_int64_dtype('Int8')
    False
    >>> is_int64_dtype(pd.Int64Dtype)
    True
    >>> is_int64_dtype(float)
    False
    >>> is_int64_dtype(np.uint64)  # unsigned
    False
    >>> is_int64_dtype(np.array(['a', 'b']))
    False
    >>> is_int64_dtype(np.array([1, 2], dtype=np.int64))
    True
    >>> is_int64_dtype(pd.Index([1, 2.]))  # float
    False
    >>> is_int64_dtype(np.array([1, 2], dtype=np.uint32))  # unsigned
    False
    """
    is_int64_type = classes(np.int64)
    return _is_dtype_type(arr_or_dtype, is_int64_type)
|
|
|
def is_datetime64_any_dtype(arr_or_dtype) -> bool:
    """
    Tell whether the provided array or dtype is of a datetime64 dtype,
    tz-naive or tz-aware.

    Parameters
    ----------
    arr_or_dtype : array-like or dtype
        The array or dtype to inspect.

    Returns
    -------
    bool
        Whether or not the array or dtype is of the datetime64 dtype.

    Examples
    --------
    >>> is_datetime64_any_dtype(str)
    False
    >>> is_datetime64_any_dtype(int)
    False
    >>> is_datetime64_any_dtype(np.datetime64)  # can be tz-naive
    True
    >>> is_datetime64_any_dtype(DatetimeTZDtype("ns", "US/Eastern"))
    True
    >>> is_datetime64_any_dtype(np.array(['a', 'b']))
    False
    >>> is_datetime64_any_dtype(np.array([1, 2]))
    False
    >>> is_datetime64_any_dtype(np.array([], dtype="datetime64[ns]"))
    True
    >>> is_datetime64_any_dtype(pd.DatetimeIndex([1, 2, 3], dtype="datetime64[ns]"))
    True
    """
    if arr_or_dtype is None:
        return False

    if isinstance(arr_or_dtype, (np.dtype, ExtensionDtype)):
        # GH#33400 fastpath: dtype objects expose their kind directly.
        return arr_or_dtype.kind == "M"

    if is_datetime64_dtype(arr_or_dtype):
        return True
    return is_datetime64tz_dtype(arr_or_dtype)
|
|
|
def is_datetime64_ns_dtype(arr_or_dtype) -> bool:
    """
    Tell whether the provided array or dtype is of the datetime64[ns] dtype.

    A tz-aware ``DatetimeTZDtype`` qualifies when its unit is ``"ns"``.

    Parameters
    ----------
    arr_or_dtype : array-like or dtype
        The array or dtype to inspect.

    Returns
    -------
    bool
        Whether or not the array or dtype is of the datetime64[ns] dtype.

    Examples
    --------
    >>> is_datetime64_ns_dtype(str)
    False
    >>> is_datetime64_ns_dtype(int)
    False
    >>> is_datetime64_ns_dtype(np.datetime64)  # no unit
    False
    >>> is_datetime64_ns_dtype(DatetimeTZDtype("ns", "US/Eastern"))
    True
    >>> is_datetime64_ns_dtype(np.array(['a', 'b']))
    False
    >>> is_datetime64_ns_dtype(np.array([1, 2]))
    False
    >>> is_datetime64_ns_dtype(np.array([], dtype="datetime64"))  # no unit
    False
    >>> is_datetime64_ns_dtype(np.array([], dtype="datetime64[ps]"))  # wrong unit
    False
    >>> is_datetime64_ns_dtype(pd.DatetimeIndex([1, 2, 3], dtype="datetime64[ns]"))
    True
    """
    if arr_or_dtype is None:
        return False

    try:
        tipo = get_dtype(arr_or_dtype)
    except TypeError:
        # The direct lookup failed; only tz-aware inputs get a second try,
        # this time through their ``dtype`` attribute.
        if not is_datetime64tz_dtype(arr_or_dtype):
            return False
        tipo = get_dtype(arr_or_dtype.dtype)

    if tipo == DT64NS_DTYPE:
        return True
    return isinstance(tipo, DatetimeTZDtype) and tipo.unit == "ns"
|
|
|
def is_timedelta64_ns_dtype(arr_or_dtype) -> bool:
    """
    Tell whether the provided array or dtype is of the timedelta64[ns] dtype.

    The match is exact, so a generic ``np.timedelta64`` (no unit) or any
    other resolution yields False.

    Parameters
    ----------
    arr_or_dtype : array-like or dtype
        The array or dtype to inspect.

    Returns
    -------
    boolean
        Whether or not the array or dtype is of the timedelta64[ns] dtype.

    Examples
    --------
    >>> from pandas.core.dtypes.common import is_timedelta64_ns_dtype
    >>> is_timedelta64_ns_dtype(np.dtype('m8[ns]'))
    True
    >>> is_timedelta64_ns_dtype(np.dtype('m8[ps]'))  # Wrong frequency
    False
    >>> is_timedelta64_ns_dtype(np.array([1, 2], dtype='m8[ns]'))
    True
    >>> is_timedelta64_ns_dtype(np.array([1, 2], dtype=np.timedelta64))
    False
    """

    def _is_td64ns(dtype) -> bool:
        return dtype == TD64NS_DTYPE

    return _is_dtype(arr_or_dtype, _is_td64ns)
|
|
|
def is_datetime_or_timedelta_dtype(arr_or_dtype) -> bool:
    """
    Tell whether the provided array or dtype is of
    a timedelta64 or datetime64 dtype.

    Parameters
    ----------
    arr_or_dtype : array-like or dtype
        The array or dtype to inspect.

    Returns
    -------
    boolean
        Whether or not the array or dtype is of a timedelta64,
        or datetime64 dtype.

    Examples
    --------
    >>> is_datetime_or_timedelta_dtype(str)
    False
    >>> is_datetime_or_timedelta_dtype(int)
    False
    >>> is_datetime_or_timedelta_dtype(np.datetime64)
    True
    >>> is_datetime_or_timedelta_dtype(np.timedelta64)
    True
    >>> is_datetime_or_timedelta_dtype(np.array(['a', 'b']))
    False
    >>> is_datetime_or_timedelta_dtype(pd.Series([1, 2]))
    False
    >>> is_datetime_or_timedelta_dtype(np.array([], dtype=np.timedelta64))
    True
    >>> is_datetime_or_timedelta_dtype(np.array([], dtype=np.datetime64))
    True
    """
    is_datetimelike_type = classes(np.datetime64, np.timedelta64)
    return _is_dtype_type(arr_or_dtype, is_datetimelike_type)
|
|
|
# This exists to silence numpy deprecation warnings, see GH#29553
|
def is_numeric_v_string_like(a: ArrayLike, b) -> bool:
    """
    Detect a comparison between a numeric ndarray and something string-like.

    NumPy does not handle such comparisons gracefully, in particular a
    numeric array against a scalar string.

    Parameters
    ----------
    a : array-like, scalar
        The first object to check.
    b : array-like, scalar
        The second object to check.

    Returns
    -------
    boolean
        True when ``a`` is a numeric ndarray and ``b`` is a ``str`` or a
        string ndarray, or when ``a`` is a string ndarray and ``b`` is a
        numeric ndarray.

    Examples
    --------
    >>> is_numeric_v_string_like(np.array([1]), "foo")
    True
    >>> is_numeric_v_string_like(np.array([1, 2]), np.array(["foo"]))
    True
    >>> is_numeric_v_string_like(np.array(["foo"]), np.array([1, 2]))
    True
    >>> is_numeric_v_string_like(np.array([1]), np.array([2]))
    False
    >>> is_numeric_v_string_like(np.array(["foo"]), np.array(["foo"]))
    False
    """
    numeric_kinds = ("u", "i", "f", "c", "b")
    string_kinds = ("S", "U")

    # ``None`` stands for "not an ndarray" and matches neither kind tuple.
    a_kind = a.dtype.kind if isinstance(a, np.ndarray) else None
    b_kind = b.dtype.kind if isinstance(b, np.ndarray) else None

    if a_kind in numeric_kinds:
        # numeric array vs. scalar string, or numeric array vs. string array
        return isinstance(b, str) or b_kind in string_kinds

    # remaining case: string array vs. numeric array
    return a_kind in string_kinds and b_kind in numeric_kinds
|
|
|
def needs_i8_conversion(arr_or_dtype) -> bool:
    """
    Tell whether the array or dtype should be converted to int64.

    That is the case for datetime-like dtypes: NumPy datetime64 and
    timedelta64, as well as ``PeriodDtype`` and ``DatetimeTZDtype``.

    Parameters
    ----------
    arr_or_dtype : array-like or dtype
        The array or dtype to inspect.

    Returns
    -------
    boolean
        Whether or not the array or dtype should be converted to int64.

    Examples
    --------
    >>> needs_i8_conversion(str)
    False
    >>> needs_i8_conversion(np.int64)
    False
    >>> needs_i8_conversion(np.datetime64)
    True
    >>> needs_i8_conversion(np.array(['a', 'b']))
    False
    >>> needs_i8_conversion(pd.Series([1, 2]))
    False
    >>> needs_i8_conversion(pd.Series([], dtype="timedelta64[ns]"))
    True
    >>> needs_i8_conversion(pd.DatetimeIndex([1, 2, 3], tz="US/Eastern"))
    True
    """
    if arr_or_dtype is None:
        return False

    if isinstance(arr_or_dtype, (np.dtype, ExtensionDtype)):
        # Already a dtype object: no lookup required.
        dtype = arr_or_dtype
    else:
        try:
            dtype = get_dtype(arr_or_dtype)
        except (TypeError, ValueError):
            return False

    if isinstance(dtype, np.dtype):
        return dtype.kind in ["m", "M"]
    return isinstance(dtype, (PeriodDtype, DatetimeTZDtype))
|
|
|
def is_numeric_dtype(arr_or_dtype) -> bool:
    """
    Tell whether the provided array or dtype is of a numeric dtype.

    NumPy number and boolean types qualify (datetime64/timedelta64 do
    not), as do extension dtypes whose ``_is_numeric`` attribute is true.

    Parameters
    ----------
    arr_or_dtype : array-like or dtype
        The array or dtype to inspect.

    Returns
    -------
    boolean
        Whether or not the array or dtype is of a numeric dtype.

    Examples
    --------
    >>> from pandas.api.types import is_numeric_dtype
    >>> is_numeric_dtype(str)
    False
    >>> is_numeric_dtype(int)
    True
    >>> is_numeric_dtype(float)
    True
    >>> is_numeric_dtype(np.uint64)
    True
    >>> is_numeric_dtype(np.datetime64)
    False
    >>> is_numeric_dtype(np.timedelta64)
    False
    >>> is_numeric_dtype(np.array(['a', 'b']))
    False
    >>> is_numeric_dtype(pd.Series([1, 2]))
    True
    >>> is_numeric_dtype(pd.Index([1, 2.]))
    True
    >>> is_numeric_dtype(np.array([], dtype=np.timedelta64))
    False
    """

    def _is_numeric_extension_dtype(typ) -> bool:
        return isinstance(typ, ExtensionDtype) and typ._is_numeric

    numeric_np_type = classes_and_not_datetimelike(np.number, np.bool_)
    if _is_dtype_type(arr_or_dtype, numeric_np_type):
        return True
    return _is_dtype(arr_or_dtype, _is_numeric_extension_dtype)
|
|
|
def is_any_real_numeric_dtype(arr_or_dtype) -> bool:
    """
    Tell whether the provided array or dtype is of a real number dtype.

    A dtype qualifies when it is numeric but neither complex nor boolean.

    Parameters
    ----------
    arr_or_dtype : array-like or dtype
        The array or dtype to inspect.

    Returns
    -------
    boolean
        Whether or not the array or dtype is of a real number dtype.

    Examples
    --------
    >>> from pandas.api.types import is_any_real_numeric_dtype
    >>> is_any_real_numeric_dtype(int)
    True
    >>> is_any_real_numeric_dtype(float)
    True
    >>> is_any_real_numeric_dtype(object)
    False
    >>> is_any_real_numeric_dtype(str)
    False
    >>> is_any_real_numeric_dtype(complex(1, 2))
    False
    >>> is_any_real_numeric_dtype(bool)
    False
    """
    if not is_numeric_dtype(arr_or_dtype):
        return False
    if is_complex_dtype(arr_or_dtype):
        return False
    return not is_bool_dtype(arr_or_dtype)
|
|
|
def is_float_dtype(arr_or_dtype) -> bool:
    """
    Tell whether the provided array or dtype is of a float dtype.

    NumPy floating types qualify, as do extension dtypes of kind ``"f"``.

    Parameters
    ----------
    arr_or_dtype : array-like or dtype
        The array or dtype to inspect.

    Returns
    -------
    boolean
        Whether or not the array or dtype is of a float dtype.

    Examples
    --------
    >>> from pandas.api.types import is_float_dtype
    >>> is_float_dtype(str)
    False
    >>> is_float_dtype(int)
    False
    >>> is_float_dtype(float)
    True
    >>> is_float_dtype(np.array(['a', 'b']))
    False
    >>> is_float_dtype(pd.Series([1, 2]))
    False
    >>> is_float_dtype(pd.Index([1, 2.]))
    True
    """

    def _is_float_extension_dtype(typ) -> bool:
        return isinstance(typ, ExtensionDtype) and typ.kind in "f"

    if _is_dtype_type(arr_or_dtype, classes(np.floating)):
        return True
    return _is_dtype(arr_or_dtype, _is_float_extension_dtype)
|
|
|
def is_bool_dtype(arr_or_dtype) -> bool:
    """
    Tell whether the provided array or dtype is of a boolean dtype.

    Parameters
    ----------
    arr_or_dtype : array-like or dtype
        The array or dtype to inspect.

    Returns
    -------
    boolean
        Whether or not the array or dtype is of a boolean dtype.

    Notes
    -----
    An ExtensionArray is considered boolean when the ``_is_boolean``
    attribute is set to True.  For categorical data the categories are
    what gets inspected.

    Examples
    --------
    >>> from pandas.api.types import is_bool_dtype
    >>> is_bool_dtype(str)
    False
    >>> is_bool_dtype(int)
    False
    >>> is_bool_dtype(bool)
    True
    >>> is_bool_dtype(np.bool_)
    True
    >>> is_bool_dtype(np.array(['a', 'b']))
    False
    >>> is_bool_dtype(pd.Series([1, 2]))
    False
    >>> is_bool_dtype(np.array([True, False]))
    True
    >>> is_bool_dtype(pd.Categorical([True, False]))
    True
    >>> is_bool_dtype(pd.arrays.SparseArray([True, False]))
    True
    """
    if arr_or_dtype is None:
        return False

    try:
        dtype = get_dtype(arr_or_dtype)
    except (TypeError, ValueError):
        return False

    # For categoricals, look at the categories (an Index) instead of the
    # original argument, so that the Index rule below applies to them.
    if isinstance(dtype, CategoricalDtype):
        candidate = dtype.categories
    else:
        candidate = arr_or_dtype

    if isinstance(candidate, ABCIndex):
        # Allow Index[object] that is all-bools or Index["boolean"]
        return candidate.inferred_type == "boolean"
    if isinstance(dtype, ExtensionDtype):
        return getattr(dtype, "_is_boolean", False)

    return issubclass(dtype.type, np.bool_)
|
|
|
def is_1d_only_ea_obj(obj: Any) -> bool:
    """
    ExtensionArray that does not support 2D, or more specifically that does
    not use HybridBlock.

    In practice: any ExtensionArray except DatetimeArray, TimedeltaArray
    and PeriodArray.
    """
    from pandas.core.arrays import (
        DatetimeArray,
        ExtensionArray,
        PeriodArray,
        TimedeltaArray,
    )

    if not isinstance(obj, ExtensionArray):
        return False
    return not isinstance(obj, (DatetimeArray, TimedeltaArray, PeriodArray))
|
|
|
def is_1d_only_ea_dtype(dtype: DtypeObj | None) -> bool:
    """
    Analogue to is_extension_array_dtype but excluding DatetimeTZDtype
    and PeriodDtype.

    Only dtype *instances* are recognised; anything that is not an
    ExtensionDtype instance (including None) yields False.
    """
    # Note: if other EA dtypes are ever held in HybridBlock, exclude those
    #  here too.
    # NB: need to check DatetimeTZDtype and not is_datetime64tz_dtype
    #  to exclude ArrowTimestampUSDtype
    if not isinstance(dtype, ExtensionDtype):
        return False
    return not isinstance(dtype, (DatetimeTZDtype, PeriodDtype))
|
|
|
def is_extension_array_dtype(arr_or_dtype) -> bool:
    """
    Check if an object is a pandas extension array type.

    See the :ref:`User Guide <extending.extension-types>` for more.

    Parameters
    ----------
    arr_or_dtype : object
        For array-like input, the ``.dtype`` attribute will
        be extracted.

    Returns
    -------
    bool
        Whether the `arr_or_dtype` is an extension array type.

    Notes
    -----
    This checks whether an object implements the pandas extension
    array interface. In pandas, this includes:

    * Categorical
    * Sparse
    * Interval
    * Period
    * DatetimeArray
    * TimedeltaArray

    Third-party libraries may implement arrays or types satisfying
    this interface as well.

    Examples
    --------
    >>> from pandas.api.types import is_extension_array_dtype
    >>> arr = pd.Categorical(['a', 'b'])
    >>> is_extension_array_dtype(arr)
    True
    >>> is_extension_array_dtype(arr.dtype)
    True

    >>> arr = np.array(['a', 'b'])
    >>> is_extension_array_dtype(arr.dtype)
    False
    """
    # Array-likes expose ``.dtype``; anything else is treated as the dtype
    # specification itself.
    candidate = getattr(arr_or_dtype, "dtype", arr_or_dtype)

    if isinstance(candidate, ExtensionDtype):
        return True
    if isinstance(candidate, np.dtype):
        return False

    # Neither a pandas nor a numpy dtype instance (e.g. a string or a
    # class): ask the extension dtype registry whether it recognises it.
    return registry.find(candidate) is not None
|
|
|
def is_ea_or_datetimelike_dtype(dtype: DtypeObj | None) -> bool:
    """
    Check for ExtensionDtype, datetime64 dtype, or timedelta64 dtype.

    Notes
    -----
    Checks only for dtype objects, not dtype-castable strings or types.
    """
    if isinstance(dtype, ExtensionDtype):
        return True
    # numpy kinds: "m" is timedelta64, "M" is datetime64
    return isinstance(dtype, np.dtype) and dtype.kind in ["m", "M"]
|
|
|
def is_complex_dtype(arr_or_dtype) -> bool:
    """
    Check whether the provided array or dtype is of a complex dtype.

    Parameters
    ----------
    arr_or_dtype : array-like or dtype
        The array or dtype to check.

    Returns
    -------
    boolean
        Whether or not the array or dtype is of a complex dtype.

    Examples
    --------
    >>> from pandas.api.types import is_complex_dtype
    >>> is_complex_dtype(str)
    False
    >>> is_complex_dtype(int)
    False
    >>> is_complex_dtype(np.complex128)
    True
    >>> is_complex_dtype(np.array(['a', 'b']))
    False
    >>> is_complex_dtype(pd.Series([1, 2]))
    False
    >>> is_complex_dtype(np.array([1 + 1j, 5]))
    True
    """
    # Predicate over the scalar type extracted from ``arr_or_dtype``.
    is_complex_type = classes(np.complexfloating)
    return _is_dtype_type(arr_or_dtype, is_complex_type)
|
|
|
def _is_dtype(arr_or_dtype, condition) -> bool:
    """
    Return true if the condition is satisfied for the arr_or_dtype.

    Parameters
    ----------
    arr_or_dtype : array-like, str, np.dtype, or ExtensionArrayType
        The array-like or dtype object whose dtype we want to extract.
    condition : callable[Union[np.dtype, ExtensionDtype]]
        Predicate applied to the extracted dtype instance.

    Returns
    -------
    bool
        False when ``arr_or_dtype`` is None or no dtype can be extracted
        from it; otherwise whatever ``condition`` returns.
    """
    if arr_or_dtype is None:
        return False

    try:
        resolved = get_dtype(arr_or_dtype)
    except (TypeError, ValueError):
        return False
    else:
        # Errors raised by ``condition`` itself are deliberately not caught.
        return condition(resolved)
|
|
|
def get_dtype(arr_or_dtype) -> DtypeObj:
    """
    Get the dtype instance associated with an array
    or dtype object.

    Parameters
    ----------
    arr_or_dtype : array-like or dtype
        The array-like or dtype object whose dtype we want to extract.

    Returns
    -------
    obj_dtype : The extracted dtype instance from the
                passed in array or dtype object.

    Raises
    ------
    TypeError : The passed in object is None.
    """
    if arr_or_dtype is None:
        raise TypeError("Cannot deduce dtype from null object")

    # fastpaths: numpy dtype instances and plain type objects
    if isinstance(arr_or_dtype, np.dtype):
        return arr_or_dtype
    if isinstance(arr_or_dtype, type):
        return np.dtype(arr_or_dtype)

    # Array-likes contribute their ``.dtype``; everything else is handed
    # to ``pandas_dtype`` unchanged.
    spec = arr_or_dtype.dtype if hasattr(arr_or_dtype, "dtype") else arr_or_dtype
    return pandas_dtype(spec)
|
|
|
def _is_dtype_type(arr_or_dtype, condition) -> bool:
    """
    Return true if the condition is satisfied for the arr_or_dtype.

    Parameters
    ----------
    arr_or_dtype : array-like or dtype
        The array-like or dtype object whose dtype we want to extract.
    condition : callable[Union[np.dtype, ExtensionDtypeType]]
        Predicate applied to the scalar *type* of the extracted dtype.
        It is called with ``type(None)`` when the input is None, a
        list-like without a ``dtype`` attribute, or a scalar that cannot
        be interpreted as a dtype.

    Returns
    -------
    bool : if the condition is satisfied for the arr_or_dtype
    """
    none_type = type(None)

    if arr_or_dtype is None:
        return condition(none_type)

    # fastpath
    if isinstance(arr_or_dtype, np.dtype):
        return condition(arr_or_dtype.type)
    if isinstance(arr_or_dtype, type):
        scalar_cls = arr_or_dtype
        if issubclass(scalar_cls, ExtensionDtype):
            # ExtensionDtype classes advertise their scalar type via ``.type``
            scalar_cls = scalar_cls.type
        return condition(np.dtype(scalar_cls).type)

    # if we have an array-like
    if hasattr(arr_or_dtype, "dtype"):
        arr_or_dtype = arr_or_dtype.dtype

    # we are not possibly a dtype
    elif is_list_like(arr_or_dtype):
        return condition(none_type)

    try:
        tipo = pandas_dtype(arr_or_dtype).type
    except (TypeError, ValueError):
        return condition(none_type) if is_scalar(arr_or_dtype) else False

    return condition(tipo)
|
|
|
def infer_dtype_from_object(dtype) -> type:
    """
    Get a numpy dtype.type-style object for a dtype object.

    This method also includes handling of the datetime64[ns] and
    datetime64[ns, TZ] objects.

    If no dtype can be found, we return ``object``.

    Parameters
    ----------
    dtype : dtype, type
        The dtype object whose numpy dtype.type-style
        object we want to extract.

    Returns
    -------
    type

    Raises
    ------
    NotImplementedError
        If the input is still the string ``"period"`` when the legacy
        string aliases are handled.
    """
    if isinstance(dtype, type) and issubclass(dtype, np.generic):
        # Type object from a dtype
        # (already a numpy scalar type, so there is nothing to resolve)

        return dtype
    elif isinstance(dtype, (np.dtype, ExtensionDtype)):
        # dtype object
        try:
            # Only TypeError is swallowed here; a ValueError raised by the
            # validator propagates to the caller.
            _validate_date_like_dtype(dtype)
        except TypeError:
            # Should still pass if we don't have a date-like
            pass
        if hasattr(dtype, "numpy_dtype"):
            # TODO: Implement this properly
            # https://github.com/pandas-dev/pandas/issues/52576
            return dtype.numpy_dtype.type
        return dtype.type

    # Try to normalise the input to a dtype instance; on TypeError the
    # original object is kept and handled by the branches below.
    try:
        dtype = pandas_dtype(dtype)
    except TypeError:
        pass

    if is_extension_array_dtype(dtype):
        return dtype.type
    elif isinstance(dtype, str):
        # TODO(jreback)
        # should deprecate these
        if dtype in ["datetimetz", "datetime64tz"]:
            return DatetimeTZDtype.type
        elif dtype in ["period"]:
            raise NotImplementedError

        # Map the short aliases onto the numpy attribute names
        # ("datetime64" / "timedelta64") before the lookup below.
        if dtype in ["datetime", "timedelta"]:
            dtype += "64"
        try:
            # Look the name up as an attribute of the numpy module and
            # recurse on whatever is found.
            return infer_dtype_from_object(getattr(np, dtype))
        except (AttributeError, TypeError):
            # Handles cases like get_dtype(int) i.e.,
            # Python objects that are valid dtypes
            # (unlike user-defined types, in general)
            #
            # TypeError handles the float16 type code of 'e'
            # further handle internal types
            pass

    # Last resort: let numpy build a dtype and recurse so the result goes
    # through the dtype-object branch above.
    return infer_dtype_from_object(np.dtype(dtype))
|
|
|
def _validate_date_like_dtype(dtype) -> None:
    """
    Check whether the dtype is a date-like dtype. Raises an error if invalid.

    Parameters
    ----------
    dtype : dtype, type
        The dtype to check.

    Raises
    ------
    TypeError : The dtype could not be casted to a date-like dtype.
    ValueError : The dtype is an illegal date-like dtype (e.g. the
                 frequency provided is too specific)
    """
    try:
        # First element of the metadata tuple is the unit string.
        unit, *_ = np.datetime_data(dtype)
    except ValueError as err:
        # Surface numpy's ValueError as the TypeError documented above.
        raise TypeError(err) from err

    if unit in ("generic", "ns"):
        return

    raise ValueError(
        f"{repr(dtype.name)} is too specific of a frequency, "
        f"try passing {repr(dtype.type.__name__)}"
    )
|
|
|
def validate_all_hashable(*args, error_name: str | None = None) -> None:
    """
    Return None if all args are hashable, else raise a TypeError.

    Parameters
    ----------
    *args
        Arguments to validate.
    error_name : str, optional
        The name to use in the error message. When omitted (or empty) a
        generic message is used instead.

    Raises
    ------
    TypeError : If an argument is not hashable

    Returns
    -------
    None
    """
    for candidate in args:
        if is_hashable(candidate):
            continue
        # The first unhashable argument decides; later ones are not inspected.
        if error_name:
            message = f"{error_name} must be a hashable type"
        else:
            message = "All elements must be hashable"
        raise TypeError(message)
|
|
|
def pandas_dtype(dtype) -> DtypeObj:
    """
    Convert input into a pandas only dtype object or a numpy dtype object.

    Parameters
    ----------
    dtype : object to be converted

    Returns
    -------
    np.dtype or a pandas dtype

    Raises
    ------
    TypeError if not a dtype
    """
    # short-circuit: ndarrays carry their dtype, dtype instances pass through
    if isinstance(dtype, np.ndarray):
        return dtype.dtype
    if isinstance(dtype, (np.dtype, ExtensionDtype)):
        return dtype

    # registered extension types
    registered = registry.find(dtype)
    if registered is not None:
        return registered

    # try a numpy dtype
    # raise a consistent TypeError if failed
    try:
        with warnings.catch_warnings():
            # GH#51523 - Series.astype(np.integer) doesn't show
            # numpy deprecation warning of np.integer
            # Hence enabling DeprecationWarning
            warnings.simplefilter("always", DeprecationWarning)
            npdtype = np.dtype(dtype)
    except SyntaxError as err:
        # np.dtype uses `eval` which can raise SyntaxError
        raise TypeError(f"data type '{dtype}' not understood") from err

    # Any invalid dtype (such as pd.Timestamp) should raise an error.
    # np.dtype(invalid_type).kind == "O" for such objects. However, that is
    # also true of valid spellings of the object dtype (object, np.object_,
    # 'object', 'O'), so those are recognised explicitly first.
    # Hashability is checked to avoid errors/DeprecationWarning when we get
    # here and `dtype` is an array.
    is_object_spelling = is_hashable(dtype) and dtype in [
        object,
        np.object_,
        "object",
        "O",
    ]
    if not is_object_spelling and npdtype.kind == "O":
        raise TypeError(f"dtype '{dtype}' not understood")

    return npdtype
|
|
|
def is_all_strings(value: ArrayLike) -> bool:
    """
    Check if this is an array of strings that we should try parsing.

    Includes object-dtype ndarray containing all-strings, StringArray,
    and Categorical with all-string categories.
    Does not include numpy string dtypes.
    """
    dtype = value.dtype

    if isinstance(dtype, np.dtype):
        if dtype == np.dtype("object"):
            # Only object arrays are inspected element-wise.
            return lib.infer_dtype(value, skipna=False) == "string"
        return False

    if isinstance(dtype, CategoricalDtype):
        return dtype.categories.inferred_type == "string"

    return dtype == "string"
|
|
|
# Names exported when this module is star-imported.
__all__ = [
    "classes",
    "classes_and_not_datetimelike",
    "DT64NS_DTYPE",
    "ensure_float",
    "ensure_float64",
    "ensure_python_int",
    "ensure_str",
    "get_dtype",
    "infer_dtype_from_object",
    "INT64_DTYPE",
    "is_1d_only_ea_dtype",
    "is_1d_only_ea_obj",
    "is_all_strings",
    "is_any_int_dtype",
    "is_any_real_numeric_dtype",
    "is_array_like",
    "is_bool",
    "is_bool_dtype",
    "is_categorical_dtype",
    "is_complex",
    "is_complex_dtype",
    "is_dataclass",
    "is_datetime64_any_dtype",
    "is_datetime64_dtype",
    "is_datetime64_ns_dtype",
    "is_datetime64tz_dtype",
    "is_datetime_or_timedelta_dtype",
    "is_decimal",
    "is_dict_like",
    "is_dtype_equal",
    "is_ea_or_datetimelike_dtype",
    "is_extension_array_dtype",
    "is_file_like",
    "is_float_dtype",
    "is_int64_dtype",
    "is_integer_dtype",
    "is_interval",
    "is_interval_dtype",
    "is_iterator",
    "is_named_tuple",
    "is_nested_list_like",
    "is_number",
    "is_numeric_dtype",
    "is_object_dtype",
    "is_period_dtype",
    "is_re",
    "is_re_compilable",
    "is_scipy_sparse",
    "is_sequence",
    "is_signed_integer_dtype",
    "is_sparse",
    "is_string_dtype",
    "is_string_or_object_np_dtype",
    "is_timedelta64_dtype",
    "is_timedelta64_ns_dtype",
    "is_unsigned_integer_dtype",
    "needs_i8_conversion",
    "pandas_dtype",
    "TD64NS_DTYPE",
    "validate_all_hashable",
]
|