fume-manage-python.git

U  
¬ý°dlEã    @sUddlmZddlZddlZddlmZddlZddlZ    ddl
mZmZm Z mZmZddlmZmZejejejejejdejejejejejdejejej dej!de"iiZ#d    e$d
<d6dd dddZ%d7dddddZ&dd dddZ'dddddZ(dddddZ)dddd d!Z*d"d#Z+dddd$d%Z,d8d&d'd(d)d*d+d,d-Z-d9d*d(d(d*d.d/d0Z.d:d1dd2dd3d4d5Z/dS);é)ÚannotationsN)ÚAny)ÚBufferÚColumnÚColumnNullTypeÚ    DataFrameÚ    DtypeKind)ÚArrowCTypesÚ
Endianness)ééé é@)r rrzdict[DtypeKind, dict[int, Any]]Ú
_NP_DTYPESTÚboolzpd.DataFrame)Ú
allow_copyÚreturncCs2t|tjr|St|ds"tdt|j|dS)a¯
    Build a ``pd.DataFrame`` from any DataFrame supporting the interchange protocol.
 
    Parameters
    ----------
    df : DataFrameXchg
        Object supporting the interchange protocol, i.e. `__dataframe__` method.
    allow_copy : bool, default: True
        Whether to allow copying the memory to perform the conversion
        (if false then zero-copy approach is requested).
 
    Returns
    -------
    pd.DataFrame
    Ú __dataframe__z#`df` does not support __dataframe__)r)Ú
isinstanceÚpdrÚhasattrÚ
ValueErrorÚ_from_dataframer©Údfr©rú]d:\z\workplace\vscode\pyvenv\venv\Lib\site-packages\pandas/core/interchange/from_dataframe.pyÚfrom_dataframes
 
rÚ DataFrameXchgrcCsg}| ¡D]}t|}| |¡q|s<t|dkr<tdt|dkrR|d}ntj|dddd}|j dd¡}|dk    r||_    |S)    a
    Build a ``pd.DataFrame`` from the DataFrame interchange object.
 
    Parameters
    ----------
    df : DataFrameXchg
        Object supporting the interchange protocol, i.e. `__dataframe__` method.
    allow_copy : bool, default: True
        Whether to allow copying the memory to perform the conversion
        (if false then zero-copy approach is requested).
 
    Returns
    -------
    pd.DataFrame
    ézHTo join chunks a copy is required which is forbidden by allow_copy=FalserTF)ZaxisZignore_indexÚcopyzpandas.indexN)
Z
get_chunksÚprotocol_df_chunk_to_pandasÚappendÚlenÚRuntimeErrorrÚconcatÚmetadataÚgetÚindex)rrZ
pandas_dfsÚchunkÚ    pandas_dfZ    index_objrrrr7sÿ
r)rrcCsi}g}| ¡D]â}t|ts.td|d||krFtd|d| |¡}|jd}|tjtjtj    tj
fkrt|\||<}nd|tjkr t |\||<}nH|tjkr¼t|\||<}n,|tjkrØt|\||<}ntd|d| |¡qt |¡}||jd<|S)z¡
    Convert interchange protocol chunk to ``pd.DataFrame``.
 
    Parameters
    ----------
    df : DataFrameXchg
 
    Returns
    -------
    pd.DataFrame
    zColumn z is not a stringz is not uniquerz
Data type z not handled yetZ_INTERCHANGE_PROTOCOL_BUFFERS)Zcolumn_namesrÚstrrZget_column_by_nameÚdtyperÚINTÚUINTÚFLOATÚBOOLÚprimitive_column_to_ndarrayZCATEGORICALÚcategorical_column_to_seriesÚSTRINGÚstring_column_to_ndarrayZDATETIMEÚdatetime_column_to_ndarrayÚNotImplementedErrorr"rrÚattrs)rÚcolumnsÚbuffersÚnameÚcolr,Úbufr*rrrr!\s6
 
 
ü
 
 
 
 
r!rztuple[np.ndarray, Any])r;rcCs@| ¡}|d\}}t|||j| ¡}t|||d}||fS)aJ
    Convert a column holding one of the primitive dtypes to a NumPy array.
 
    A primitive type is one of: int, uint, float, bool.
 
    Parameters
    ----------
    col : Column
 
    Returns
    -------
    tuple
        Tuple of np.ndarray holding the data and the memory owner object
        that keeps the memory alive.
    ÚdataÚvalidity)Úget_buffersÚbuffer_to_ndarrayÚoffsetÚsizeÚ    set_nulls)r;r9Ú    data_buffÚ
data_dtyper=rrrr1s
r1ztuple[pd.Series, Any]cCs¬|j}|dstd|d}t|dr6t |j¡}ntd| ¡}|d\}}t|||j|     ¡}||t
|}tj|||dd}    t  |    ¡}
t|
||d    }
|
|fS)
a
    Convert a column holding categorical data to a pandas Series.
 
    Parameters
    ----------
    col : Column
 
    Returns
    -------
    tuple
        Tuple of pd.Series holding the data and the memory owner object
        that keeps the memory alive.
    Z is_dictionaryz-Non-dictionary categoricals not supported yetÚ
categoriesÚ_colz}Interchanging categorical columns isn't supported yet, and our fallback of using the `col._col` attribute (a ndarray) failed.r=Z
is_ordered)rFZorderedr>)Zdescribe_categoricalr6rÚnpÚarrayrGr?r@rArBr#rZCategoricalÚSeriesrC)r;ZcategoricalZ
cat_columnrFr9Z
codes_buffZcodes_dtypeÚcodesÚvaluesÚcatr=rrrr2£s*
ÿÿ
r2cCs¤|j\}}|tjtjtjfkr,t|d| ¡}|dsDtd|d\}}|ddks`t|dtj    tj
fksxttjdtj tjf}t||d| ¡d    }|d\}}    t||    |j| ¡dd
}
d}|tjtjfkr|dsêtd |d\}} t|| |j| ¡}|dkr|}dg| ¡}t| ¡D]\}|dk    rZ||rZtj||<q4||
||
|d}t|}|jdd}|||<q4tj|dd|fS)a
    Convert a column holding string data to a NumPy array.
 
    Parameters
    ----------
    col : Column
 
    Returns
    -------
    tuple
        Tuple of np.ndarray holding the data and the memory owner object
        that keeps the memory alive.
    z3 null kind is not yet supported for string columns.Úoffsetsz#String buffers must contain offsetsr=rrér)rAÚlength)rPNr>z*Validity buffers cannot be empty for maskszutf-8)ÚencodingÚobject©r,)Ú describe_nullrÚNON_NULLABLEÚUSE_BITMASKÚUSE_BYTEMASKr6r?ÚAssertionErrorr    r3ZLARGE_STRINGrr.ZUINT8r
ÚNATIVEr@rBrAÚrangerHÚnanÚbytesÚdecodeZasarray)r;Ú    null_kindÚsentinel_valr9rDZprotocol_data_dtyperEr=Zoffset_buffZoffset_dtyperNÚnull_posÚ
valid_buffÚvalid_dtypeZstr_listÚiZunitsZ    str_bytesÚstringrrrr4Òs^
ýÿþ
ü
ÿ
 
r4cCsÎt d|¡}|r\| d¡| d¡}}|dkr6td|dkrF|d7}| d|d¡}|St d    |¡}|r¼| d¡}|d
kr| tj¡d d¡}n"|d krª| d¡}ntd||Std|dS)z4Parse datetime `format_str` to interpret the `data`.zts([smun]):(.*)rrOÚzTimezones are not supported yetÚszdatetime64[ú]ztd([Dm])ÚDiQz datetime64[s]Úmzdatetime64[ms]zDate unit is not supported: z DateTime kind is not supported: N)ÚreÚmatchÚgroupr6ÚastyperHÚuint64)Ú
format_strr=Ztimestamp_metaÚunitÚtzZ    date_metarrrÚparse_datetime_format_str+s&
rrc    Csx| ¡}|j\}}}}|d\}}t|tj|dttd|dtjf|j    | 
¡}t||}t|||d}||fS)a
    Convert a column holding DateTime data to a NumPy array.
 
    Parameters
    ----------
    col : Column
 
    Returns
    -------
    tuple
        Tuple of np.ndarray holding the data and the memory owner object
        that keeps the memory alive.
    r=rr.r>) r?r,r@rr.Úgetattrr    r
rYrArBrrrC)r;r9Ú_roÚdbufr,r=rrrr5Ls ü÷
r5rztuple[DtypeKind, int, str, str]Úintz
int | Nonez
np.ndarray)Úbufferr,rArPrcCsÂ|\}}}}t |i¡ |d¡}|dkr8td|dtj |¡}t |j||dt     |¡¡}    |dkr¢|dk    s|t
dtjj|    |jfd}
t |
||ddStjj|    |j|dfdSdS)    aÚ
    Build a NumPy array from the passed buffer.
 
    Parameters
    ----------
    buffer : Buffer
        Buffer to build a NumPy array from.
    dtype : tuple
        Data type of the buffer conforming protocol dtypes format.
    offset : int, default: 0
        Number of elements to offset from the start of the buffer.
    length : int, optional
        If the buffer is a bit-mask, specifies a number of bits to read
        from the buffer. Has no effect otherwise.
 
    Returns
    -------
    np.ndarray
 
    Notes
    -----
    The returned array doesn't own the memory. The caller of this function is
    responsible for keeping the memory owner object alive as long as
    the returned NumPy array is being used.
    NzConversion for ú is not yet supported.rrz1`length` must be specified for a bit-mask buffer.)Úshape)Úfirst_byte_offset)rr'r6rHZ    ctypeslibZas_ctypes_typeÚctypesÚcastÚptrÚPOINTERrXZas_arrayÚbufsizeÚbitmask_to_bool_ndarray)rwr,rArPÚkindZ    bit_widthrtZcolumn_dtypeZctypes_typeZdata_pointerZarrrrrr@ps"ÿÿr@)ÚbitmaskÚmask_lengthrzrc
Cs
|d}||d}|d;}tj|td}|d}d}td||}t|D]$}|d||>@rhd||<|d7}qLt||dD]:}    ||    d}tdD] }|d|>@r²d||<|d7}qqt|dkr|d}tt||D] }|d|>@rüd||<|d7}qä|S)a
    Convert bit-mask to a boolean NumPy array.
 
    Parameters
    ----------
    bitmask : np.ndarray[uint8]
        NumPy array of uint8 dtype representing the bitmask.
    mask_length : int
        Number of elements in the mask to interpret.
    first_byte_offset : int, default: 0
        Number of elements to offset from the start of the first byte.
 
    Returns
    -------
    np.ndarray[bool]
    rNrSrrTéÿÿÿÿ)rHÚzerosrÚminrZr#)
rrrzZ bytes_to_skipZ    bool_maskÚvalZmask_idxZbits_in_first_byteÚjrcrrrr§s0
 
rznp.ndarray | pd.Seriesz5tuple[Buffer, tuple[DtypeKind, int, str, str]] | None)r=r;r>Úallow_modify_inplacec    Csè|j\}}d}|tjkr(t |¡|k}nj|tjtjfkrp|sDtd|\}}t|||j    | 
¡}|dkr|}n"|tjtjfkrnt d|d|dk    rät |¡rä|s°| ¡}zd||<Wn&tk
râ| t¡}d||<YnX|S)aÆ
    Set null values for the data according to the column null kind.
 
    Parameters
    ----------
    data : np.ndarray or pd.Series
        Data to set nulls in.
    col : Column
        Column object that describes the `data`.
    validity : tuple(Buffer, dtype) or None
        The return value of ``col.buffers()``. We do not access the ``col.buffers()``
        here to not take the ownership of the memory of buffer objects.
    allow_modify_inplace : bool, default: True
        Whether to modify the `data` inplace when zero-copy is possible (True) or always
        modify a copy of the `data` (False).
 
    Returns
    -------
    np.ndarray or pd.Series
        Data with the nulls being set.
    Nz/Expected to have a validity buffer for the maskrz
Null kind rx)rTrZUSE_SENTINELrrJrVrWrXr@rArBrUZUSE_NANr6rHÚanyr Ú    TypeErrorrmÚfloat)    r=r;r>rr^r_r`rarbrrrrCÝs,
 
 
rC)T)T)rN)r)T)0Ú
__future__rr{rjÚtypingrÚnumpyrHZpandasrZ*pandas.core.interchange.dataframe_protocolrrrrrrZpandas.core.interchange.utilsr    r
r-Zint8Úint16Úint32Úint64r.Zuint8Zuint16Zuint32rnr/Úfloat32Úfloat64r0rrÚ__annotations__rrr!r1r2r4rrr5r@rrCrrrrÚ<module>s@ü%./Y!'ü8ÿ:ü