fume-manage-python.git

U  
¬ý°d¢Hã@s´dZddlmZddlZddlZddlmZmZddlZddlm    Z    ddl
mZddlm Z mZmZmZmZddlmZdd    lmZdd
lmZddlmZddlmZddlZdd lmZmZddl m!Z!ddl"m#Z#ddl$m%Z%m&Z&m'Z'm(Z(m)Z)dddddZ*d8dddddddddZ+Gd ddZ,Gd!d"d"e,Z-Gd#d$d$e,Z.ee!d%d&d9d)d*dd+d,dd-d.d/d0d1Z/ee!d%d&d'ddej0ej0fd2dd-dd3d4d)d5d6d7Z1dS):z parquet compat é)ÚannotationsN)ÚAnyÚLiteral)Úcatch_warnings)Úlib)ÚDtypeBackendÚFilePathÚ
ReadBufferÚStorageOptionsÚWriteBuffer)Úimport_optional_dependency©ÚAbstractMethodError)Údoc)Úfind_stack_level)Úcheck_dtype_backend)Ú    DataFrameÚ
get_option)Ú_shared_docs)ÚVersion)Ú    IOHandlesÚ
get_handleÚ is_fsspec_urlÚis_urlÚstringify_pathÚstrÚBaseImpl)ÚengineÚreturncCs¤|dkrtd}|dkr|ttg}d}|D]D}z|WStk
rj}z|dt|7}W5d}~XYq(Xq(td||dkrtS|dkrtStd    dS)
zreturn our implementationÚautozio.parquet.engineÚz
 - NzÉUnable to find a usable engine; tried using: 'pyarrow', 'fastparquet'.
A suitable version of pyarrow or fastparquet is required for parquet support.
Trying to import the above resulted in these errors:ÚpyarrowÚfastparquetz.engine must be one of 'pyarrow', 'fastparquet')rÚPyArrowImplÚFastParquetImplÚImportErrorrÚ
ValueError)rZengine_classesZ
error_msgsZengine_classÚerr©r(úHd:\z\workplace\vscode\pyvenv\venv\Lib\site-packages\pandas/io/parquet.pyÚ
get_engine,s$$ÿ
r*ÚrbFz1FilePath | ReadBuffer[bytes] | WriteBuffer[bytes]rr
ÚboolzVtuple[FilePath | ReadBuffer[bytes] | WriteBuffer[bytes], IOHandles[bytes] | None, Any])ÚpathÚfsÚstorage_optionsÚmodeÚis_dirrcCst|}t|r:|dkr:td}|jj|f|p0i\}}n|rVt|rN|dkrVtdd}|s|st|trt    j
 |¡st||d|d}d}|j }|||fS)zFile handling for PyArrow.NÚfsspecr+z8storage_options passed with buffer, or non-supported URLF©Zis_textr/)rrrÚcoreZ    url_to_fsrr&Ú
isinstancerÚosr-ÚisdirrÚhandle)r-r.r/r0r1Úpath_or_handler2Úhandlesr(r(r)Ú_get_path_or_handleNs8
ÿÿ
ÿþý
ü    ÿr;c@s>eZdZedddddZddddZd dd
ddZd    S)rrÚNone)ÚdfrcCst|tstddS)Nz+to_parquet only supports IO with DataFrames)r5rr&©r=r(r(r)Úvalidate_dataframews
zBaseImpl.validate_dataframer>cKst|dS©Nr )Úselfr=r-ÚcompressionÚkwargsr(r(r)Úwrite|szBaseImpl.writeN©rcKst|dSr@r )rAr-ÚcolumnsrCr(r(r)Úreadsz BaseImpl.read)N)Ú__name__Ú
__module__Ú__qualname__Ústaticmethodr?rDrGr(r(r(r)rvsc    @sVeZdZddddZdddd    d
dddd ddZddejdfdddddddZdS)r#r<rEcCs&tdddddl}ddl}||_dS)Nr!z(pyarrow is required for parquet support.©Úextrar)rZpyarrow.parquetZ(pandas.core.arrays.arrow.extension_typesÚapi)rAr!Úpandasr(r(r)Ú__init__sÿzPyArrowImpl.__init__ÚsnappyNrzFilePath | WriteBuffer[bytes]ú
str | Noneúbool | Noner
úlist[str] | None)r=r-rBÚindexr/Úpartition_colsrcKs| |¡d| dd¡i}|dk    r*||d<|jjj|f|}    t|| dd¡|d|dk    d\}
}|d<t|
tjr¢t    |
dr¢t|
j
ttfr¢|
j
}
t|
tr¢|
  ¡}
zH|dk    rÌ|jjj|    |
f||d|n|jjj|    |
fd|i|W5|dk    rü| ¡XdS)    NZschemaZpreserve_indexÚ
filesystemÚwb)r/r0r1Úname)rBrVrB)r?ÚpoprNZTableZfrom_pandasr;r5ÚioÚBufferedWriterÚhasattrrYrÚbytesÚdecodeÚcloseÚparquetZwrite_to_datasetZwrite_table)rAr=r-rBrUr/rVrCZfrom_pandas_kwargsÚtabler9r:r(r(r)rDsT
 
 
û
ÿþý
þüû    ÿÿÿ
zPyArrowImpl.writeFr,úDtypeBackend | lib.NoDefault)Úuse_nullable_dtypesÚ dtype_backendr/rcKsÚd|d<i}|dkr2ddlm}|}    |    j|d<n|dkrDtj|d<td}
|
d    kr\d|d
<t|| dd¡|dd \}}|d<zD|j    j
j|fd|i|} | jf|}|
d    kr¼|j d    dd}|W¢S|dk    rÔ| ¡XdS)NTZuse_pandas_metadataZnumpy_nullabler)Ú_arrow_dtype_mappingZtypes_mapperr!zmode.data_managerÚarrayZsplit_blocksrWr+)r/r0rFF)Úcopy)Zpandas.io._utilrfÚgetÚpdZ
ArrowDtyperr;rZr`rNraZ
read_tableÚ    to_pandasZ_as_manager)rAr-rFrdrer/rCZto_pandas_kwargsrfÚmappingÚmanagerr9r:Zpa_tableÚresultr(r(r)rGÄs>    
 
üÿÿÿzPyArrowImpl.read)rQNNN)rHrIrJrPrDrÚ
no_defaultrGr(r(r(r)r#sù8úr#c@sBeZdZddddZdddd    dd
ddZdd    dd ddZdS)r$r<rEcCstddd}||_dS)Nr"z,fastparquet is required for parquet support.rL)rrN)rAr"r(r(r)rPñs
ÿzFastParquetImpl.__init__rQNrz,Literal[('snappy', 'gzip', 'brotli')] | Noner
)r=rBr/rc    s´| |¡d|kr"|dk    r"tdd|kr4| d¡}|dk    rDd|d<t|}t|rptdfdd|d<nr|td    td
d$|jj||f|||d|W5QRXdS) NÚpartition_onzYCannot use both partition_on and partition_cols. Use partition_cols for partitioning dataZhiveZfile_schemer2csj|dfpi ¡S)NrX©Úopen©r-Ú_©r2r/r(r)Ú<lambda>s
ÿÿz'FastParquetImpl.write.<locals>.<lambda>Ú    open_withz?storage_options passed with file object or non-fsspec file pathT)Úrecord)rBZwrite_indexrp)    r?r&rZrrrrrNrD)rAr=r-rBrUrVr/rCr(rur)rDùs8
 
ÿ
ÿþûúzFastParquetImpl.write)r/rc
s&i}| dd¡}| dtj¡}t|jjtdkr:d|d<|rFtd|tjk    rXtdt|}d}t|r¸t    dt|jjtd    kr¤j
|d
fpij|d<qäfdd |d<n,t|t rätj |¡sät|d
dd}|j}z(|jj|f|}    |    jfd|i|W¢S|dk    r | ¡XdS)NrdFrez0.7.1Zpandas_nullszNThe 'use_nullable_dtypes' argument is not supported for the fastparquet enginezHThe 'dtype_backend' argument is not supported for the fastparquet enginer2z0.6.1r+r.csj|dfpi ¡S)Nr+rqrsrur(r)rvEs
ÿÿz&FastParquetImpl.read.<locals>.<lambda>rwr3rF)rZrrorrNÚ__version__r&rrrrrr.r5rr6r-r7rr8r`ZParquetFilerk)
rAr-rFr/rCZparquet_kwargsrdrer:Zparquet_filer(rur)rG(sLÿ
ÿÿÿÿ
zFastParquetImpl.read)rQNNN)NN)rHrIrJrPrDrGr(r(r(r)r$ðsù0ÿr$r/)r/rrQrz$FilePath | WriteBuffer[bytes] | NonerRrSrTzbytes | None)r=r-rrBrUr/rVrc
Ksrt|tr|g}t|}|dkr(t ¡n|}    |j||    f||||d||dkrjt|    tjsbt|     ¡SdSdS)a}
    Write a DataFrame to the parquet format.
 
    Parameters
    ----------
    df : DataFrame
    path : str, path object, file-like object, or None, default None
        String, path object (implementing ``os.PathLike[str]``), or file-like
        object implementing a binary ``write()`` function. If None, the result is
        returned as bytes. If a string, it will be used as Root Directory path
        when writing a partitioned dataset. The engine fastparquet does not
        accept file-like objects.
 
        .. versionchanged:: 1.2.0
 
    engine : {{'auto', 'pyarrow', 'fastparquet'}}, default 'auto'
        Parquet library to use. If 'auto', then the option
        ``io.parquet.engine`` is used. The default ``io.parquet.engine``
        behavior is to try 'pyarrow', falling back to 'fastparquet' if
        'pyarrow' is unavailable.
    compression : {{'snappy', 'gzip', 'brotli', 'lz4', 'zstd', None}},
        default 'snappy'. Name of the compression to use. Use ``None``
        for no compression. The supported compression methods actually
        depend on which engine is used. For 'pyarrow', 'snappy', 'gzip',
        'brotli', 'lz4', 'zstd' are all supported. For 'fastparquet',
        only 'gzip' and 'snappy' are supported.
    index : bool, default None
        If ``True``, include the dataframe's index(es) in the file output. If
        ``False``, they will not be written to the file.
        If ``None``, similar to ``True`` the dataframe's index(es)
        will be saved. However, instead of being saved as values,
        the RangeIndex will be stored as a range in the metadata so it
        doesn't require much space and is faster. Other indexes will
        be included as columns in the file output.
    partition_cols : str or list, optional, default None
        Column names by which to partition the dataset.
        Columns are partitioned in the order they are given.
        Must be None if path is not a string.
    {storage_options}
 
        .. versionadded:: 1.2.0
 
    kwargs
        Additional keyword arguments passed to the engine
 
    Returns
    -------
    bytes if no path argument is provided else None
    N)rBrUrVr/)r5rr*r[ÚBytesIOrDÚAssertionErrorÚgetvalue)
r=r-rrBrUr/rVrCÚimplZpath_or_bufr(r(r)Ú
to_parquetYs&<
þúù
r~zFilePath | ReadBuffer[bytes]zbool | lib.NoDefaultrc)r-rrFr/rdrerc    Ksbt|}|tjk    r:d}|dkr&|d7}tj|ttdnd}t||j|f||||d|S)a    
    Load a parquet object from the file path, returning a DataFrame.
 
    Parameters
    ----------
    path : str, path object or file-like object
        String, path object (implementing ``os.PathLike[str]``), or file-like
        object implementing a binary ``read()`` function.
        The string could be a URL. Valid URL schemes include http, ftp, s3,
        gs, and file. For file URLs, a host is expected. A local file could be:
        ``file://localhost/path/to/table.parquet``.
        A file URL can also be a path to a directory that contains multiple
        partitioned parquet files. Both pyarrow and fastparquet support
        paths to directories as well as file URLs. A directory path could be:
        ``file://localhost/path/to/tables`` or ``s3://bucket/partition_dir``.
    engine : {{'auto', 'pyarrow', 'fastparquet'}}, default 'auto'
        Parquet library to use. If 'auto', then the option
        ``io.parquet.engine`` is used. The default ``io.parquet.engine``
        behavior is to try 'pyarrow', falling back to 'fastparquet' if
        'pyarrow' is unavailable.
    columns : list, default=None
        If not None, only these columns will be read from the file.
 
    {storage_options}
 
        .. versionadded:: 1.3.0
 
    use_nullable_dtypes : bool, default False
        If True, use dtypes that use ``pd.NA`` as missing value indicator
        for the resulting DataFrame. (only applicable for the ``pyarrow``
        engine)
        As new dtypes are added that support ``pd.NA`` in the future, the
        output with this option will change to use those dtypes.
        Note: this is an experimental option, and behaviour (e.g. additional
        support dtypes) may change without notice.
 
        .. deprecated:: 2.0
 
    dtype_backend : {{"numpy_nullable", "pyarrow"}}, defaults to NumPy backed DataFrames
        Which dtype_backend to use, e.g. whether a DataFrame should have NumPy
        arrays, nullable dtypes are used for all dtypes that have a nullable
        implementation when "numpy_nullable" is set, pyarrow is used for all
        dtypes if "pyarrow" is set.
 
        The dtype_backends are still experimential.
 
        .. versionadded:: 2.0
 
    **kwargs
        Any additional kwargs are passed to the engine.
 
    Returns
    -------
    DataFrame
    zYThe argument 'use_nullable_dtypes' is deprecated and will be removed in a future version.TzFUse dtype_backend='numpy_nullable' instead of use_nullable_dtype=True.)Ú
stacklevelF)rFr/rdre)    r*rroÚwarningsÚwarnÚ FutureWarningrrrG)    r-rrFr/rdrerCr}Úmsgr(r(r)Úread_parquet¬s*A
ÿÿÿûúr)Nr+F)NrrQNNN)2Ú__doc__Ú
__future__rr[r6ÚtypingrrrrZpandas._libsrZpandas._typingrrr    r
rZpandas.compat._optionalrZ pandas.errorsrZpandas.util._decoratorsrZpandas.util._exceptionsrZpandas.util._validatorsrrOrjrrZpandas.core.shared_docsrZpandas.util.versionrZpandas.io.commonrrrrrr*r;rr#r$r~rorr(r(r(r)Ú<module>sR    %û( miù Rú