zmc
2023-12-22 9fdbf60165db0400c2e8e6be2dc6e88138ac719a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
U
­ý°dG¯ã @s®dZddlZddlmZddlZddlZddlmZddlZ    ddl
Z
ddl m Z ddl mZddlmZmZddlmmZddlZddlmZddlmZdd    lmZmZm Z m!Z!m"Z"zddl#Z#d
Z$Wne%k
râd Z$YnXzddl&Z&d
Z'Wne%k
rd Z'YnXe
j(e
j)d e
j*j+e' p2e d ƒdkddde
j)de
j*j+e$ dddgddd„ƒZ,e
j(dd„ƒZ-e
j(dd„ƒZ.e
j(dd„ƒZ/e
j(dd„ƒZ0e
j(dd „ƒZ1e
j(ej 2ej3j4¡ej 2ej3j5¡ej 2ej3j6¡ej 7d!d"¡ej 7d#d"¡ej 7d$d"¡ej 7d%d"¡gdd&d'„ƒZ8dEd)d*„Z9d+d,„Z:d-d.„Z;d/d0„Z<d1d2„Z=d3d4„Z>d5d6„Z?d7d8„Z@d9d:„ZAd;d<„ZBGd=d>„d>ƒZCGd?d@„d@eCƒZDGdAdB„dBeCƒZEGdCdD„dDeCƒZFdS)Fz test parquet compat éN)ÚBytesIO)Úcatch_warnings)Ú
get_option)Úis_platform_windows)Úpa_version_under7p0Úpa_version_under8p0)ÚVersion)ÚFastParquetImplÚ PyArrowImplÚ
get_engineÚ read_parquetÚ
to_parquetTFÚ fastparquetúmode.data_managerÚarrayz4fastparquet is not installed or ArrayManager is used©Úreason)ZmarksÚpyarrowúpyarrow is not installed)ÚparamscCs|jS©N©Úparam©Úrequest©rúSd:\z\workplace\vscode\pyvenv\venv\Lib\site-packages\pandas/tests/io/test_parquet.pyÚengine3srcCstst d¡dS)Nrr)Ú _HAVE_PYARROWÚpytestÚskiprrrrÚpaHs
r!cCs*tst d¡ntdƒdkr&t d¡dS)Nzfastparquet is not installedrrz.ArrayManager is not supported with fastparquetr)Ú_HAVE_FASTPARQUETrr rrrrrÚfpOs
 
r#cCst dddgddœ¡S)NéééÚfoo©ÚAÚB)ÚpdÚ    DataFramerrrrÚ    df_compatXsr-c
CsDt tdƒttddƒƒtjdddddd    dgtjd
d d d œ¡}|S)NÚabcr$éç@ç@Úfloat64©ÚdtypeTFÚ20130101r&©Úperiods)ÚaÚbÚdÚeÚf)r+r,ÚlistÚrangeÚnpÚarangeÚ
date_range)ÚdfrrrÚdf_cross_compat]s  úÿ rCcCs’t tdƒdtjdgdddgdddgddd    gttd
d ƒƒt d d ¡ d¡tjdddddtjdgdddgtjdd dt     d¡tj
t     d¡gdœ ¡S)Nr.r8Úcófooóbarsbazr'ÚbarÚbazr$r/r&éÚu1r0r1r2r3ç@ç@TFr5r6Z20130103) ÚstringZstring_with_nanZstring_with_noneÚbytesÚunicodeÚintZuintÚfloatZfloat_with_nanÚboolÚdatetimeÚdatetime_with_nat) r+r,r=r?Únanr>r@ÚastyperAÚ    TimestampZNaTrrrrÚdf_fullos$
 
 ýôÿrXz2019-01-04T16:41:24+0200z%Y-%m-%dT%H:%M:%S%zz2019-01-04T16:41:24+0215z2019-01-04T16:41:24-0200z2019-01-04T16:41:24-0215cCs|jSrrrrrrÚtimezone_aware_date_list‡s rYr%c
    s€ˆp
ddi‰ˆpi‰ˆdkr ˆ‰|r4|ˆd<|ˆd<‡‡‡‡‡‡‡‡fdd„}
ˆdkrtt ¡‰|
|    ƒW5QRXn|
|    ƒdS)aVerify parquet serializer and deserializer produce the same results.
 
    Performs a pandas to disk and disk to pandas round trip,
    then compares the 2 resulting DataFrames to verify equality.
 
    Parameters
    ----------
    df: Dataframe
    engine: str, optional
        'pyarrow' or 'fastparquet'
    path: str, optional
    write_kwargs: dict of str:str, optional
    read_kwargs: dict of str:str, optional
    expected: DataFrame, optional
        Expected deserialization result, otherwise will be equal to `df`
    check_names: list of str, optional
        Closed set of column names to be compared
    check_like: bool, optional
        If True, ignore the order of index & columns.
    repeat: int, optional
        How many times to repeat the test
    Ú compressionNrc
sVt|ƒD]H}ˆjˆfˆŽtddtˆfˆŽ}W5QRXtjˆ|ˆˆˆdqdS)NT©Úrecord)Ú check_namesÚ
check_likeÚ check_dtype)r>r rr ÚtmÚassert_frame_equal)ÚrepeatÚ_Úactual©r_r^r]rBÚexpectedÚpathÚ read_kwargsÚ write_kwargsrrÚcompareÂs  ûz!check_round_trip.<locals>.compare)r`Ú ensure_clean) rBrrgrirhrfr]r^r_rbrjrrerÚcheck_round_trip–s" 
rlcCs|trLddlm}|j|dd}t|jjƒt|ƒks6t‚|jjt|ƒksxt‚n,ddl    m
}|j
|dd}|j j j |ksxt‚dS)zÂCheck partitions of a parquet file are as expected.
 
    Parameters
    ----------
    path: str
        Path of the dataset.
    expected: iterable of str
        Expected partition names.
    rNF)Zvalidate_schemaZhive)Ú partitioning)rÚpyarrow.parquetÚparquetZParquetDatasetÚlenZ
partitionsZpartition_namesÚAssertionErrorÚsetZpyarrow.datasetÚdatasetrmÚschemaÚnames)rgrfÚpqrsZdsrrrÚcheck_partition_names×s
  rwc    Cs.d}tjt|dt|ddƒW5QRXdS)Nz.engine must be one of 'pyarrow', 'fastparquet'©Úmatchr'rG)rÚraisesÚ
ValueErrorrl)r-ÚmsgrrrÚtest_invalid_engineîsr}c    Cs$t dd¡t|ƒW5QRXdS)Núio.parquet.enginer©r+Úoption_contextrl)r-r!rrrÚtest_options_pyôsrc    Cs$t dd¡t|ƒW5QRXdS)Nr~rr)r-r#rrrÚtest_options_fpûsr‚c    Cs$t dd¡t|ƒW5QRXdS)Nr~Úautor)r-r#r!rrrÚtest_options_autosr„c    Csttdƒtƒst‚ttdƒtƒs$t‚t dd¡<ttdƒtƒsDt‚ttdƒtƒsVt‚ttdƒtƒsht‚W5QRXt dd¡<ttdƒtƒs’t‚ttdƒtƒs¤t‚ttdƒtƒs¶t‚W5QRXt dd¡>ttdƒtƒsàt‚ttdƒtƒsòt‚ttdƒtƒst‚W5QRXdS)Nrrr~rƒ)Ú
isinstancer r
rqr    r+r€)r#r!rrrÚtest_options_get_engine    sr†c    Cs.ddlm}| d¡}| d¡}ts(dnttjƒt|ƒk}tsBdnttjƒt|ƒk}to\| }tof| }|s*|s*|r¨d|›d}t    j
t |dt d    ƒW5QRXn&d
}t    j
t |dt d    ƒW5QRX|rd|›d }t    j
t |dt d    ƒW5QRXn&d }t    j
t |dt d    ƒW5QRXdS) Nr)ÚVERSIONSrrFzPandas requires version .z. or newer of .pyarrow.rxrƒz%Missing optional dependency .pyarrow.z. or newer of .fastparquet.z)Missing optional dependency .fastparquet.) Zpandas.compat._optionalr‡ÚgetrrrÚ __version__r"rrrzÚ ImportErrorr )r‡Z
pa_min_verZ
fp_min_verZhave_pa_bad_versionZhave_fp_bad_versionZhave_usable_paZhave_usable_fpryrrrÚ"test_get_engine_auto_error_messages8 
 
ÿýÿý
 
   r‹c    Csj|}t ¡T}|j||ddt||d}t ||¡t||ddgd}t ||ddg¡W5QRXdS)N©rrZ©rr8r:©rÚcolumns)r`rkr r ra©rCr!r#rBrgÚresultrrrÚtest_cross_engine_pa_fpHs
  r’c
Cs€|}t ¡j}|j||ddtddDt||d}t ||¡t||ddgd}t ||ddg¡W5QRXW5QRXdS)NrŒTr[rr8r:rŽ)r`rkr rr rarrrrÚtest_cross_engine_fp_paVs
   r“c@s:eZdZdd„Zdd„Zejjejddddd    „ƒƒZ    d
S) ÚBasec
CsBt ¡0}tj||dt|||ddW5QRXW5QRXdS)Nrx©rZ)r`rkrrzr )ÚselfrBrÚexcÚerr_msgrgrrrÚcheck_error_on_writees
zBase.check_error_on_writec
Cs>t ¡,}t |¡t|||ddW5QRXW5QRXdS)Nr•)r`rkÚexternal_error_raisedr )r–rBrr—rgrrrÚcheck_external_error_on_writeks
 z"Base.check_external_error_on_writeúdhttps://raw.githubusercontent.com/pandas-dev/pandas/main/pandas/tests/io/data/parquet/simple.parquetT)ÚurlZcheck_before_testcCs.|dkrt |¡d}t|ƒ}t ||¡dS)Nrƒrœ)rÚ importorskipr r`ra)r–r-rrrBrrrÚtest_parquet_read_from_urlqs     
ÿzBase.test_parquet_read_from_urlN)
Ú__name__Ú
__module__Ú __qualname__r™r›rÚmarkÚnetworkr`rŸrrrrr”dsûr”c @sÊeZdZdd„Zdd„Zej ddddd    g¡d
d „ƒZd d „Z    dd„Z
dd„Z dd„Z dd„Z dd„Zdd„Zdd„Zdd„Zdd„Zejjed d!d"d#„ƒZej d$d%d&d'd(d)d*d+d,d-g    ¡d.d/„ƒZdS)0Ú    TestBasicc    CsJt dddg¡ddt d¡t dddg¡fD]}d}| ||t|¡q,dS)Nr$r%r&r'r5z+to_parquet only supports IO with DataFrames)r+ÚSeriesrWr?rr™r{)r–rÚobjr|rrrÚ
test_error…sûzTestBasic.test_errorcCs6t tdƒttddƒƒdœ¡}ddg|_t||ƒdS)Nr.r$r/©rMrPr'rG)r+r,r=r>rrl)r–rrBrrrÚtest_columns_dtypess
zTestBasic.test_columns_dtypesrZNÚgzipÚsnappyÚbrotlicCsP|dkrt d¡n|dkr&t d¡t ddddgi¡}t||d|iddS)    Nr¬r­r)r$r%r&rZ©ri)rržr+r,rl)r–rrZrBrrrÚtest_compression—s  
zTestBasic.test_compressioncCsJt tdƒttddƒƒdœ¡}t dtdƒi¡}t|||ddgiddS)Nr.r$r/r©rMr©rfrh)r+r,r=r>rl)r–rrBrfrrrÚtest_read_columns¢sÿzTestBasic.test_read_columnscCs¦|dk}t ddddgi¡}t||ƒdddgtjdddtd    ƒdddgg}|D]2}||_t|tjƒrt|j d¡|_t|||d
qPd ddg|_d |j_    t||ƒdS) Nrr)r$r%r&r/r5r6r.)r]rr')
r+r,rlrAr=Úindexr…Z DatetimeIndexÚ
_with_freqÚname)r–rr]rBZindexesr²rrrÚtest_write_index«s 
 ü  zTestBasic.test_write_indexcCs>|}t ddddgi¡}tj dddg¡}||_t||ƒdS)Nr)r$r%r&©r8r$©r8r%©r9r$)r+r,Ú
MultiIndexÚ from_tuplesr²rl)r–r!rrBr²rrrÚtest_write_multiindexÃs
zTestBasic.test_write_multiindexcCs¢|}tjdddd}tjtj dt|ƒd¡tdƒd}tjj    d    d
g|gd d gd }|j
dd }||fD]4}||_ t ||ƒt ||dddgi|ddgdqhdS)Nz 01-Jan-2018z 01-Dec-2018ZMS)Úfreqr%r&ÚABC©rÚLevel1ÚLevel2ÚlevelÚdate)rurr)r*©rhrf) r+rAr,r?ÚrandomÚrandnrpr=r¹Z from_productÚcopyr²rl)r–r!rÚdatesrBZindex1Úindex2r²rrrÚtest_multiindex_with_columnsÌs"$
ÿ  
 
 
ÿz&TestBasic.test_multiindex_with_columnsc    Csêt dddgdddgdœ¡}ddd    œ}|jd
d }t||||d tjdddgdddgdœd ddgd}t||||d ddddddddgddddddddgg}tjttdƒƒdd„tdƒDƒdœ|d}|jd
d }t||||d dS)Nr$r%r&ÚqÚrÚs)r8r9F)rZr²T)Údrop©rirfZzyxZwvuZtsr©r²rGrHr'ÚquxÚoneÚtwoécSsg|]
}| ‘qSrr)Ú.0ÚirrrÚ
<listcomp>ösz7TestBasic.test_write_ignoring_index.<locals>.<listcomp>)rÑrÒ)r+r,Z reset_indexrlr=r>)r–rrBrirfÚarraysrrrÚtest_write_ignoring_indexÜs$
 ÿþÿ z#TestBasic.test_write_ignoring_indexcCsZtj dddg¡}tjtj dd¡|d}|dkrD| ||td¡n|d    krVt    ||ƒdS)
Nr¶r·r¸r/r&r¾rúColumn name must be a stringr)
r+r¹rºr,r?rÄrÅr™Ú    TypeErrorrl)r–rZ
mi_columnsrBrrrÚtest_write_column_multiindexüsÿz&TestBasic.test_write_column_multiindexc    Cs˜ddddddddgddddddddgg}tjtj dd¡|d}d    d
g|j_|d kr‚ttj    ƒtd ƒkrlt
}nt }|  |||d ¡n|dkr”t ||ƒdS)NrGrHr'rÐr$r%rÓr¾r¿rÀrz0.7.0z Column namer)r+r,r?rÄrÅrrurrr‰rÚr{r™rl)r–rr×rBÚerrrrrÚ&test_write_column_multiindex_nonstringsþ z0TestBasic.test_write_column_multiindex_nonstringc    Cs^|}ddddddddgddddddddgg}tjtj dd¡|d}d    d
g|j_t||ƒdS) NrGrHr'rÐrÑrÒrÓr¾Z    ColLevel1Z    ColLevel2)r+r,r?rÄrÅrrurl©r–r!rr×rBrrrÚ#test_write_column_multiindex_stringsþ z-TestBasic.test_write_column_multiindex_stringcCs>|}ddddg}tjtj dd¡|d}d|j_t||ƒdS)    NrGrHr'rÐrÓr/r¾Z    StringCol)r+r,r?rÄrÅrr´rlrÞrrrÚtest_write_column_index_string*s
 z(TestBasic.test_write_column_index_stringcCsTddddg}tjtj dd¡|d}d|j_|dkrF| ||td    ¡n
t    ||ƒdS)
Nr$r%r&r/rÓr¾Z NonStringColrrÙ)
r+r,r?rÄrÅrr´r™rÚrl)r–rr×rBrrrÚ!test_write_column_index_nonstring6s ÿz+TestBasic.test_write_column_index_nonstringzminimum pyarrow not installedrc
CsÖddlm}|dkr.tjjdd}|j |¡t t     ddddgd¡t     ddddgd    ¡t     d
d d dg¡t     d dd dg¡t     ddddgd¡t     ddddgd¡t     ddddgd¡dœ¡}t
  ¡,}|  ||¡t ||d}t ||dd}W5QRX|d
jt d¡kst‚t tj    ddddgddtj    ddddgddtj    d
d d dgddtj    d dd dgddtj    ddddgddtj    ddddgddtj    ddddgdddœ¡}    |dkrÆ|jd dd }|    jd dd }    t
 ||    ¡dS)!Nrrz.Fastparquet nullable dtype support is disabledrr$r%r&Úint64Zuint8r8r9rDTFr/çð?rKrLÚfloat32r2)r8r9rDr:r;r<ÚgrÚnumpy_nullable©rÚ dtype_backendÚInt64r3ÚUInt8rMÚbooleanZFloat32ÚFloat64)Zaxis)rnrorr£ÚxfailÚnodeÚ
add_markerrÚtablerr`rkZ write_tabler r4r?rqr+r,rÍra)
r–rrrvr£rðrgZresult1Zresult2rfrrrÚtest_dtype_backendDsF ÿ ÷ÿ
  ùÿ
zTestBasic.test_dtype_backendr4rérêrëÚobjectzdatetime64[ns, UTC]rQz    period[D]rìrMcCsTt dtjg|di¡}d}|dkr<t dtjgddi¡}t||ddi|ddS)NÚvaluer3rQrìrèrærÃ)r+r,rrl)r–r!r4rBrfrrrÚtest_read_empty_arrayvs$ ÿÿ ÿÿÿzTestBasic.test_read_empty_array)r r¡r¢r¨rªrr£Ú parametrizer¯r±rµr»rÉrØrÛrÝrßràráÚskipifrrñrôrrrrr¥„s< 
 
           
1÷þr¥c@s²eZdZdd„Zdd„Zdd„Zdd„Zd    d
„Zd d „Zd d„Z    e
j j e ƒdde
j  deejg¡dd„ƒƒZdd„Ze
j jdd„ƒZe
j jdd„ƒZe
j je d¡e
j  ddggg¡dd„ƒƒƒZe d¡d d!„ƒZe d¡d"d#„ƒZd$d%„Zd&d'„Ze
j jded(d)„gd*d+gd,d-d.„ƒZd/d0„Zd1d2„Z e d¡d3d4„ƒZ!e d¡d5d6„ƒZ"e d¡d7d8„ƒZ#d9d:„Z$d;d<„Z%e d¡d=d>„ƒZ&d?d@„Z'dAdB„Z(dCdD„Z)dEdF„Z*dGdH„Z+dIS)JÚTestParquetPyArrowcCsB|}tjdddd}| d¡}||d<dddg|d<t||ƒdS)Nr5r&úEurope/Brussels©r7ÚtzÚ datetime_tzTÚbool_with_none)r+rAr³rl)r–r!rXrBÚdtirrrÚ
test_basic™s 
zTestParquetPyArrow.test_basiccCs<|}tjdddd|d<t|||ddgdddgid    dS)
Nr5r&rørùrûrMrPrr°)r+rArl)r–r!rXrBrrrÚtest_basic_subset_columns¤s
 
üz,TestParquetPyArrow.test_basic_subset_columnscCs:|j|d}t|tƒst‚t|ƒ}t|ƒ}t ||¡dS)Nr)r r…rNrqrr r`ra)r–r!rXZ    buf_bytesZ
buf_streamÚresrrrÚ*test_to_bytes_without_path_or_buf_provided²s
 z=TestParquetPyArrow.test_to_bytes_without_path_or_buf_providedcCs8tjt d¡ dd¡tdƒd ¡}| ||td¡dS)Né r/r&Úaaar¾zDuplicate column names found©    r+r,r?r@Zreshaper=rÆr™r{©r–r!rBrrrÚtest_duplicate_columns½s$z)TestParquetPyArrow.test_duplicate_columnscCs:t dtjdddi¡}tr,| ||t¡n
t||ƒdS)Nr8ú1 dayr&r6)r+r,Útimedelta_rangerr›ÚNotImplementedErrorrlrrrrÚtest_timedeltaÂsz!TestParquetPyArrow.test_timedeltacCs(t ddddgi¡}| ||tj¡dS)Nr8r$rK)r+r,r›rÚArrowExceptionrrrrÚtest_unsupportedÉsz#TestParquetPyArrow.test_unsupportedcCs6tjddtjd}tj|dgd}| ||tj¡dS)Nr%é
r3Úfp16©Údatar)r?r@Úfloat16r+r,r›rr )r–r!rrBrrrÚtest_unsupported_float16Ðsz+TestParquetPyArrow.test_unsupported_float16zqPyArrow does not cleanup of partial files dumps when unsupported dtypes are passed to_parquet function in windowsrÚ    path_typec
Csxtjddtjd}tj|dgd}t ¡D}||ƒ}t tj    ¡|j
||dW5QRXt j   |¡rjt‚W5QRXdS)Nr%r r3rr)rgr)r?r@rr+r,r`rkršrr r ÚosrgÚisfilerq)r–r!rrrBZpath_strrgrrrÚ test_unsupported_float16_cleanup×s 
z3TestParquetPyArrow.test_unsupported_float16_cleanupcCsxt ¡}t tdƒ¡|d<tjddddddgt dddg¡d|d<tjddddddgddd    gd
d |d<t||ƒdS) NZabcdefr8rGr'rHr3r9rDr:T)Ú
categoriesZordered)r+r,Ú Categoricalr=ZCategoricalDtyperlrrrrÚtest_categoricalìsþ
ÿ
z#TestParquetPyArrow.test_categoricalcCs4t d¡}|jf|Ž}d|i}t||d||ddS)NÚs3fsÚ
filesystemzpandas-test/pyarrow.parquet©rgrhri)rržZ S3FileSystemrl)r–r-Ú s3_resourcer!Ús3sorZs3ÚkwrrrÚtest_s3_roundtrip_explicit_fsþs
 ûz0TestParquetPyArrow.test_s3_roundtrip_explicit_fscCsd|i}t||d||ddS)NÚstorage_optionsz s3://pandas-test/pyarrow.parquetr©rl)r–r-rr!rrrrÚtest_s3_roundtrip sûz$TestParquetPyArrow.test_s3_roundtriprÚ partition_colr)c
Cs^| ¡}|r6| t |tj¡¡}d}|| |¡||<t|||dd|i|d|dœddddS)NÚcategoryzs3://pandas-test/parquet_dirr!)Úpartition_colsrZr!Tr$)rfrgrhrir^rb)rÆrVÚdictÚfromkeysr?Úint32rl)r–r-rr!r$rZ expected_dfZpartition_col_typerrrÚtest_s3_roundtrip_for_dirs( ÿýôz,TestParquetPyArrow.test_s3_roundtrip_for_dirrcCs(tƒ}| |¡t|ƒ}t ||¡dSr)rr r r`ra)r–r-ÚbufferZ df_from_bufrrrÚtest_read_file_like_obj_support>s
z2TestParquetPyArrow.test_read_file_like_obj_supportc    Csb| dd¡| dd¡tjtddtdƒW5QRXtjtdd| d¡W5QRXdS)NÚHOMEZ TestingUserÚ USERPROFILEz.*TestingUser.*rxz~/file.parquet)ZsetenvrrzÚOSErrorr r )r–r-Z monkeypatchrrrÚtest_expand_userEs   z#TestParquetPyArrow.test_expand_usercCs>ddg}|}|j||ddt||ƒt|ƒj|jks:t‚dS)NrRrP©r&rZ©r rwr Úshaperq)r–Útmp_pathr!rXr&rBrrrÚtest_partition_cols_supportedNs
 
z0TestParquetPyArrow.test_partition_cols_supportedcCs@d}|g}|}|j||ddt||ƒt|ƒj|jks<t‚dS)NrRr1r2)r–r4r!rXr&Úpartition_cols_listrBrrrÚtest_partition_cols_stringVs 
z-TestParquetPyArrow.test_partition_cols_stringcCs|Srr)ÚxrrrÚ<lambda>`ózTestParquetPyArrow.<lambda>rMz pathlib.Path)Úidsc    Cs<d}|g}|}||ƒ}|j||dt|ƒj|jks8t‚dS)Nr*)r&)r r r3rq)    r–r4r!r-rr&r6rBrgrrrÚtest_partition_cols_pathlib_s z.TestParquetPyArrow.test_partition_cols_pathlibcCstjggd}t||ƒdS)N)r²r)r+r,rlrrrrÚtest_empty_dataframemsz'TestParquetPyArrow.test_empty_dataframecCsVddl}t dddgi¡}| |jd| ¡dg¡}| t¡}t||d|i|ddS)Nrr8r$)ÚtypertrÎ)    rr+r,rtÚfieldZbool_rVrRrl)r–r!rrBrtZout_dfrrrÚtest_write_with_schemars
 
z)TestParquetPyArrow.test_write_with_schemacCszt tjdddgddtjdddgddtjdddgd    dd
œ¡}t||ƒt dtjddddgddi¡}t||ƒdS) Nr$r%r&rér3ZUInt32r8rDrM)r8r9rD)r+r,r¦rlrrrrÚ test_additional_extension_arrayszsýÿ
 z3TestParquetPyArrow.test_additional_extension_arraysc    CsVt dtjdddgddi¡}t d|¡"t||| d|›d¡dW5QRXdS)    Nr8rDzstring[pyarrow]r3Ústring_storagezstring[ú]©rf)r+r,r¦r€rlrV)r–r!rBrBrrrÚ test_pyarrow_backed_string_arrayŠsz3TestParquetPyArrow.test_pyarrow_backed_string_arrayc CsNt tj dddg¡tjddddtj tjdddd¡d    œ¡}t||ƒdS)
N)rr$)r$r%)r&r/z
2012-01-01r&ÚD)r7r¼r/)rDr:r;)r+r,Z IntervalIndexrºÚ period_rangeZ from_breaksrArlrrrrÚtest_additional_extension_types‘sÿüÿ
z2TestParquetPyArrow.test_additional_extension_typescCs>ts
d}nd}t dtjddddi¡}t||d|id    dS)
Nz2.6z2.0r8z
2017-01-01Z1nr ©r¼r7Úversionr®)rr+r,rArl)r–r!ÚverrBrrrÚtest_timestamp_nanoseconds¡s
z-TestParquetPyArrow.test_timestamp_nanosecondscCsVts(|jtjjkr(|j tjj    dd¡d|g}t
j |d|id}t ||dddS)Nzitemporary skip this test until it is properly resolved: https://github.com/pandas-dev/pandas/issues/37286réÚ index_as_col©r²rF)r_) rÚtzinforSÚtimezoneÚutcrîrïrr£rír+r,rl)r–rr!rYÚidxrBrrrÚtest_timezone_aware_index¬sÿ þÿÿ
 z,TestParquetPyArrow.test_timezone_aware_indexc    Cs^t dttddƒƒi¡}t ¡$}| ||¡t||dgdd}W5QRXt|ƒdksZt    ‚dS)Nr8rr&©r8z==rF)ÚfiltersZuse_legacy_datasetr$)
r+r,r=r>r`rkr r rprq)r–r!rBrgr‘rrrÚtest_filter_row_groupsÅs
 ÿz)TestParquetPyArrow.test_filter_row_groupsc    Cs~tjtj dd¡dddgd}t ¡}| ||¡t||ƒ}W5QRX|rdt    |j
tj j j ƒszt‚nt    |j
tj j jƒszt‚dS)Nr r&r)r*ÚCr¾)r+r,r?rÄrÅr`rkr r r…Z_mgrÚcoreZ    internalsZ ArrayManagerrqZ BlockManager)r–r!Zusing_array_managerrBrgr‘rrrÚtest_read_parquet_managerÐs
 z,TestParquetPyArrow.test_read_parquet_managercCs´ddl}|}tjdddd}| d¡}||d<dddg|d<|j |¡}|jtjd    }|d
 d ¡|d
<|d  d ¡|d <|d t |j    d dd¡¡|d<t
||ddi|ddS)Nrr5r&rørùrûTrü)Z types_mapperrSztimestamp[us][pyarrow]rTÚus)Úunitrúrèr©rrhrf) rr+rAr³ZTableZ from_pandasZ    to_pandasZ
ArrowDtyperVÚ    timestamprl)r–r!rXrrBrýZpa_tablerfrrrÚ&test_read_dtype_backend_pyarrow_configÜs*
 ÿÿüz9TestParquetPyArrow.test_read_dtype_backend_pyarrow_configcCsntjdddgitjddgdddd    }| ¡}d
dl}t|jƒtd ƒkrV|j d¡|_t    ||d d i|ddS)Nr8r$r%r&r/Útest©r´zint64[pyarrow])r²r4rz11.0.0rèrr])
r+r,ÚIndexrÆrrr‰r²rVrl)r–r!rBrfrrrrÚ,test_read_dtype_backend_pyarrow_config_indexùs
ÿüz?TestParquetPyArrow.test_read_dtype_backend_pyarrow_config_indexc    Cs”t tdƒttddƒƒdœ¡}ddg|_t||ƒddg|_tjtdd    t||ƒW5QRXt         d
dddd¡t         d
dddd¡g|_t||ƒdS) Nr.r$r/r©rrErFz|S3rxéÛ)
r+r,r=r>rrlrrzr    rSrrrrÚtest_columns_dtypes_not_invalid    s
 
 
þz2TestParquetPyArrow.test_columns_dtypes_not_invalidcCs*tjtjdddgddd}t||ƒdS)Nr8r9rDú custom namerarÏ©r+r,rbrlrrrrÚtest_empty_columnssz%TestParquetPyArrow.test_empty_columnsN),r r¡r¢rþrÿrrr
r rrr£rírrõÚstrÚpathlibÚPathrrÚ
single_cpur r#ÚtdZ
skip_if_nor*r,r0r5r7r<r=r@rArErHrLrTrWrZr_rcrerhrrrrr÷˜sp  ý 
 
þþ
 
    
ÿ
 
 
 
 
 
 r÷c@s¬eZdZdd„Zdd„Zdd„Zdd„Zd    d
„Zd d „Zd d„Z    e
j j dd„ƒZ dd„Zdd„Zdd„Zdd„Zdd„Zdd„Zdd„Zdd „Zd!d"„Zd#d$„Zd%d&„Zd'S)(ÚTestParquetFastParquetcCsF|}tjdddd}| d¡}||d<tjddd|d<t||ƒdS)    Nr5r&z
US/Easternrùrûrr6Ú    timedelta)r+rAr³rrl)r–r#rXrBrýrrrrþ$s 
z!TestParquetFastParquet.test_basiccCs”t tdƒttddƒƒdœ¡}t}d}ddg|_| ||||¡ddg|_| ||||¡t d    dddd¡t d    dddd¡g|_| ||||¡dS)
Nr.r$r/r©rÙrrErFrd)r+r,r=r>rÚrr™rS)r–r#rBrÜr|rrrÚtest_columns_dtypes_invalid-s
 
þz2TestParquetFastParquet.test_columns_dtypes_invalidcCs<tjt d¡ dd¡tdƒd ¡}d}| ||t|¡dS)Nrr/r&rr¾z9Cannot create parquet dataset with duplicate column namesr©r–r#rBr|rrrrBs$z-TestParquetFastParquet.test_duplicate_columnscCsBt ddddgi¡}tjddtjdgidd}t|||dddS)    Nr8TFrãgrr3)rfr_)r+r,r?rUrl©r–r#rBrfrrrÚtest_bool_with_noneHsz*TestParquetFastParquet.test_bool_with_nonecCsVt dtjddddi¡}| ||td¡t ddddgi¡}d}| ||t|¡dS)    Nr8Z2013ÚMr&rIr$rKz"Can't infer object conversion type)r+r,rGr™r{rqrrrr Os
z'TestParquetFastParquet.test_unsupportedcCs&t dt tdƒ¡i¡}t||ƒdS)Nr8r.)r+r,rr=rl)r–r#rBrrrrZsz'TestParquetFastParquet.test_categoricalc    Csfdttddƒƒi}t |¡}t ¡(}|j||dddt||dgd}W5QRXt|ƒdksbt    ‚dS)Nr8rr&r$)rZZrow_group_offsetsrU)rV)
r=r>r+r,r`rkr r rprq)r–r#r:rBrgr‘rrrrW^s 
 
z-TestParquetFastParquet.test_filter_row_groupscCs t||dd|id|dœddS)Nz$s3://pandas-test/fastparquet.parquetr!)rZr!rr")r–r-rr#rrrrr#fsûz(TestParquetFastParquet.test_s3_roundtripcCs\ddg}|}|j|d|ddtj |¡s.t‚ddl}| t|ƒd¡j}t    |ƒdksXt‚dS)NrRrPr©rr&rZrFr%©
r rrgÚexistsrqrZ ParquetFileriZcatsrp©r–r4r#rXr&rBrZactual_partition_colsrrrr5qsüz4TestParquetFastParquet.test_partition_cols_supportedcCsXd}|}|j|d|ddtj |¡s*t‚ddl}| t|ƒd¡j}t    |ƒdksTt‚dS)NrRrrurFr$rvrxrrrr7süz1TestParquetFastParquet.test_partition_cols_stringcCs\ddg}|}|j|dd|dtj |¡s.t‚ddl}| t|ƒd¡j}t    |ƒdksXt‚dS)NrRrPr)rrZÚ partition_onrFr%rvrxrrrÚtest_partition_on_supported‘süz2TestParquetFastParquet.test_partition_on_supportedc    CsBddg}|}d}tjt|d|j|dd||dW5QRXdS)NrRrPzYCannot use both partition_on and partition_cols. Use partition_cols for partitioning datarxr)rrZryr&)rrzr{r )r–r4r#rXr&rBr|rrrÚ3test_error_on_using_partition_cols_and_partition_on¡sÿûzJTestParquetFastParquet.test_error_on_using_partition_cols_and_partition_oncCs"t ¡}| ¡}t|||ddS)NrD)r+r,rÆrlrrrrrr=´sz+TestParquetFastParquet.test_empty_dataframecCs>d|g}tj|d|id}| ¡}d|j_t|||ddS)NrMrNrOr²rD)r+r,rÆr²r´rl)r–r#rYrSrBrfrrrrTºs
 
z0TestParquetFastParquet.test_timezone_aware_indexc Csšt dddgi¡}t ¡v}| |¡tjtdd*t t    ¡t
|dddW5QRXW5QRXtjtddt
|dd    d
W5QRXW5QRXdS) Nr8r$r%z!not supported for the fastparquetrxrT)rZuse_nullable_dtypesrrç) r+r,r`rkr rrzr{Zassert_produces_warningÚ FutureWarningr )r–r#rBrgrrrÚ&test_use_nullable_dtypes_not_supportedÃs
 
 "z=TestParquetFastParquet.test_use_nullable_dtypes_not_supportedc
Csbt d¡N}t |¡ d¡tjtddt|ddW5QRXt |¡j    ddW5QRXdS)    Nú test.parquetsbreakitÚrxrrF)Ú
missing_ok)
r`rkrjrkÚ write_bytesrrzÚ    Exceptionr Úunlink)r–rgrrrÚ$test_close_file_handle_on_read_errorÎs
 z;TestParquetFastParquet.test_close_file_handle_on_read_errorc
Csptjddgddgdœd}t d¡6}t| ¡dƒ}| |¡W5QRXt||d}W5QRXt ||¡dS)Nrr$r()rr~Úwbr)    r+r,r`rkÚopenÚencoder r ra)r–rrBrgr<r‘rrrÚtest_bytes_file_nameÖs  z+TestParquetFastParquet.test_bytes_file_namec
Csfd}t dttddƒƒi¡}t d¡6}| |¡tjt    |dt
|ddW5QRXW5QRXdS)    NzPdtype_backend numpy is invalid, only 'numpy_nullable' and 'pyarrow' are allowed.rPr$r/z tmp.parquetrxÚnumpy)rè) r+r,r=r>r`rkr rrzr{r )r–rr|rBrgrrrÚtest_invalid_dtype_backendàsÿ 
z1TestParquetFastParquet.test_invalid_dtype_backendcCsJtjtjdddgddd}tjtjdddgddd}t|||ddS)Nr8r9rDrfrarÏrDrgrrrrrrhësz)TestParquetFastParquet.test_empty_columnsN)r r¡r¢rþrprrsr rrWrr£rlr#r5r7rzr{r=rTr}r„rˆrŠrhrrrrrn#s(     
 
     
 rn)    NNNNNTFTr%)GÚ__doc__rSÚiorrrjÚwarningsrr‰r?rZpandas._configrZ pandas.compatrZpandas.compat.pyarrowrrZpandas.util._test_decoratorsÚutilZ_test_decoratorsrmZpandasr+Zpandas._testingZ_testingr`Zpandas.util.versionrZpandas.io.parquetr    r
r r r rrrŠrr"Zfixturerr£rörr!r#r-rCrXÚnowrQrRÚminÚmaxÚstrptimerYrlrwr}rr‚r„r†r‹r’r“r”r¥r÷rnrrrrÚ<module>s¸      
 
þþÿþøÿ
 
 
 
 
 
    ùÿ
ö
A+