1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
U
­ý°deã@s‚ddlZddlZddlZddlZddlmZddlZddlm    Z    m
Z
m Z m Z m Z mZddlmZddlmZmZGdd„dƒZdS)éN)Úis_integer_dtype)Ú CategoricalÚCategoricalIndexÚ    DataFrameÚ
RangeIndexÚSeriesÚ get_dummies)Ú SparseArrayÚ SparseDtypec
@s(eZdZejdd„ƒZejddejedgddd„ƒZ    ejd    d
gdd d „ƒZ
d d„Z dd„Z dd„Z dd„Zdd„Zdd„Zdd„Zdd„Zdd„Zdd „Zd!d"„Zd#d$„Zd%d&„Zd'd(„Zd)d*„Zd+d,„Zd-d.„Zd/d0„Zd1d2„Zej d3d4e d5d6giƒie d7d8giƒfd4e d9d5giƒie d:d8giƒfe d9d6giƒd5d;œe d7d8giƒfe d9d6giƒd5d<œe d=d8giƒfg¡d>d?„ƒZ!d@dA„Z"dBdC„Z#dDdE„Z$dFdG„Z%dHdI„Z&dJdK„Z'dLdM„Z(dNdO„Z)ej dPd8dQg¡dRdS„ƒZ*ej d
d8dQg¡dTdU„ƒZ+dVdW„Z,dXdY„Z-ej dZd[g¡d\d]„ƒZ.d^d_„Z/d`da„Z0dS)bÚTestGetDummiescCs"tdddgdddgdddgdœƒS)NÚaÚbÚcééé)ÚAÚBÚC)r)Úself©rú\d:\z\workplace\vscode\pyvenv\venv\Lib\site-packages\pandas/tests/reshape/test_get_dummies.pyÚdfszTestGetDummies.dfÚuint8Úi8N)ÚparamscCs t |j¡S©N)ÚnpÚdtypeÚparam©rÚrequestrrrrszTestGetDummies.dtypeZdenseÚsparsecCs
|jdkS)Nr")rr rrrr""szTestGetDummies.sparsecCs|dkrtjS|Sr)rr)rrrrrÚeffective_dtype(szTestGetDummies.effective_dtypec    Cs.d}tjt|dt|ddW5QRXdS)Nz1dtype=object is not a valid dtype for get_dummies©ÚmatchÚobject©r)ÚpytestÚraisesÚ
ValueErrorr)rrÚmsgrrrÚ'test_get_dummies_raises_on_dtype_object-sz6TestGetDummies.test_get_dummies_raises_on_dtype_objectcCs¸tdƒ}t|ƒ}t|tdƒƒ}tdddgdddgdddgdœ| |¡d}|r\|jtdd}t|||d    }t ||¡t|||d    }t ||¡tdƒ|_    t|||d    }t ||¡dS)
NÚabcÚABCrr©r r rr'ç©Ú
fill_value©r"r)
Úlistrrr#Úapplyr    rÚtmÚassert_frame_equalÚindex)rr"rÚs_listÚs_seriesÚs_series_indexÚexpectedÚresultrrrÚtest_get_dummies_basic2s þ  
z%TestGetDummies.test_get_dummies_basicc CsÂtdƒ}t|ƒ}tdddddgdddddgdddddgd    œƒ}tdddgdddgdddgd    œ| |¡tdƒd
}|r¢t|ƒr‚d}n|tkrd }nd }|jt|d }t|||d}t     
||¡t|||d}t     
||¡t||j ||d}|r
d| |¡j ›d|›d}    n | |¡j }    t|    didd}|j  ¡}dd„|jDƒ|_t     ||¡t|dg||d}dddœ}
d|
 |    d¡|
|    <t|
dd ¡}|j  ¡}dd„|jDƒ|_| ¡}t     ||¡dS)Nr-rrrrrrrr/)rÚcolumnsFr0r1r3)r?r"rzSparse[z, ú]éÚcount©ÚnamecSsg|] }t|ƒ‘qSr©Ústr©Ú.0ÚirrrÚ
<listcomp>ksz?TestGetDummies.test_get_dummies_basic_types.<locals>.<listcomp>r )Úint64r&cSsg|] }t|ƒ‘qSrrErGrrrrJus)r4rrr#rÚboolr5r    rr6r7r?rDZdtypesZ value_countsr8Zassert_series_equalÚgetÚ
sort_index) rr"rr9r:Zs_dfr<r2r=Z
dtype_nameZexpected_countsrrrÚtest_get_dummies_basic_typesGsJ(ÿý   
 
 
z+TestGetDummies.test_get_dummies_basic_typescCs tjg}t|ƒ}t|dgd}t||d}t||d}t||d}|jsLt‚|jsVt‚|js`t‚|j ¡dgkstt‚|j ¡dgksˆt‚|j ¡dgksœt‚dS)Nr©r8©r"r)rÚnanrrÚemptyÚAssertionErrorr8Útolist)rr"Z just_na_listZjust_na_seriesZjust_na_series_indexZres_listZ
res_seriesZres_series_indexrrrÚtest_get_dummies_just_nays   
 
 
z'TestGetDummies.test_get_dummies_just_nac
    Cs$ddtjg}t|||d}tdddgdddgdœ| |¡d}|rP|jtdd    }t ||¡t|d
||d }ttjdddgddddgddddgi| |¡d}|j    ddtjgdd }|j
|_
|rÎ|jtdd    }t ||¡ttjgd
||d }tt ddgd tjg| |¡d}    t  |j |    j ¡dS)Nr r r3rr)r r r'r0r1T©Údummy_nar"r©ZaxisrP©r?r)rrRrrr#r5r    r6r7Úreindexr?rZassert_numpy_array_equalÚvalues)
rr"rÚsÚresÚexpÚres_naÚexp_naÚ res_just_naÚ exp_just_narrrÚtest_get_dummies_include_naŠs4 ÿ "þ  ÿz*TestGetDummies.test_get_dummies_include_nacCsjd}t d¡}|||g}t|d|d}tddddgd|›dddgiƒ}|rZ|jtd    d
}t ||¡dS) NÚezLATIN SMALL LETTER E WITH ACUTEÚletter©Úprefixr"Zletter_eTFZletter_rr1)Ú unicodedataÚlookuprrr5r    r6r7)rr"reÚeacuter]r^r_rrrÚtest_get_dummies_unicode§s
 
ÿz'TestGetDummies.test_get_dummies_unicodecCs¤|ddg}t||d}tdddgdddgdddgdddgdœtd}|r”ttdddgddtdddgddtdddgddtdddgdddœƒ}t ||¡dS)    NrrrQrr©ÚA_aÚA_bÚB_bÚB_cr'rL)rrrLr    r6r7©rrr"r=r<rrrÚtest_dataframe_dummies_all_obj´s  $þüÿ    z-TestGetDummies.test_dataframe_dummies_all_objcCsb|ddg}| dddœ¡}t|ƒ}tdddgdddgdddgdddgdœtd    }t ||¡dS)
Nrrr&Ústring©rrrrrmr')ÚastyperrrLr6r7©rrr=r<rrrÚ#test_dataframe_dummies_string_dtypeÇs üù    z2TestGetDummies.test_dataframe_dummies_string_dtypec    Cs t|||d}|r"t}t|dƒ}n
tj}|}tdddg|dddg|d|dddg|d|dddg|d|dddg|ddœƒ}|dd    d
d d g}t ||¡dS) Nr3rrrrr')rrnrorprqrrnrorprq)rr    r
rÚarrayrr6r7©rrr"rr=ZarrÚtypr<rrrÚ"test_dataframe_dummies_mix_default×s  ûÿ    z1TestGetDummies.test_dataframe_dummies_mix_defaultcs¨ddg}t|||d}tdddgdddgdddgdddgdddgd    œƒ}|d
g|d
g<d d d dg}|d
g|}|rztnt‰|| ‡fdd„¡||<t ||¡dS)NÚfrom_AÚfrom_BrgrrrTF©rÚfrom_A_aÚfrom_A_bÚfrom_B_bÚfrom_B_crr€rr‚rƒcsˆ|ƒSrr)Úx©r{rrÚ<lambda>üózCTestGetDummies.test_dataframe_dummies_prefix_list.<locals>.<lambda>)rrr    rr5r6r7)rrr"Úprefixesr=r<Úcolsrr…rÚ"test_dataframe_dummies_prefix_listës ûÿ      z1TestGetDummies.test_dataframe_dummies_prefix_listc
CsÚt|d|d}ddddg}tdddddgd    ddddgd
ddddggd g|d }| d tji¡}|rÊtjtdd    d
gd d tdddgdddtdddgdddtdddgdddtdddgdddgdd}t     ||¡dS)NÚbadrgZbad_aZbad_bZbad_crTFrrr©r?rCz Sparse[bool])rDrrY)
rrrvrrKÚpdÚconcatrr6r7)rrr"r=Z bad_columnsr<rrrÚ!test_dataframe_dummies_prefix_strÿs*    ýúûø z0TestGetDummies.test_dataframe_dummies_prefix_strcCsªt|dgdg|d}tdddgdddgdd    dgd    dd    gd
œƒ}|j}||dd… t¡||dd…<|d g|d g<|ršd d g}|| tdd    ƒ¡||<t ||¡dS)Nr}r)rhr?r"r rrrrr)rrr€rrr€rrL)rrr?rvrLr
r6r7©rrr"r=r<r‰rrrÚtest_dataframe_dummies_subsetsüÿ"z,TestGetDummies.test_dataframe_dummies_subsetcCsêt|d|d}tdddgdddgdddgdddgdddgdœƒ}|d    g|d    g<|d    d
d d d g}|rŠd
d d d g}|| tddƒ¡||<t ||¡t|ddg|d}|jdddœd}t ||¡t|dddœ|d}t ||¡dS)Nz..©Ú
prefix_sepr"rrrTF)rúA..aúA..búB..búB..crr”r•r–r—rLrÚ__ZB__bZB__c)r–r—rŒru)rrrvr
r6r7ÚrenamerrrrÚ!test_dataframe_dummies_prefix_sep.s(ûÿ       z0TestGetDummies.test_dataframe_dummies_prefix_sepc    Cs8t d¡}tjt|dt|dg|dW5QRXdS)NzPLength of 'prefix' (1) did not match the length of the columns being encoded (2)r$ztoo fewrg©ÚreÚescaper(r)r*r©rrr"r+rrrÚ(test_dataframe_dummies_prefix_bad_lengthHs
ÿz7TestGetDummies.test_dataframe_dummies_prefix_bad_lengthc    Cs8t d¡}tjt|dt|dg|dW5QRXdS)NzTLength of 'prefix_sep' (1) did not match the length of the columns being encoded (2)r$r‹r’r›ržrrrÚ,test_dataframe_dummies_prefix_sep_bad_lengthPs
ÿz;TestGetDummies.test_dataframe_dummies_prefix_sep_bad_lengthcCs¶dddœ}tdddgdddgddd    gd
œƒ}t|||d }tdddgdd dgd dd gddd gd d dgd œƒ}ddddg}|| t¡||<|r¦|| tdd ƒ¡||<t ||¡dS)Nr}r~rurrrr r r)rrrrgrrr€rr‚rƒrL)rrrvrLr
r6r7)rr"rˆrr=r<r?rrrÚ"test_dataframe_dummies_prefix_dictXs 
"ûÿ
z1TestGetDummies.test_dataframe_dummies_prefix_dictc Cstjtjtjg|jddd…f<t|d||djdd}|rJt}t|dƒ}n
tj}|}tdddtjg|ddddg|d|ddddg|d|ddddg|d|ddddg|d|ddddg|d|ddddg|dd    œƒjdd}t     
||¡t|d
||d}|d d d ddg}t     
||¡dS)NrTrWrrYrrr')rrnroÚA_nanrprqÚB_nanFrrnrorprq) rrRÚlocrrNr    r
ryrr6r7rzrrrÚtest_dataframe_dummies_with_nans2ÿ  ùÿ
ö z-TestGetDummies.test_dataframe_dummies_with_nac CsÐtdddgƒ|d<t|||djdd}|r<t}t|dƒ}n
tj}|}tddd    g|dddg|d
|dddg|d
|dddg|d
|dddg|d
|dddg|d
|dddg|d
d œƒjdd}t     ||¡dS) Nr„ÚyÚcatr3rrYrrrr')rrnrorprqZcat_xÚcat_y)
rrrNr    r
rryrr6r7rzrrrÚ'test_dataframe_dummies_with_categoricals( ùÿ
ö z6TestGetDummies.test_dataframe_dummies_with_categoricalzget_dummies_kwargs,expectedÚdataõär uä_aTr„ux_ä)rªrh)rªr“uxäacCstf|Ž}t ||¡dSr)rr6r7)rZget_dummies_kwargsr<r=rrrÚtest_dataframe_dummies_unicode¥s
z-TestGetDummies.test_dataframe_dummies_unicodecCsªtdƒ}t|ƒ}t|tdƒƒ}tdddgdddgdœtd}t|d|d}|r\|jtdd    }t ||¡t|d|d}t ||¡tdƒ|_    t|d|d}t ||¡dS)
Nr-r.rr)r rr'T©Ú
drop_firstr"r1)
r4rrrLrr5r    r6r7r8©rr"r9r:r;r<r=rrrÚ!test_get_dummies_basic_drop_firstÀs  
z0TestGetDummies.test_get_dummies_basic_drop_firstcCsŒtdƒ}t|ƒ}t|tdƒƒ}ttdƒd}t|d|d}t ||¡t|d|d}t ||¡ttdƒd}t|d|d}t ||¡dS)NZaaar.rrPTr­)r4rrrrr6r7r¯rrrÚ+test_get_dummies_basic_drop_first_one_levelÕs  z:TestGetDummies.test_get_dummies_basic_drop_first_one_levelc    Csàddtjg}t|d|d}tddddgitd}|rB|jtdd}t ||¡t|dd|d    }tddddgtjdddgitdj    dtjgdd
}|r¢|jtdd}t ||¡ttjgdd|d    }tt
dƒd }t ||¡dS) Nr r Tr­rrr'r1©rXr®r"rYrP) rrRrrrLr5r    r6r7r[r)    rr"Zs_NAr^r_r`rarbrcrrrÚ$test_get_dummies_basic_drop_first_NAçs,  "ÿ ÿz3TestGetDummies.test_get_dummies_basic_drop_first_NAcCsZ|ddg}t|d|d}tdddgdddgdœtd}|rJ|jtdd    }t ||¡dS)
NrrTr­rr)rorqr'r1)rrrLr5r    r6r7rrrrrÚ!test_dataframe_dummies_drop_firstÿs  z0TestGetDummies.test_dataframe_dummies_drop_firstcCs¤tdddgƒ|d<t|d|d}tdddgd    dd    gd    d    dgd    ddgd
œƒ}d d d g}|| t¡||<|dd d d g}|r”|D]}t||ƒ||<q~t ||¡dS)Nr„r¦r§Tr­rrrr)rrorqr¨rorqr¨r)rrrrvrLr    r6r7)rrr"rr=r<r‰ÚcolrrrÚ2test_dataframe_dummies_drop_first_with_categoricals$ÿ
zATestGetDummies.test_dataframe_dummies_drop_first_with_categoricalc    Csötjtjtjg|jddd…f<t|dd|djdd}tdddtjgddddgddddgddddgddddgdœƒ}d    d
d d g}|| t¡||<|jdd}|r¼|D]}t||ƒ||<q¦t     
||¡t|d d|d}|dd    d g}t     
||¡dS)NrTr²rrYrr)rror¢rqr£ror¢rqr£Fr) rrRr¤rrNrrvrLr    r6r7)rrr"r=r<r‰rµrrrÚ)test_dataframe_dummies_drop_first_with_nas6ÿþ 
 
 
 
ûÿ       z8TestGetDummies.test_dataframe_dummies_drop_first_with_nacCs˜tdddgƒ}t|ƒ}tddgddgddggddgtd}t ||¡ttdddgƒƒ}t|ƒ}tddgddgddggtddgƒtd}t ||¡dS)NrrrrZr r )rrrrLr6r7r)rrªr=r<rrrÚtest_get_dummies_int_int0s$ 
ÿz'TestGetDummies.test_get_dummies_int_intc    Cs¸tdddgtdddgƒdddgdddgdœƒ}dd    d
d d d g}tddddddgddddddgddddddgg|d}||dd… |¡||dd…<t|ddg|d}t ||¡dS)Nrrr r gð?g@)rrrÚDrr¹ÚA_1ÚA_2ZB_arprrŒrrrZ)rrrvrr6r7)rrrªr?r<r=rrrÚtest_get_dummies_int_df=s üÿ,þ"z&TestGetDummies.test_get_dummies_int_dfÚorderedFcCs|ttdƒtdƒ|d}t||d}tjdddgdddgg| |¡d}t|j|j|d}t||| |¡d}t     
||¡dS)NZxyZxyz)Ú
categoriesr½r'rrrZ) rr4rrryr#rr¾rr6r7)rrr½r§r=rªr‰r<rrrÚ1test_dataframe_dummies_preserve_categorical_dtypeOs $ÿz@TestGetDummies.test_dataframe_dummies_preserve_categorical_dtypecCsLt ddgddgdœ¡}t|dg|d}|jdgd    }t |dg|¡dS)
NrrZABZCD)ÚGDPÚNationrÁ©r?r"rÀrŒ)rÚ    from_dictrr[r6r7)rr"rZdf2rrrÚ*test_get_dummies_dont_sparsify_all_columns]sz9TestGetDummies.test_get_dummies_dont_sparsify_all_columnscCs~dddg|_t|ƒjdd}tdddddgdddddgdddddggddd    d    d
gd jdd}| dtji¡}t ||¡dS) NrrrYTFrrrnroZA_crŒ)    r?rrNrrvrrKr6r7rwrrrÚ"test_get_dummies_duplicate_columnsfs    ý úù    z1TestGetDummies.test_get_dummies_duplicate_columnscCs`tdddgiƒ}t|dgdd}tddƒ}ttddg|dtddg|dd    œƒ}t ||¡dS)
NrrrTrÂrLrr')rºr»)rrr
r    r6r7)rrr=rr<rrrÚtest_get_dummies_all_sparsexs
þÿz*TestGetDummies.test_get_dummies_all_sparser\Úbazc
Csptddddddgddddddgd    d
d d    d
d gd d ddddgdœƒ}d}tjt|dt||dW5QRXdS)NrrréééZoneZtworrrr„r¦ÚzÚqÚwÚt)ÚbarZfoorÇZzooz1Input must be a list-like for parameter `columns`r$rŒ)rr(r)Ú    TypeErrorr)rr\rr+rrrÚ#test_get_dummies_with_string_values„süÿ    z2TestGetDummies.test_get_dummies_with_string_valuescCsTttdƒƒ}t||d}tddddgddddgddddgdœ|d}t ||¡dS)NÚabcar'rrr/)rr4rrr6r7)rÚany_numeric_ea_and_arrow_dtypeZserr=r<rrrÚ test_get_dummies_ea_dtype_series•s  "þz/TestGetDummies.test_get_dummies_ea_dtype_seriescCsXtdtdƒiƒ}t||d}tddddgddddgddddgdœ|d}t ||¡dS)Nr„rÒr'rr)Zx_aZx_bZx_c)rr4rr6r7)rrÓrr=r<rrrÚ#test_get_dummies_ea_dtype_dataframeŸs "þz2TestGetDummies.test_get_dummies_ea_dtype_dataframe)1Ú__name__Ú
__module__Ú __qualname__r(ZfixturerrÚfloat64rLrr"r#r,r>rOrVrdrlrsrxr|rŠrr‘ršrŸr r¡r¥r©ÚmarkZ parametrizerr¬r°r±r³r´r¶r·r¸r¼r¿rÄrÅrÆrÑrÔrÕrrrrr sx
 
 
2  þ þ þ þóþ
 
 
 
 
r )rœriÚnumpyrr(Zpandas.core.dtypes.commonrZpandasrrrrrrrZpandas._testingZ_testingr6Zpandas.core.arrays.sparser    r
r rrrrÚ<module>s