zmc
2023-08-08 e792e9a60d958b93aef96050644f369feb25d61b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
U
®ý°dùgã @s¦ddlmZddlZddlZddlZddlmZmZmZm    Z    m
Z dd„Z dd„Z dd    „Zd
d „Zd d „Zdd„Zdd„Zdd„Zdd„Zdd„Zdd„Zdd„Zdd„Zej dddg¡d d!„ƒZd"d#„Zd$d%„Zd&d'„Zd(d)„Zej d*d+d,d-gfd.dd-gfg¡d/d0„ƒZ d1d2„Z!d3d4„Z"ej d5gd6fgd7fgd8fgd9fd:d;d<gd6fd:d;d<gd9fd:d;d<gd8fd:d;d<gd9fg¡d=d>„ƒZ#d?d@„Z$dAdB„Z%dCdD„Z&dEdF„Z'dGdH„Z(dS)Ié)ÚdatetimeN)Ú    DataFrameÚIndexÚ
MultiIndexÚSeriesÚ_testingc    CsBtdtjdg|d}tjtdd|jjdddW5QRXdS)NÚfooBAD__barBADÚfoo©Údtypezexpand must be True or False©Úmatchú.*(BAD[_]+).*(BAD)©Úexpand)rÚnpÚnanÚpytestÚraisesÚ
ValueErrorÚstrÚextract)Úany_string_dtypeÚvalues©rúXd:\z\workplace\vscode\pyvenv\venv\Lib\site-packages\pandas/tests/strings/test_extract.pyÚ+test_extract_expand_kwarg_wrong_type_raisessrcCs¤tdtjdg|d}tdtjtjg|d}|j d¡}t ||¡|jjddd}t ||¡tddgtjtjgtjtjgg|d}|jjd    d
d}t ||¡dS) Nrr    r
ÚBAD__z .*(BAD[_]+).*TrÚBADrF)rrrrrrÚtmÚassert_frame_equal)rÚsÚexpectedÚresultrrrÚtest_extract_expand_kwargs   ÿr$c
Cs²tdtjddt ¡ddddg    ƒ}|jjddd    }tjtjg}td
d g|d
d g||||||g    ƒ}t     ||¡|jjd dd    }td
tjd
tjtjtjtjtjtjg    ƒ}t 
||¡dS) NÚaBAD_BADÚ    BAD_b_BADTr    éç@rFrÚBAD_rz.*(BAD[_]+).*BAD) rrrrÚtodayrrrrr Úassert_series_equal)Úserr#Úerr"rrrÚ&test_extract_expand_False_mixed_object(sÿ " "ÿr.c    CsDtdddddgƒ}d}tjt|d|jjdd    d
W5QRXdS) NÚA1ÚA2ÚA3ZA4ZB5z,only one regex group is supported with Indexr ú ([AB])([123])Fr)rrrrrr)ÚidxÚmsgrrrÚ test_extract_expand_index_raises;sr5c    Csn|dddg|d}d}tjt|d|jjddd    W5QRXtjt|d|jjd
dd    W5QRXdS) Nr/ÚB2ÚC3r
ú"pattern contains no capture groupsr ú
[ABC][123]Frú
(?:[AB]).*©rrrrr©Úindex_or_seriesrÚs_or_idxr4rrrÚ,test_extract_expand_no_capture_groups_raisesEs r?cCsX|ddg|d}|jjddd}|ddgd|d    }|tkrHt ||¡n t ||¡dS)
Nr/r0r
ú (?P<uno>A)\dFrÚAÚuno©Únamer )rrrrr+Zassert_index_equal©r=rr>r#r"rrrÚ(test_extract_expand_single_capture_groupRs rFcCsàtdddg|d}|jjddd}ttjtjtjg|d}t ||¡|jjddd}ttjtjgtjtjgtjtjgg|d}t ||¡|jjd    dd}td
d tjg|d}t ||¡|jjd dd}td
d gd dgtjtjgg|d}t ||¡|jjddd}td
d tjgd|d}t ||¡|jjddd}td
d gd dgtjtjggddg|d}t ||¡|jjddd}td
d gd dgtjtjggddg|d}t ||¡|jjddd}td
d tjg|d}t ||¡tdddg|d}|jjddd}td
d gd dgtjtjgg|d}t ||¡tdddg|d}|jjddd}td
d gd dgtjdggddg|d}t ||¡tdddg|d}|jjddd}td
d gd dgdtjggddg|d}t ||¡dS) Nr/r6r7r
ú(_)Frú(_)(_)ú ([AB])[123]rAÚBr2Ú1Ú2ú(?P<letter>[AB])ÚletterrCú!(?P<letter>[AB])(?P<number>[123])Únumber©Úcolumnsr ú([AB])(?P<number>[123])rú([AB])(?:[123])ÚA11ÚB22ÚC33ú([AB])([123])(?:[123])Ú3ú"(?P<letter>[AB])?(?P<number>[123])ÚCú#(?P<letter>[ABC])(?P<number>[123])?)    rrrrrrr+rr ©rr!r#r"rrrÚ"test_extract_expand_capture_groups^s|  ÿ  ÿ  ý ý  ÿ ý ýr^cCsÔdddg}t|ƒdkr t d¡t|ƒt|ƒkr<| d¡}q |dt|ƒ…}t|||d}|jjdd    d
}td d tjg||d}t     
||¡|jjd d    d
}t dd gdd gdtjggddg||d}t      ||¡dS)Nr/r6r[rzTest requires len(index) > 0é©Úindexr ú(\d)FrrKrLú(?P<letter>\D)(?P<number>\d)?rArJrNrP©rRrar ) ÚlenrÚskipÚrepeatrrrrrrr+rr )rarÚdatar,r#r"rrrÚ(test_extract_expand_capture_groups_index±s$
 
  üricCsHtdddgd|d}|jjddd}td    d
d gd |d}t ||¡dS) NÚa3Úb3Úc2ZbobrCz(?P<sue>[a-z])FrÚaÚbÚcZsue)rrrrr+r]rrrÚ,test_extract_single_series_name_is_preservedÌsrpcCsZtdtjdg|d}|jjddd}tddgtjtjgtjtjgg|d}t ||¡dS)    Nrr    r
rTrrr)rrrrrrrr r]rrrÚtest_extract_expand_TrueÓsÿrqc
Csntjtjg}tdtjddt ¡ddddg    ƒ}|jjddd}td    d
g|d    d
g||||||g    ƒ}t     ||¡dS) Nr%r&Tr    r'r(rrr)r)
rrrrr*rrrrr )r-Úmixedr#r"rrrÚ%test_extract_expand_True_mixed_objectÞs  ÷ÿ"rsc    Csn|dddg|d}d}tjt|d|jjddd    W5QRXtjt|d|jjd
dd    W5QRXdS) Nr/r6r7r
r8r r9Trr:r;r<rrrÚ4test_extract_expand_True_single_capture_group_raisesós rtcCsD|ddg|d}|jjddd}tdddgi|d}t ||¡dS)    Nr/r0r
r@TrrBrA)rrrrr rErrrÚ-test_extract_expand_True_single_capture_groupsrurDÚ series_namecCsìtdddg||d}|jjddd}ttjtjtjg|d}t ||¡|jjd    dd}ttjtjgtjtjgtjtjgg|d}t ||¡|jjd
dd}td d tjg|d}t ||¡|jjd dd}td dgd dgtjtjgg|d}t ||¡|jjddd}tdd d tjgi|d}t ||¡|jjddd}td dgd dgtjtjggddg|d}t ||¡|jjddd}td dgd dgtjtjggddg|d}t ||¡|jjddd}td d tjg|d}t ||¡dS)Nr/r6r7rCrGTrr
rHrIrArJr2rKrLrMrNrOrPrQrSrrT©rrrrrrrr )rDrr!r#r"rrrÚtest_extract_series
sN  ÿ  ÿ  ý ý rxcCsütdddg|d}|jjddd}tdd    gd
d gtjtjgg|d}t ||¡td d dg|d}|jjddd}tdd    gd
d gtjdggddg|d}t ||¡td d dg|d}|jjddd}tdd    gd
d gdtjggddg|d}t ||¡dS)NrUrVrWr
rXTrrArKrJrLr/r6rYrZrNrPrQr[r\rwr]rrrÚtest_extract_optional_groupsDs.ÿ ý ýrycCs¼dddg}t|ƒt|ƒkr$t d¡|dt|ƒ…}t|||d}|jjddd}td    d
tjg||d}t     
||¡|jjd dd}td d    gd d
gdtjggddg||d}t     
||¡dS)Nr/r6r[zIndex too shortr`rbTrrKrLrcrArJrNrPrd) rerrfrrrrrrrr )rarrhr!r#r"rrrÚ+test_extract_dataframe_capture_groups_indexbs 
 
 ürzcCsJtdddgd|d}|jjddd}td    d
d d gi|d }t ||¡dS)NrjrkrlrvrCú(?P<letter>[a-z])TrrNrmrnror
)rrrrrr r]rrrÚ'test_extract_single_group_returns_frame|sr|c
    CsNdddddtjdg}ddd    d
d d d dg}d}dddg}t||d}tjddddddddgdd}t||||d}|jj|tj    d}t
  ||¡t dd d!d"d#d$d%g¡}    t||    |d&}tjd'd(d)d*d+d,d-d.gd/d}t||||d}|jj|tj    d}t
  ||¡t||    |d&}d0|j _ d1|_ t||||d}|jj|tj    d}t
  ||¡dS)2Nzdave@google.comztdhock5@gmail.comzmaudelaperriere@gmail.comz'rob@gmail.com some text steve@gmail.comz%a@b.com some text c@d.com and e@f.comÚ)ZdaveZgoogleÚcom)Ztdhock5Úgmailr~)Zmaudelaperriererr~)Zrobrr~)Zsteverr~)rmrnr~)roÚdr~)ÚeÚfr~zY
    (?P<user>[a-z0-9]+)
    @
    (?P<domain>[a-z]+)
    \.
    (?P<tld>[a-z]{2,4})
    ÚuserÚdomainÚtldr
©rr©r'r©r_r)ér)r‰r')ér)rŠr')rŠr_©Nr ©Únames)Úflags)ÚsingleÚDave)rÚToby)rÚMaude)ÚmultipleÚ robAndSteve)r“Úabcdef)ÚnoneÚmissing)r–Úemptyr`)rrr)rr‘r)rr’r)r“r”r)r“r”r')r“r•r)r“r•r')r“r•r_)NNr )ÚmatchesÚ description)r™ršr )rrrrÚ from_tuplesrrÚ
extractallÚreÚVERBOSErr rar)
rrhZexpected_tuplesÚpatZexpected_columnsr!Úexpected_indexr"r#ÚmirrrÚtest_extractall†s”ù
ø
 
 þÿ ùÿ ø
õ ÿ ÿr¢zpat,expected_namesrZrNrPz([AB])?(?P<number>[123])cCs`tdddg|d}|j |¡}tdtjdftjdfgtjdd    d
gd d ||d }t     ||¡dS)Nr}r/Ú32r
)rArKrYrLr‡rˆ©r_r'r‹rŒ)rarRr )
rrrœrrrrr›rr )rŸZexpected_namesrr!r#r"rrrÚtest_extractall_column_namesÛs
 ür¥cCsŽtdddgd|d}tjdddd    gd
d }|j d ¡}td ddddgi||d}t ||¡|j d¡}tddddg||d}t ||¡dS)NrjrkÚd4c2rvrCr†r‡rˆr¤r‹rŒr{rNrmrnr€ror`ú([a-z]))rrr›rrœrrr )rr!r r#r"rrrÚtest_extractall_single_groupñs&
ÿ ÿ  
ÿr¨cCsXtdddgd|d}|j d¡}tddd    d
gtjd d d dgdd|d}t ||¡dS)NZab3Zabc3Zd4cd2rvrCz([a-z]+)ÚabÚabcr€Zcdr†r‡rˆr¤r‹rŒr`)rrrœrrr›rr r]rrrÚ,test_extractall_single_group_with_quantifiers 
 
ÿûr«z data, names)N)Úi1)NÚi2)r¬r­rjrkr¦c    sBt|ƒ‰t|ƒdkr*ttˆƒ|dd}n$‡fdd„tˆƒDƒ}tj||d}t|d||d}tjg|d    d}|j d
¡}tdg||d }t     
||¡|j d ¡}tddg||d }t     
||¡|j d ¡}tdg||d }t     
||¡|j d¡}tddg||d }t     
||¡|j d¡}tddg||d }t     
||¡dS)Nr'r©rDc3s |]}t|gˆdƒVqdS)r'N)Útuple)Ú.0Úi©ÚnrrÚ    <genexpr>)sz-test_extractall_no_matches.<locals>.<genexpr>rŒrv©rDrar r z(z)rdz(z)(z)z (?P<first>z)Úfirstz(?P<first>z)(?P<second>z)Úsecondz(z)(?P<second>z)) rerÚrangerr›rrrœrrr )    rhrrraZtuplesr!r r#r"rr²rÚtest_extractall_no_matchessD      ÿ  ÿ  ÿr¹cCs
tdddgd|d}|j d¡}tddd    dgitjd
d d gdd gd|d}t ||¡|dkržtdddgƒtdddgddfD]}|j d¡}t ||¡q€tdddgdtdddgdd|d}|j d¡}tddd    dgitjdddgdd gd|d}t ||¡dS)NZa1a2Zb1Zc1ZxxxrCz[ab](?P<digit>\d)ÚdigitrKrLr†)rr'r‡r rŒr`Úobjectr®Zs_nameÚXXÚyyÚzzZidx_namerµ)r¼r)r¼r')r½r)    rrrœrrr›rr r)rr!r#r"r3rrrÚtest_extractall_stringindexNs<  ý  þ ü  ÿûr¿c    Cs>tdddgd|d}tjtdd|j d¡W5QRXdS)    Nrjrkr¦rvrCzno capture groupsr z[a-z])rrrrrrœ)rr!rrrÚ(test_extractall_no_capture_groups_raisesssrÀcCs‚tdddgdddgdd}|jjjd    d
d }td d dgƒ}t ||¡|jjjdd
d }dddg}t|ddgd}t ||¡dS)Nrjrkr¦r1ZB3ZD4rv)rarDz([A-Z])TrrArJÚDz!(?P<letter>[A-Z])(?P<digit>[0-9]))rArY)rJrY)rÁÚ4rNrº)rR)rrarrrrr )r!ÚrrZe_listrrrÚ!test_extract_index_one_two_groups{s 
rÄc Cstdddgd|d}d}|jj|dd}|j |¡}|jd    d
d }t ||¡d }|jj|dd}|j |¡}|jd    d
d }t ||¡d }|jj|dd}    |j |¡}|jd    d
d }t |    |¡d}
|jj|
dd} |j |
¡}|jd    d
d }t | |¡dS)NrjrkrlrvrCú([a-z])([0-9])Trrr ©Úlevelú!(?P<letter>[a-z])(?P<digit>[0-9])ú(?P<group_name>[a-z])r§)rrrrœÚxsrr ) rr!Úpattern_two_nonameÚextract_two_nonameZhas_multi_indexZno_multi_indexÚpattern_two_namedÚextract_two_namedÚpattern_one_namedÚextract_one_namedÚpattern_one_nonameÚextract_one_nonamerrrÚtest_extractall_same_as_extractŠs*       rÓc Cstjdddgdd}tdddg|d    |d
}d }|jj|d d }|j |¡}|jddd}t ||¡d}|jj|d d }|j |¡}|jddd}t ||¡d}    |jj|    d d }
|j |    ¡}|jddd}t |
|¡d} |jj| d d } |j | ¡}|jddd}t | |¡dS)N)rAr¶)rJr·)r[Úthird)ZcapitalZordinalrŒrjrkrlrv)rarDr rÅTrrr rÆrÈrÉr§)    rr›rrrrœrÊrr ) rr¡r!rËrÌZhas_match_indexZno_match_indexrÍrÎrÏrÐrÑrÒrrrÚ-test_extractall_same_as_extract_subject_index¦s2þ       rÕ))rrÚnumpyrrZpandasrrrrrrrr$r.r5r?rFr^rirprqrsrtruÚmarkZ parametrizerxryrzr|r¢r¥r¨r«r¹r¿rÀrÄrÓrÕrrrrÚ<module>sd     
  S 
9
U
 
üþ    
    øþ
+%