zmc
2023-08-08 e792e9a60d958b93aef96050644f369feb25d61b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
U
¡ý°d[<ã@slddlZddlZddlZddlmZddlmZmZmZGdd„deƒZ    Gdd„deƒZ
Gdd    „d    eƒZ dS)
éN)Ú BeautifulSoup)ÚEntitySubstitutionÚEncodingDetectorÚ UnicodeDammitc@sfeZdZdZdd„Zej dddddg¡d    d
„ƒZd d „Z    d d„Z
dd„Z dd„Z dd„Z dd„ZdS)ÚTestUnicodeDammitz"Standalone tests of UnicodeDammit.cCsd}t|ƒ}|j|kst‚dS)NuI'm already Unicode! â˜ƒ)rÚunicode_markupÚAssertionError)ÚselfÚmarkupÚdammit©r úLd:\z\workplace\vscode\pyvenv\venv\Lib\site-packages\bs4/tests/test_dammit.pyÚtest_unicode_inputsz$TestUnicodeDammit.test_unicode_inputz smart_quotes_to,expect_converted)Nu ‘’“”)Úxmlz &#x2018;&#x2019;&#x201C;&#x201D;)Úhtmlz&lsquo;&rsquo;&ldquo;&rdquo;)Úasciiz''""cCs,d}t|dg|dj}|d |¡ks(t‚dS)zbVerify the functionality of the smart_quotes_to argument
        to the UnicodeDammit constructor.s<foo>‘’“”</foo>ú windows-1252)Úknown_definite_encodingsÚsmart_quotes_toz <foo>{}</foo>N)rrÚformatr)r    rZexpect_convertedr
Z    convertedr r r Útest_smart_quotes_tos þz&TestUnicodeDammit.test_smart_quotes_tocCs0d}t|ƒ}|j ¡dkst‚|jdks,t‚dS)NsSacré bleu! â˜ƒúutf-8uSacré bleu! â˜ƒ©rÚoriginal_encodingÚlowerrr)r    Úutf8r r r r Útest_detect_utf8&sz"TestUnicodeDammit.test_detect_utf8cCs4d}t|dgƒ}|j ¡dks"t‚|jdks0t‚dS)Nóíåìùú
iso-8859-8uםולשr©r    Úhebrewr r r r Útest_convert_hebrew,s z%TestUnicodeDammit.test_convert_hebrewcCs6d}t|ƒ}|j ¡dkst‚|j d¡|ks2t‚dS)Nsケータイ Watchr)rrrrrÚencode)r    Úutf_8r r r r Ú/test_dont_see_smart_quotes_where_there_are_none2szATestUnicodeDammit.test_dont_see_smart_quotes_where_there_are_nonecCs,d d¡}t|dgƒ}|j ¡dks(t‚dS)Nõ RäksmörgÃ¥srr©r"rrrr©r    Ú    utf8_datar r r r Ú test_ignore_inappropriate_codecs8s
 z2TestUnicodeDammit.test_ignore_inappropriate_codecscCs6d d¡}dD]"}t||gƒ}|j ¡dkst‚qdS)Nr%r)z.utf8z...z
utF---16.!r&)r    r(Z bad_encodingr r r r Útest_ignore_invalid_codecs=s
 z,TestUnicodeDammit.test_ignore_invalid_codecscCsLd d¡}t|dgd}|j ¡dks*t‚t|ddgd}|jdksHt‚dS)Nr%r)Zexclude_encodingsrr&r'r r r Útest_exclude_encodingsCs
ÿz(TestUnicodeDammit.test_exclude_encodingsN)Ú__name__Ú
__module__Ú __qualname__Ú__doc__rÚpytestÚmarkÚ parametrizerrr!r$r)r*r+r r r r r s"ýþ
 
rc@sTeZdZdd„Zdd„Zdd„Zdd„Zd    d
„Zd d „Zd d„Z    dd„Z
dd„Z dS)ÚTestEncodingDetectorcCs"tdƒ}t|jƒ}d|kst‚dS)Ns'<?xml version="1.0" encoding="UTF-Û" ?>uutf-�)rÚlistÚ    encodingsr)r    Zdetectedr5r r r ÚPtest_encoding_detector_replaces_junk_in_encoding_name_with_replacement_characterSs
ÿ
zeTestEncodingDetector.test_encoding_detector_replaces_junk_in_encoding_name_with_replacement_charactercCs(dD]}t|dd}d|jkst‚qdS)N)s&<html><meta charset="euc-jp" /></html>s&<html><meta charset='euc-jp' /></html>s$<html><meta charset=euc-jp /></html>s#<html><meta charset=euc-jp/></html>T©Úis_htmlzeuc-jp)rrr©r    Údatar r r r Ú test_detect_html5_style_meta_tagYs z5TestEncodingDetector.test_detect_html5_style_meta_tagc    Cs€d}tjj}t tj¡zLdd„}|tj_t|ƒ}d|jks@t    ‚d|j
ksNt    ‚t |dƒ}|jsbt    ‚W5t tj¡|tj_XdS)NsT<?xml version="1.0" encoding="UTF-8"?>
<html><b>بتر</b>
<i>ÈÒѐÊÑíä</i></html>cSsdS©Nr )Ústrr r r ÚnoopyszETestEncodingDetector.test_last_ditch_entity_replacement.<locals>.noopTu�z html.parser) Úbs4r Zchardet_dammitÚloggingÚdisableÚWARNINGÚNOTSETrZcontains_replacement_charactersrrr)r    ÚdocÚchardetr>r Zsoupr r r Ú"test_last_ditch_entity_replacementcs 
 z7TestEncodingDetector.test_last_ditch_entity_replacementcCs,d}t|ƒ}d|jkst‚d|jks(t‚dS)Nóÿþ<a>áé</a>u <a>áé</a>úutf-16le)rrrrr9r r r Útest_byte_order_mark_removed†sz1TestEncodingDetector.test_byte_order_mark_removedcCs¢d}t|ƒ}t|dgd}d|jks(t‚t|dgd}d|jksDt‚dgdd„|jDƒks^t‚d    }t|dgd
gd }d
|jks‚t‚dd
gd d„|jDƒksžt‚dS) NrGzutf-16)rr)Úuser_encodingsrHcSsg|] }|d‘qS©rr ©Ú.0Úxr r r Ú
<listcomp>ŸszRTestEncodingDetector.test_known_definite_versus_user_encodings.<locals>.<listcomp>rr)rrJcSsg|] }|d‘qSrKr rLr r r rOªs©rrrÚtried_encodings)r    r:r ZbeforeÚafterr r r r Ú)test_known_definite_versus_user_encodingssÿz>TestEncodingDetector.test_known_definite_versus_user_encodingscCsJd}t|dgdgdgd}d|jks(t‚dddgdd„|jDƒksFt‚dS)Nrz    shift-jisrr)rZoverride_encodingsrJcSsg|] }|d‘qSrKr rLr r r rO»szKTestEncodingDetector.test_deprecated_override_encodings.<locals>.<listcomp>rPrr r r Ú"test_deprecated_override_encodings¬süÿz7TestEncodingDetector.test_deprecated_override_encodingsc    Cs`d d¡}d d¡}|||}t t¡| d¡W5QRXt |¡}d| d¡ks\t‚dS)Nu    â˜ƒâ˜ƒâ˜ƒru“Hi, I like Windows!â€Ú windows_1252u+☃☃☃“Hi, I like Windows!”☃☃☃)r"r0ZraisesÚUnicodeDecodeErrorÚdecoderÚ    detwingler)r    rrUrDZfixedr r r Útest_detwingle¾s
ÿþ  
z#TestEncodingDetector.test_detwinglecCs<dD]2}| d¡}| d¡s t‚t |¡}||kst‚qdS)N)uœuₓu𐐓ró“)r"ÚendswithrrrX)r    Ztricky_unicode_charÚinputÚoutputr r r Ú+test_detwingle_ignores_multibyte_charactersÕs
 
 
z@TestEncodingDetector.test_detwingle_ignores_multibyte_characterscCsd}| d¡}d}| d¡}tj}||dddks6t‚d||ddksJt‚d||ddks^t‚d||ƒksnt‚d||ƒks~t‚d    d
}|||ƒdksšt‚|||ƒdks®t‚|||ddd dksÈt‚||dd dksÜt‚|d    |dd dksôt‚|d |dd dkst‚dS)Nz0<html><head><meta charset="utf-8"></head></html>rz,<?xml version="1.0" encoding="ISO-8859-1" ?>Fr7rTz
iso-8859-1ó iˆ)r8Úsearch_entire_document)r`óa)r"rZfind_declared_encodingr)r    Z html_unicodeZ
html_bytesZ xml_unicodeZ    xml_bytesÚmZspacerr r r Útest_find_declared_encodingäs(
 
ÿÿz0TestEncodingDetector.test_find_declared_encodingN) r,r-r.r6r;rFrIrSrTrYr^rcr r r r r3Qs
#r3c@s’eZdZdZdd„Zej dddg¡dd„ƒZd    d
„Z    d d „Z
d d„Z dd„Z dd„Z dd„Zdd„Zdd„Zdd„Zdd„Zdd„Zdd „Zd!S)"ÚTestEntitySubstitutionz1Standalone tests of the EntitySubstitution class.cCs
t|_dSr<)rÚsub©r    r r r Ú setup_methodsz#TestEntitySubstitution.setup_methodzoriginal,substituted)ufoo∀☃õbarufoo&forall;☃&otilde;bar)u‘’foo“”z&lsquo;&rsquo;foo&ldquo;&rdquo;cCs|j |¡|kst‚dSr<©reZsubstitute_htmlr)r    ÚoriginalZ substitutedr r r Útest_substitute_htmlsz+TestEntitySubstitution.test_substitute_htmlcCs:dD]0\}}d}||}||}|j |¡|kst‚qdS)N)
)z&models;u⊧)z&Nfr;u𝔑)z&ngeqq;u≧̸)z&not;õ¬)z&Not;u⫬z||)Úfjrl)z&gt;ú>)z&lt;ú<)z&amp;ú&z3 %s 4rh)r    ÚentityÚuÚtemplateÚrawZ with_entitiesr r r Útest_html5_entity%s
 z(TestEntitySubstitution.test_html5_entitycCs<d}d}|j |¡|kst‚d}d}|j |¡|ks8t‚dS)Nufjords âŠ” penguinszfjords &sqcup; penguinsufjords âŠ”ï¸€ penguinszfjords &sqcups; penguinsrh)r    r:r
r r r Ú)test_html5_entity_with_variation_selectorCs z@TestEntitySubstitution.test_html5_entity_with_variation_selectorcCsd}|j |d¡|kst‚dS)NúWelcome to "my bar"F©reZsubstitute_xmlr©r    Úsr r r ÚItest_xml_converstion_includes_no_quotes_if_make_quoted_attribute_is_falseOsz`TestEntitySubstitution.test_xml_converstion_includes_no_quotes_if_make_quoted_attribute_is_falsecCs0|j dd¡dkst‚|j dd¡dks,t‚dS)NZWelcomeTz    "Welcome"z    Bob's Barz "Bob's Bar"rwrfr r r Ú6test_xml_attribute_quoting_normally_uses_double_quotesSszMTestEntitySubstitution.test_xml_attribute_quoting_normally_uses_double_quotescCsd}|j |d¡dkst‚dS)NrvTz'Welcome to "my bar"'rwrxr r r ÚOtest_xml_attribute_quoting_uses_single_quotes_when_value_contains_double_quotesWszfTestEntitySubstitution.test_xml_attribute_quoting_uses_single_quotes_when_value_contains_double_quotescCsd}|j |d¡dkst‚dS)NúWelcome to "Bob's Bar"Tz""Welcome to &quot;Bob's Bar&quot;"rwrxr r r Úbtest_xml_attribute_quoting_escapes_single_quotes_when_value_contains_both_single_and_double_quotes[szyTestEntitySubstitution.test_xml_attribute_quoting_escapes_single_quotes_when_value_contains_both_single_and_double_quotescCsd}|j |¡|kst‚dS)Nr}rw)r    Úquotedr r r Ú<test_xml_quotes_arent_escaped_when_value_is_not_being_quoted_szSTestEntitySubstitution.test_xml_quotes_arent_escaped_when_value_is_not_being_quotedcCs|j d¡dkst‚dS)Nzfoo<bar>zfoo&lt;bar&gt;rwrfr r r Ú'test_xml_quoting_handles_angle_bracketscsz>TestEntitySubstitution.test_xml_quoting_handles_angle_bracketscCs|j d¡dkst‚dS)NzAT&TzAT&amp;Trwrfr r r Ú#test_xml_quoting_handles_ampersandsfsz:TestEntitySubstitution.test_xml_quoting_handles_ampersandscCs|j d¡dkst‚dS)Nú &Aacute;T&Tz&amp;Aacute;T&amp;Trwrfr r r ÚEtest_xml_quoting_including_ampersands_when_they_are_part_of_an_entityisz\TestEntitySubstitution.test_xml_quoting_including_ampersands_when_they_are_part_of_an_entitycCs|j d¡dkst‚dS)Nrƒz&Aacute;T&amp;T)reZ"substitute_xml_containing_entitiesrrfr r r ÚDtest_xml_quoting_ignoring_ampersands_when_they_are_part_of_an_entitylsz[TestEntitySubstitution.test_xml_quoting_ignoring_ampersands_when_they_are_part_of_an_entitycCsd}|j |¡|kst‚dS)z:There's no need to do this except inside attribute values.z Bob's "bar"Nrh)r    Útextr r r Ú test_quotes_not_html_substitutedosz7TestEntitySubstitution.test_quotes_not_html_substitutedN)r,r-r.r/rgr0r1r2rjrtrurzr{r|r~r€rr‚r„r…r‡r r r r rds*øþ
 rd) r0r@r?rZ
bs4.dammitrrrÚobjectrr3rdr r r r Ú<module>s E?