1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
U
¡ý°d†Jã
@sdZdgZddlZddlZddlmZmZmZmZm    Z    ddl
m Z m Z ddl Z ddlmZmZddl
mZmZmZmZzddlmZd    ZWn2ek
r¼Zzdd
lmZd ZW5dZ[XYnXGd d„de    ƒZGd d„dejƒZGdd„deƒZ Gdd„dej!ƒZ"Gdd„de"ƒZ#dS)ÚMITÚHTML5TreeBuilderéN)ÚDetectsXMLParsedAsHTMLÚ
PERMISSIVEÚHTMLÚHTML_5ÚHTMLTreeBuilder)ÚNamespacedAttributeÚnonwhitespace_re)Ú
namespacesÚprefixes)ÚCommentÚDoctypeÚNavigableStringÚTag)Ú_baseF)ÚbaseTc@sFeZdZdZdZeeeegZdZ    d dd„Z
dd„Z d    d
„Z d d „Z dS)raUse html5lib to build a tree.
 
    Note that this TreeBuilder does not support some features common
    to HTML TreeBuilders. Some of these features could theoretically
    be implemented, but at the very least it's quite difficult,
    because html5lib moves the parse tree around as it's being built.
 
    * This TreeBuilder doesn't use different subclasses of NavigableString
      based on the name of the tag in which the string was found.
 
    * You can't use a SoupStrainer to parse only part of a document.
    Úhtml5libTNccs4||_|rtjdddt |¡|dddfVdS)NzjYou provided a value for exclude_encoding, but the html5lib tree builder doesn't support exclude_encoding.é©Ú
stacklevelF)Úuser_specified_encodingÚwarningsÚwarnrZwarn_if_markup_looks_like_xml)ÚselfÚmarkuprZdocument_declared_encodingZexclude_encodings©rúLd:\z\workplace\vscode\pyvenv\venv\Lib\site-packages\bs4/builder/_html5lib.pyÚprepare_markup@sþ
zHTML5TreeBuilder.prepare_markupcCsª|jjdk    rtjdddtj|jd}||j_t    ƒ}t
|t ƒsZt rP|j |d<n
|j |d<|j|f|Ž}t
|t ƒrzd|_n$|jjjd}t
|t ƒs˜|j}||_d|j_dS)Nz‚You provided a value for parse_only, but the html5lib tree builder doesn't support parse_only. The entire document will be parsed.ér)ÚtreeÚoverride_encodingÚencodingr)ÚsoupZ
parse_onlyrrrÚ
HTMLParserÚcreate_treebuilderÚunderlying_builderÚparserÚdictÚ
isinstanceÚstrÚ new_html5librÚparseÚoriginal_encodingÚ    tokenizerÚstreamÚ charEncodingÚname)rrr'Ú extra_kwargsÚdocr-rrrÚfeedUs( þ
 
 
 
zHTML5TreeBuilder.feedcCst||j|jd|_|jS)N)Ústore_line_numbers)ÚTreeBuilderForHtml5libr#r5r&)rÚnamespaceHTMLElementsrrrr%ts þz#HTML5TreeBuilder.create_treebuildercCsd|S)zSee `TreeBuilder`.z)<html><head></head><body>%s</body></html>r)rÚfragmentrrrÚtest_fragment_to_document{sz*HTML5TreeBuilder.test_fragment_to_document)NN)Ú__name__Ú
__module__Ú __qualname__Ú__doc__ÚNAMErrrÚfeaturesZTRACKS_LINE_NUMBERSrr4r%r9rrrrr*s  ÿ
csfeZdZd‡fdd„    Zdd„Zdd„Zd    d
„Zd d „Zd d„Zdd„Z    dd„Z
dd„Z dd„Z ‡Z S)r6NTc sL|r ||_n ddlm}|dd|i|—Ž|_tt|ƒ |¡d|_||_dS)Nr©Ú BeautifulSoupÚú html.parserr5)rBrC)r#Úbs4rAÚsuperr6Ú__init__r'r5)rr7r#r5ÚkwargsrA©Ú    __class__rrrF‚s ÿÿþzTreeBuilderForHtml5lib.__init__cCs|j ¡t|j|jdƒS©N)r#ÚresetÚElement©rrrrÚ documentClass˜s
z$TreeBuilderForHtml5lib.documentClasscCs6|d}|d}|d}t |||¡}|j |¡dS)Nr1ÚpublicIdÚsystemId)rZfor_name_and_idsr#Úobject_was_parsed)rÚtokenr1rOrPÚdoctyperrrÚ insertDoctypeœs
z$TreeBuilderForHtml5lib.insertDoctypecCsVi}|jr6|jr6|jjj ¡\}}||d<|d|d<|jj||f|Ž}t||j|ƒS)NÚ
sourcelineéÚ    sourcepos)r'r5r.r/Úpositionr#Únew_tagrL)rr1Ú    namespacerGrUrWÚtagrrrÚ elementClass¤s  z#TreeBuilderForHtml5lib.elementClasscCstt|ƒ|jƒSrJ)ÚTextNoder r#)rÚdatarrrÚ commentClass±sz#TreeBuilderForHtml5lib.commentClasscCs0ddlm}|ddƒ|_d|j_t|j|jdƒS)Nrr@rBrCz[document_fragment])rDrAr#r1rL)rrArrrÚ fragmentClass´s  z$TreeBuilderForHtml5lib.fragmentClasscCs|j |j¡dSrJ)r#ÚappendÚelement©rÚnoderrrÚ appendChild¼sz"TreeBuilderForHtml5lib.appendChildcCs|jSrJ)r#rMrrrÚ getDocumentÀsz"TreeBuilderForHtml5lib.getDocumentcCstj |¡jSrJ)Útreebuilder_baseÚ TreeBuilderÚ getFragmentrbrMrrrriÃsz"TreeBuilderForHtml5lib.getFragmentcsBddlm‰g‰t d¡‰d‡‡‡‡fdd„    ‰ˆ|dƒd ˆ¡S)Nrr@z8^(.*?)(?: PUBLIC "(.*?)"(?: "(.*?)")?| SYSTEM "(.*?)")?$c    sæt|ˆƒr
t|tƒr¨ˆ |¡}|r| d¡}|jdkrx| d¡pBd}| d¡pZ| d¡pZd}ˆ dd||||f¡q¤ˆ dd||f¡nˆ d    d|f¡n:t|tƒr̈ d
d||f¡nt|tƒrd d||f¡nô|jr d t    |j|j
f}n|j
}ˆ d d||f¡|j rÂg}t |j   ¡ƒD]N\}}t|tƒrjd t    |j|j
f}t|t ƒr€d |¡}| ||f¡qBt|ƒD]&\}}ˆ dd|d||f¡qš|d7}|jD]}ˆ||ƒqÐdS)NrVérBrrz|%s<!DOCTYPE %s "%s" "%s">ú z|%s<!DOCTYPE %s>z|%s<!DOCTYPE >z|%s<!-- %s -->z|%s"%s"z%s %sz|%s<%s>z
|%s%s="%s")r)rÚmatchÚgroupÚ    lastindexrar rrZr r1ÚattrsÚlistÚitemsr    ÚjoinÚsortedÚchildren)    rbÚindentÚmr1rOrPÚ
attributesÚvalueÚchild©rAZ
doctype_reÚrvÚserializeElementrrr|ËsH
 
 
 
 
ÿ
 
 
ÿ  
 
z?TreeBuilderForHtml5lib.testSerializer.<locals>.serializeElementÚ
)r)rDrAÚreÚcompilerr©rrbrrzrÚtestSerializerÆs  
)
z%TreeBuilderForHtml5lib.testSerializer)NT)r:r;r<rFrNrTr\r_r`rerfrirÚ __classcell__rrrHrr6€sÿ r6c@sLeZdZdd„Zdd„Zdd„Zdd„Zd    d
„Zd d „Zd d„Z    dd„Z
dS)ÚAttrListcCs||_t|jjƒ|_dSrJ)rbr(ror€rrrrFùszAttrList.__init__cCst|j ¡ƒ ¡SrJ)rprorqÚ__iter__rMrrrr„üszAttrList.__iter__cCs^|jjp
i}|| dg¡ks<|jj|krP|| |jjg¡krPt|tƒsPt |¡}||j|<dS)NÚ*)rbZcdata_list_attributesÚgetr1r)rpr
Úfindall)rr1rxZ    list_attrrrrÚ __setitem__þs 
ÿþ
 
zAttrList.__setitem__cCst|j ¡ƒSrJ)rprorqrMrrrrq
szAttrList.itemscCst|j ¡ƒSrJ©rproÚkeysrMrrrrŠ sz AttrList.keyscCs
t|jƒSrJ)ÚlenrorMrrrÚ__len__szAttrList.__len__cCs
|j|SrJ)ro©rr1rrrÚ __getitem__szAttrList.__getitem__cCs|t|j ¡ƒkSrJr‰rrrrÚ __contains__szAttrList.__contains__N) r:r;r<rFr„rˆrqrŠrŒrŽrrrrrrƒøs rƒc@sxeZdZdd„Zdd„Zdd„Zdd„ZeeeƒZdd
d „Z    d d „Z
dd„Z dd„Z dd„Z dd„Zdd„ZeeƒZd    S)rLcCs&tj ||j¡||_||_||_dSrJ)rgÚNoderFr1rbr#rZ)rrbr#rZrrrrFszElement.__init__cCs*d}}t|tƒr|}}n:t|tƒr,|}n*|jjtkrJ|j}}||_n |j}||_t|tƒst|jdk    rt|j ¡|dk    rÆ|jjrÆ|jjdjtkrÆ|jjd}|j     
||¡}|  |¡||j    _ n`t|tƒrÜ|j     
|¡}|jjrò|j  d¡}n |jjdk    r |j      ¡}n|j}|j    j||j|ddS)NéÿÿÿÿF)ÚparentÚmost_recent_element)r)r*rrbrIrr’ÚextractÚcontentsr#Ú
new_stringÚ replace_withZ_most_recent_elementÚ_last_descendantÚ next_elementrQ)rrdZ string_childryZ old_elementZ new_elementr“rrrres>
 
 
 
 
ÿ 
 
 
  þzElement.appendChildcCst|jtƒriSt|jƒSrJ)r)rbr rƒrMrrrÚ getAttributesSs zElement.getAttributescCs”|dk    rt|ƒdkrg}t| ¡ƒD](\}}t|tƒr$t|Ž}||=|||<q$|jj |j    |¡t| ¡ƒD]\}}||j
|<ql|jj  |j
¡dS)Nr) r‹rprqr)Útupler    r#ZbuilderZ$_replace_cdata_list_attribute_valuesr1rbZset_up_substitutions)rrwZconverted_attributesr1rxÚnew_namerrrÚ setAttributesXs
 
ÿ zElement.setAttributesNcCs4t|j |¡|jƒ}|r&| ||¡n
| |¡dSrJ)r]r#r–Ú insertBeforere)rr^ržÚtextrrrÚ
insertTextnszElement.insertTextcCs€|j |j¡}|jjtkrf|jjrf|jj|djtkrf|jj|d}|j ||j¡}| |¡n|j ||j¡||_    dS)NrV)
rbÚindexrIrr•r#r–r—Úinsertr’)rrdÚrefNoder¡Zold_nodeZnew_strrrrržusÿ zElement.insertBeforecCs|j ¡dSrJ)rbr”rcrrrÚ removeChildszElement.removeChildc Csø|j}|j}|j}| dd¡}t|jƒdkr>|jd}|j}n
d}|j}|j}t|ƒdkrÌ|d}    |dk    rr||    _n||    _||    _|dk    rŽ|    |_n|    |_|dk    r¢|    |_|d dd¡}
||
_|dk    rÆ|
|_d|
_|D]} || _|j     | ¡qÐg|_||_dS)z1Move all of this tag's children into another tag.Frr‘NT)
rbZ next_siblingr˜r‹r•r™Zprevious_elementZprevious_siblingr’ra) rZ
new_parentrbZnew_parent_elementZfinal_next_elementZnew_parents_last_descendantZnew_parents_last_childZ(new_parents_last_descendant_next_elementZ    to_appendZ first_childZlast_childs_last_descendantryrrrÚreparentChildren„s> 
 zElement.reparentChildrencCsB|j |jj|j¡}t||j|jƒ}|jD]\}}||j|<q*|SrJ)r#rYrbr1rZrLrw)rr[rdÚkeyrxrrrÚ    cloneNodeÆs
 zElement.cloneNodecCs|jjSrJ)rbr•rMrrrÚ
hasContentÍszElement.hasContentcCs(|jdkrtd|jfS|j|jfSdS)NÚhtml)rZr r1rMrrrÚ getNameTupleÐs
zElement.getNameTuple)N)r:r;r<rFreršrÚpropertyrwr ržr¤r¥r§r¨rªÚ    nameTuplerrrrrLs6
 
 BrLc@seZdZdd„Zdd„ZdS)r]cCstj |d¡||_||_dSrJ)rgrrFrbr#)rrbr#rrrrFÙszTextNode.__init__cCst‚dSrJ)ÚNotImplementedErrorrMrrrr§ÞszTextNode.cloneNodeN)r:r;r<rFr§rrrrr]Øsr])$Ú __license__Ú__all__rr~Z bs4.builderrrrrrZ bs4.elementr    r
rZhtml5lib.constantsr r r rrrZhtml5lib.treebuildersrrgr+Ú ImportErrorÚerrrhr6ÚobjectrƒrrLr]rrrrÚ<module>s*ÿ  VxC