zmc
2023-12-22 9fdbf60165db0400c2e8e6be2dc6e88138ac719a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
U
M±d5.ã@sªddlmZddlmZddlmZddlmZmZm    Z    m
Z
m Z m Z m Z ddlmZddlmZmZmZGdd    „d    ƒZGd
d „d ƒZe eefZe
eZGd d „d ƒZdS)é)Úaliases)Úsha256)Údumps)ÚAnyÚDictÚIteratorÚListÚOptionalÚTupleÚUnioné)ÚTOO_BIG_SEQUENCE)Ú    iana_nameÚis_multi_byte_encodingÚ unicode_rangec@sÎeZdZd8eeeedeedœdd„Ze    edœdd„Z
e    edœd    d
„Z e ed œd d „ƒZ ed œdd„Zed œdd„Zdddœdd„Ze ed œdd„ƒZe eed œdd„ƒZe ed œdd„ƒZe ed œdd„ƒZe eed œdd„ƒZe ed œdd„ƒZe ed œd d!„ƒZe ed œd"d#„ƒZe ed œd$d%„ƒZe ed œd&d'„ƒZe ed œd(d)„ƒZe edd œd*d+„ƒZe ed œd,d-„ƒZe eed œd.d/„ƒZe eed œd0d1„ƒZ d9eed3œd4d5„Z!e ed œd6d7„ƒZ"dS):Ú CharsetMatchNÚCoherenceMatches)ÚpayloadÚguessed_encodingÚmean_mess_ratioÚhas_sig_or_bomÚ    languagesÚdecoded_payloadcCsF||_||_||_||_||_d|_g|_d|_d|_d|_    ||_
dS)Nç) Ú_payloadÚ    _encodingÚ_mean_mess_ratioÚ
_languagesÚ_has_sig_or_bomÚ_unicode_rangesÚ_leavesZ_mean_coherence_ratioÚ_output_payloadÚ_output_encodingÚ_string)Úselfrrrrrr©r%úPd:\z\workplace\vscode\pyvenv\venv\Lib\site-packages\charset_normalizer/models.pyÚ__init__ s    zCharsetMatch.__init__)ÚotherÚreturncCs>t|tƒs&td t|jƒt|jƒ¡ƒ‚|j|jko<|j|jkS)Nz&__eq__ cannot be invoked on {} and {}.)Ú
isinstancerÚ    TypeErrorÚformatÚstrÚ    __class__ÚencodingÚ fingerprint©r$r(r%r%r&Ú__eq__$s
ÿÿzCharsetMatch.__eq__cCsvt|tƒst‚t|j|jƒ}t|j|jƒ}|dkrj|dkrj|dkr^|j|jkr^|j|jkS|j|jkS|j|jkS)zQ
        Implemented to make sorted available upon CharsetMatches items.
        g{®Gáz„?g{®Gáz”?r)r*rÚ
ValueErrorÚabsÚchaosÚ    coherenceÚmulti_byte_usage)r$r(Zchaos_differenceZcoherence_differencer%r%r&Ú__lt__-s
  zCharsetMatch.__lt__©r)cCsdtt|ƒƒt|jƒS)Ngð?)Úlenr-Úraw©r$r%r%r&r7@szCharsetMatch.multi_byte_usagecCs"|jdkrt|j|jdƒ|_|jS)NÚstrict)r#r-rrr<r%r%r&Ú__str__Ds
zCharsetMatch.__str__cCsd |j|j¡S)Nz<CharsetMatch '{}' bytes({})>)r,r/r0r<r%r%r&Ú__repr__JszCharsetMatch.__repr__cCs8t|tƒr||kr"td |j¡ƒ‚d|_|j |¡dS)Nz;Unable to add instance <{}> as a submatch of a CharsetMatch)r*rr3r,r.r#r Úappendr1r%r%r&Ú add_submatchMsÿÿzCharsetMatch.add_submatchcCs|jS©N)rr<r%r%r&r/XszCharsetMatch.encodingcCsDg}t ¡D]2\}}|j|kr*| |¡q |j|kr | |¡q |S)z‚
        Encoding name are known by many name, using this could help when searching for IBM855 when it's listed as CP855.
        )rÚitemsr/r@)r$Z also_known_asÚuÚpr%r%r&Úencoding_aliases\s
 
 zCharsetMatch.encoding_aliasescCs|jSrB©rr<r%r%r&ÚbomiszCharsetMatch.bomcCs|jSrBrGr<r%r%r&Úbyte_order_markmszCharsetMatch.byte_order_markcCsdd„|jDƒS)zÔ
        Return the complete list of possible languages found in decoded sequence.
        Usually not really useful. Returned list may be empty even if 'language' property return something != 'Unknown'.
        cSsg|] }|d‘qS)rr%)Ú.0Úer%r%r&Ú
<listcomp>wsz*CharsetMatch.languages.<locals>.<listcomp>©rr<r%r%r&rqszCharsetMatch.languagescCsp|jsbd|jkrdSddlm}m}t|jƒr8||jƒn||jƒ}t|ƒdksVd|krZdS|dS|jddS)z’
        Most probable language found in decoded sequence. If none were detected or inferred, the property will return
        "Unknown".
        ÚasciiZEnglishr)Úencoding_languagesÚmb_encoding_languagesz Latin BasedÚUnknown)rÚcould_be_from_charsetZcharset_normalizer.cdrOrPrr/r:)r$rOrPrr%r%r&Úlanguageys
ÿ ýzCharsetMatch.languagecCs|jSrB)rr<r%r%r&r5•szCharsetMatch.chaoscCs|js
dS|jddS)Nrrr rMr<r%r%r&r6™szCharsetMatch.coherencecCst|jdddS©Nédé)Úndigits)Úroundr5r<r%r%r&Ú percent_chaosŸszCharsetMatch.percent_chaoscCst|jdddSrT)rXr6r<r%r%r&Úpercent_coherence£szCharsetMatch.percent_coherencecCs|jS)z+
        Original untouched bytes.
        )rr<r%r%r&r;§szCharsetMatch.rawcCs|jSrB)r r<r%r%r&Úsubmatch®szCharsetMatch.submatchcCst|jƒdkS©Nr)r:r r<r%r%r&Ú has_submatch²szCharsetMatch.has_submatchcCs@|jdk    r|jSdd„t|ƒDƒ}ttdd„|Dƒƒƒ|_|jS)NcSsg|] }t|ƒ‘qSr%)r)rJÚcharr%r%r&rL»sz*CharsetMatch.alphabets.<locals>.<listcomp>cSsh|] }|r|’qSr%r%)rJÚrr%r%r&Ú    <setcomp>¿sz)CharsetMatch.alphabets.<locals>.<setcomp>)rr-ÚsortedÚlist)r$Zdetected_rangesr%r%r&Ú    alphabets¶s
ÿzCharsetMatch.alphabetscCs|jgdd„|jDƒS)zÜ
        The complete list of encoding that output the exact SAME str result and therefore could be the originating
        encoding.
        This list does include the encoding available in property 'encoding'.
        cSsg|]
}|j‘qSr%)r/)rJÚmr%r%r&rLÉsz6CharsetMatch.could_be_from_charset.<locals>.<listcomp>)rr r<r%r%r&rRÂsz"CharsetMatch.could_be_from_charsetÚutf_8)r/r)cCs2|jdks|j|kr,||_t|ƒ |d¡|_|jS)z®
        Method to get re-encoded bytes payload using given target encoding. Default to UTF-8.
        Any errors will be simply ignored by the encoder NOT replaced.
        NÚreplace)r"r-Úencoder!)r$r/r%r%r&ÚoutputËszCharsetMatch.outputcCst| ¡ƒ ¡S)zw
        Retrieve the unique SHA256 computed using the transformed (re-encoded) payload. Not the original one.
        )rrhÚ    hexdigestr<r%r%r&r0ÖszCharsetMatch.fingerprint)N)re)#Ú__name__Ú
__module__Ú __qualname__Úbytesr-ÚfloatÚboolr    r'Úobjectr2r8Úpropertyr7r>r?rAr/rrFrHrIrrSr5r6rYrZr;r[r]rcrRrhr0r%r%r%r&r
sbùù         rc@s¢eZdZdZdeeedœdd„Zeedœdd„Z    e
e e fed    œd
d „Z e dœd d „Zedœdd„Zedd    œdd„Zeddœdd„Zeddœdd„ZdS)ÚCharsetMatchesz³
    Container with every CharsetMatch items ordered by default from most probable to the less one.
    Act like a list(iterable) but does not implements all related methods.
    N)ÚresultscCs|r t|ƒng|_dSrB)raÚ_results)r$rsr%r%r&r'äszCharsetMatches.__init__r9ccs|jEdHdSrB©rtr<r%r%r&Ú__iter__çszCharsetMatches.__iter__)Úitemr)cCsNt|tƒr|j|St|tƒrFt|dƒ}|jD]}||jkr.|Sq.t‚dS)z¸
        Retrieve a single item either by its position or encoding name (alias may be used here).
        Raise KeyError upon invalid index or encoding not present in results.
        FN)r*Úintrtr-rrRÚKeyError)r$rwÚresultr%r%r&Ú __getitem__ês
 
 
 
 
 
 
zCharsetMatches.__getitem__cCs
t|jƒSrB©r:rtr<r%r%r&Ú__len__øszCharsetMatches.__len__cCst|jƒdkSr\r|r<r%r%r&Ú__bool__ûszCharsetMatches.__bool__cCs|t|tƒstd t|jƒ¡ƒ‚t|jƒtkr`|j    D],}|j
|j
kr2|j |j kr2|  |¡dSq2|j      |¡t|j    ƒ|_    dS)z~
        Insert a single match. Will be inserted accordingly to preserve sort.
        Can be inserted as a submatch.
        z-Cannot append instance '{}' to CharsetMatchesN)r*rr3r,r-r.r:r;r rtr0r5rAr@ra)r$rwÚmatchr%r%r&r@þs
ÿÿ
 
 zCharsetMatches.appendrcCs|js
dS|jdS)zQ
        Simply return the first match. Strict equivalent to matches[0].
        Nrrur<r%r%r&ÚbestszCharsetMatches.bestcCs| ¡S)zP
        Redundant method, call the method best(). Kept for BC reasons.
        )r€r<r%r%r&ÚfirstszCharsetMatches.first)N)rjrkrlÚ__doc__r    rrr'rrvr rxr-r{r}ror~r@r€rr%r%r%r&rrÞsrrc @sjeZdZeeeeeeeeeeeeeeeedœ dd„Ze    e
ee fdœdd„ƒZ edœdd„Z d    S)
ÚCliDetectionResult© Úpathr/rFÚalternative_encodingsrSrcrr5r6Ú unicode_pathÚ is_preferredc CsF||_|
|_||_||_||_||_||_||_||_|    |_    | |_
dSrB) r…r‡r/rFr†rSrcrr5r6rˆ) r$r…r/rFr†rSrcrr5r6r‡rˆr%r%r&r'&szCliDetectionResult.__init__r9c Cs2|j|j|j|j|j|j|j|j|j|j    |j
dœ S)Nr„r„r<r%r%r&Ú__dict__@sõzCliDetectionResult.__dict__cCst|jdddS)NTé)Ú ensure_asciiÚindent)rr‰r<r%r%r&Úto_jsonPszCliDetectionResult.to_jsonN)rjrkrlr-r    rrornr'rqrrr‰rr%r%r%r&rƒ%sô rƒN)Zencodings.aliasesrÚhashlibrÚjsonrÚtypingrrrrr    r
r Zconstantr Úutilsrrrrrrr-rnZCoherenceMatchrrƒr%r%r%r&Ú<module>s   $ UC