zmc
2023-08-08 e792e9a60d958b93aef96050644f369feb25d61b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
U
L±d0'ã@sddlmZmZddlmZmZmZddlmZm    Z    m
Z
ddl m Z m Z mZddlmZmZmZddlmZmZmZddlmZGd    d
„d
ƒZGd d „d eƒZGd d„deƒZGdd„deƒZGdd„deƒZGdd„deƒZGdd„deƒZGdd„deƒZ dS)é)ÚTupleÚUnioné)ÚBIG5_CHAR_TO_FREQ_ORDERÚBIG5_TABLE_SIZEÚBIG5_TYPICAL_DISTRIBUTION_RATIO)ÚEUCKR_CHAR_TO_FREQ_ORDERÚEUCKR_TABLE_SIZEÚ EUCKR_TYPICAL_DISTRIBUTION_RATIO)ÚEUCTW_CHAR_TO_FREQ_ORDERÚEUCTW_TABLE_SIZEÚ EUCTW_TYPICAL_DISTRIBUTION_RATIO)ÚGB2312_CHAR_TO_FREQ_ORDERÚGB2312_TABLE_SIZEÚ!GB2312_TYPICAL_DISTRIBUTION_RATIO)ÚJIS_CHAR_TO_FREQ_ORDERÚJIS_TABLE_SIZEÚJIS_TYPICAL_DISTRIBUTION_RATIO)ÚJOHAB_TO_EUCKR_ORDER_TABLEc@s†eZdZdZdZdZdZddœdd„Zddœd    d
„Ze    e
e fe dd œd d „Z edœdd„Zedœdd„Ze    e
e fe dœdd„ZdS)ÚCharDistributionAnalysisig®Gáz®ï?g{®Gáz„?éN©ÚreturncCs2tƒ|_d|_d|_d|_d|_d|_| ¡dS)NrgF)ÚtupleÚ_char_to_freq_orderÚ _table_sizeÚtypical_distribution_ratioÚ_doneÚ _total_charsÚ _freq_charsÚreset©Úself©r#úOd:\z\workplace\vscode\pyvenv\venv\Lib\site-packages\chardet/chardistribution.pyÚ__init__@sz!CharDistributionAnalysis.__init__cCsd|_d|_d|_dS)zreset analyser, clear any stateFrN)rrrr!r#r#r$r OszCharDistributionAnalysis.reset)ÚcharÚchar_lenrcCsX|dkr| |¡}nd}|dkrT|jd7_||jkrTd|j|krT|jd7_dS)z"feed a character with known lengthééÿÿÿÿrriN)Ú    get_orderrrrr)r"r&r'Úorderr#r#r$ÚfeedXs 
zCharDistributionAnalysis.feedcCsT|jdks|j|jkr|jS|j|jkrN|j|j|j|j}||jkrN|S|jS)z(return confidence based on existing datar)rrÚMINIMUM_DATA_THRESHOLDÚSURE_NOrÚSURE_YES)r"Úrr#r#r$Úget_confidencefs ÿ
z'CharDistributionAnalysis.get_confidencecCs |j|jkS©N)rÚENOUGH_DATA_THRESHOLDr!r#r#r$Úgot_enough_datawsz(CharDistributionAnalysis.got_enough_data)Ú_rcCsdS)Nr)r#)r"r5r#r#r$r*|sz"CharDistributionAnalysis.get_order)Ú__name__Ú
__module__Ú __qualname__r3r/r.r-r%r rÚbytesÚ    bytearrayÚintr,Úfloatr1Úboolr4r*r#r#r#r$r:s    rcs:eZdZddœ‡fdd„ Zeeefedœdd„Z‡Z    S)ÚEUCTWDistributionAnalysisNrcs tƒ ¡t|_t|_t|_dSr2)Úsuperr%r rr rr rr!©Ú    __class__r#r$r%…s
z"EUCTWDistributionAnalysis.__init__©Úbyte_strrcCs,|d}|dkr(d|d|ddSdS)NréÄé^ré¡r)r#©r"rCÚ
first_charr#r#r$r*‹sz#EUCTWDistributionAnalysis.get_order©
r6r7r8r%rr9r:r;r*Ú __classcell__r#r#r@r$r>„sr>cs:eZdZddœ‡fdd„ Zeeefedœdd„Z‡Z    S)ÚEUCKRDistributionAnalysisNrcs tƒ ¡t|_t|_t|_dSr2©r?r%rrr    rr
rr!r@r#r$r%—s
z"EUCKRDistributionAnalysis.__init__rBcCs,|d}|dkr(d|d|ddSdS)Nré°rErrFr)r#rGr#r#r$r*sz#EUCKRDistributionAnalysis.get_orderrIr#r#r@r$rK–srKcs:eZdZddœ‡fdd„ Zeeefedœdd„Z‡Z    S)ÚJOHABDistributionAnalysisNrcs tƒ ¡t|_t|_t|_dSr2rLr!r@r#r$r%©s
z"JOHABDistributionAnalysis.__init__rBcCs@|d}d|krdkr<nn|d|d}t |d¡SdS)NréˆéÔérr))rÚget)r"rCrHÚcoder#r#r$r*¯s
 z#JOHABDistributionAnalysis.get_orderrIr#r#r@r$rN¨srNcs:eZdZddœ‡fdd„ Zeeefedœdd„Z‡Z    S)ÚGB2312DistributionAnalysisNrcs tƒ ¡t|_t|_t|_dSr2)r?r%rrrrrrr!r@r#r$r%¸s
z#GB2312DistributionAnalysis.__init__rBcCs:|d|d}}|dkr6|dkr6d|d|dSdS)NrrrMrFrEr)r#©r"rCrHÚ second_charr#r#r$r*¾sz$GB2312DistributionAnalysis.get_orderrIr#r#r@r$rT·srTcs:eZdZddœ‡fdd„ Zeeefedœdd„Z‡Z    S)ÚBig5DistributionAnalysisNrcs tƒ ¡t|_t|_t|_dSr2)r?r%rrrrrrr!r@r#r$r%Ês
z!Big5DistributionAnalysis.__init__rBcCsR|d|d}}|dkrN|dkr:d|d|ddSd|d|dSdS)    Nrré¤rFéé?é@r)r#rUr#r#r$r*Ðs z"Big5DistributionAnalysis.get_orderrIr#r#r@r$rWÉsrWcs:eZdZddœ‡fdd„ Zeeefedœdd„Z‡Z    S)ÚSJISDistributionAnalysisNrcs tƒ ¡t|_t|_t|_dSr2©r?r%rrrrrrr!r@r#r$r%Þs
z!SJISDistributionAnalysis.__init__rBcCs‚|d|d}}d|kr&dkr8nnd|d}n.d|krLdkrbnnd|dd}nd    S||d
}|d kr~d    }|S) NrrééŸé¼éàéïér)r[ér#)r"rCrHrVr+r#r#r$r*äs z"SJISDistributionAnalysis.get_orderrIr#r#r@r$r\Ýsr\cs:eZdZddœ‡fdd„ Zeeefedœdd„Z‡Z    S)ÚEUCJPDistributionAnalysisNrcs tƒ ¡t|_t|_t|_dSr2r]r!r@r#r$r%÷s
z"EUCJPDistributionAnalysis.__init__rBcCs,|d}|dkr(d|d|ddSdS)Nré rErFrr)r#)r"rCr&r#r#r$r*ýsz#EUCJPDistributionAnalysis.get_orderrIr#r#r@r$reösreN)!ÚtypingrrÚbig5freqrrrÚ    euckrfreqrr    r
Ú    euctwfreqr r r Ú
gb2312freqrrrÚjisfreqrrrZ    johabfreqrrr>rKrNrTrWr\rer#r#r#r$Ú<module>s J