a
    -3eH                     @   sV  d dl mZ d dlmZ d dlmZmZ ddlmZm	Z	m
Z
 ddlmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZ G dd dZG d	d
 d
eZG dd deZG dd deZG dd deZG dd deZ G dd deZ!G dd deZ"G dd deZ#eddee$ ee$ e%dddZ&eddd%e$e'e%e'd!d"d#Z(d$S )&    )	lru_cache)	getLogger)ListOptional   )COMMON_SAFE_ASCII_CHARACTERSTRACEUNICODE_SECONDARY_RANGE_KEYWORD)is_accentuatedis_case_variableis_cjkis_emoticon	is_hangulis_hiraganais_katakanais_latinis_punctuationis_separator	is_symbolis_thaiis_unprintableremove_accentunicode_rangec                   @   sP   e Zd ZdZeedddZeddddZddd	d
Ze	e
dddZdS )MessDetectorPluginzy
    Base abstract class used for mess detection plugins.
    All detectors MUST extend and implement given methods.
    	characterreturnc                 C   s   t dS )z@
        Determine if given character should be fed in.
        NNotImplementedErrorselfr    r!   H/home/pi/bot/my_env/lib/python3.9/site-packages/charset_normalizer/md.pyeligible#   s    zMessDetectorPlugin.eligibleNc                 C   s   t dS )z
        The main routine to be executed upon character.
        Insert the logic in witch the text would be considered chaotic.
        Nr   r   r!   r!   r"   feed)   s    zMessDetectorPlugin.feedr   c                 C   s   t dS )zB
        Permit to reset the plugin to the initial state.
        Nr   r    r!   r!   r"   reset0   s    zMessDetectorPlugin.resetc                 C   s   t dS )z
        Compute the chaos ratio based on what your feed() has seen.
        Must NOT be lower than 0.; No restriction gt 0.
        Nr   r&   r!   r!   r"   ratio6   s    zMessDetectorPlugin.ratio)__name__
__module____qualname____doc__strboolr#   r$   r'   propertyfloatr(   r!   r!   r!   r"   r      s   r   c                   @   sZ   e Zd ZddddZeedddZedddd	Zddd
dZe	e
dddZdS ) TooManySymbolOrPunctuationPluginNr%   c                 C   s"   d| _ d| _d| _d | _d| _d S )Nr   F)_punctuation_count_symbol_count_character_count_last_printable_charZ_frenzy_symbol_in_wordr&   r!   r!   r"   __init__@   s
    z)TooManySymbolOrPunctuationPlugin.__init__r   c                 C   s   |  S Nisprintabler   r!   r!   r"   r#   H   s    z)TooManySymbolOrPunctuationPlugin.eligiblec                 C   sp   |  j d7  _ || jkrf|tvrft|r8|  jd7  _n.| du rft|rft|du rf|  jd7  _|| _d S )Nr   F   )	r4   r5   r   r   r2   isdigitr   r   r3   r   r!   r!   r"   r$   K   s    

z%TooManySymbolOrPunctuationPlugin.feedc                 C   s   d| _ d| _d| _d S Nr   )r2   r4   r3   r&   r!   r!   r"   r'   ]   s    z&TooManySymbolOrPunctuationPlugin.resetc                 C   s0   | j dkrdS | j| j | j  }|dkr,|S dS )Nr           333333?)r4   r2   r3   )r    Zratio_of_punctuationr!   r!   r"   r(   b   s    

z&TooManySymbolOrPunctuationPlugin.ratior)   r*   r+   r6   r-   r.   r#   r$   r'   r/   r0   r(   r!   r!   r!   r"   r1   ?   s   r1   c                   @   sZ   e Zd ZddddZeedddZedddd	Zddd
dZe	e
dddZdS )TooManyAccentuatedPluginNr%   c                 C   s   d| _ d| _d S r<   r4   _accentuated_countr&   r!   r!   r"   r6   o   s    z!TooManyAccentuatedPlugin.__init__r   c                 C   s   |  S r7   )isalphar   r!   r!   r"   r#   s   s    z!TooManyAccentuatedPlugin.eligiblec                 C   s(   |  j d7  _ t|r$|  jd7  _d S Nr   )r4   r
   rB   r   r!   r!   r"   r$   v   s    zTooManyAccentuatedPlugin.feedc                 C   s   d| _ d| _d S r<   rA   r&   r!   r!   r"   r'   |   s    zTooManyAccentuatedPlugin.resetc                 C   s4   | j dks| j dk rdS | j| j  }|dkr0|S dS )Nr      r=   gffffff?rA   )r    Zratio_of_accentuationr!   r!   r"   r(      s    zTooManyAccentuatedPlugin.ratior?   r!   r!   r!   r"   r@   n   s   r@   c                   @   sZ   e Zd ZddddZeedddZedddd	Zddd
dZe	e
dddZdS )UnprintablePluginNr%   c                 C   s   d| _ d| _d S r<   )_unprintable_countr4   r&   r!   r!   r"   r6      s    zUnprintablePlugin.__init__r   c                 C   s   dS NTr!   r   r!   r!   r"   r#      s    zUnprintablePlugin.eligiblec                 C   s(   t |r|  jd7  _|  jd7  _d S rD   )r   rG   r4   r   r!   r!   r"   r$      s    zUnprintablePlugin.feedc                 C   s
   d| _ d S r<   )rG   r&   r!   r!   r"   r'      s    zUnprintablePlugin.resetc                 C   s   | j dkrdS | jd | j  S )Nr   r=   rE   )r4   rG   r&   r!   r!   r"   r(      s    
zUnprintablePlugin.ratior?   r!   r!   r!   r"   rF      s   rF   c                   @   sZ   e Zd ZddddZeedddZedddd	Zddd
dZe	e
dddZdS )SuspiciousDuplicateAccentPluginNr%   c                 C   s   d| _ d| _d | _d S r<   _successive_countr4   _last_latin_characterr&   r!   r!   r"   r6      s    z(SuspiciousDuplicateAccentPlugin.__init__r   c                 C   s   |  ot|S r7   )rC   r   r   r!   r!   r"   r#      s    z(SuspiciousDuplicateAccentPlugin.eligiblec                 C   st   |  j d7  _ | jd urjt|rjt| jrj| rJ| j rJ|  jd7  _t|t| jkrj|  jd7  _|| _d S rD   )r4   rL   r
   isupperrK   r   r   r!   r!   r"   r$      s    z$SuspiciousDuplicateAccentPlugin.feedc                 C   s   d| _ d| _d | _d S r<   rJ   r&   r!   r!   r"   r'      s    z%SuspiciousDuplicateAccentPlugin.resetc                 C   s   | j dkrdS | jd | j  S )Nr   r=   r:   )r4   rK   r&   r!   r!   r"   r(      s    
z%SuspiciousDuplicateAccentPlugin.ratior?   r!   r!   r!   r"   rI      s   rI   c                   @   sZ   e Zd ZddddZeedddZedddd	Zddd
dZe	e
dddZdS )SuspiciousRangeNr%   c                 C   s   d| _ d| _d | _d S r<   )"_suspicious_successive_range_countr4   _last_printable_seenr&   r!   r!   r"   r6      s    zSuspiciousRange.__init__r   c                 C   s   |  S r7   r8   r   r!   r!   r"   r#      s    zSuspiciousRange.eligiblec                 C   sx   |  j d7  _ | s&t|s&|tv r0d | _d S | jd u rD|| _d S t| j}t|}t||rn|  jd7  _|| _d S rD   )r4   isspacer   r   rP   r    is_suspiciously_successive_rangerO   )r    r   unicode_range_aunicode_range_br!   r!   r"   r$      s"    


zSuspiciousRange.feedc                 C   s   d| _ d| _d | _d S r<   )r4   rO   rP   r&   r!   r!   r"   r'      s    zSuspiciousRange.resetc                 C   s.   | j dkrdS | jd | j  }|dk r*dS |S )Nr   r=   r:   g?)r4   rO   )r    Zratio_of_suspicious_range_usager!   r!   r"   r(      s    
zSuspiciousRange.ratior?   r!   r!   r!   r"   rN      s   rN   c                   @   sZ   e Zd ZddddZeedddZedddd	Zddd
dZe	e
dddZdS )SuperWeirdWordPluginNr%   c                 C   s:   d| _ d| _d| _d| _d| _d| _d| _d| _d| _d S )Nr   F )	_word_count_bad_word_count_foreign_long_count_is_current_word_bad_foreign_long_watchr4   _bad_character_count_buffer_buffer_accent_countr&   r!   r!   r"   r6      s    zSuperWeirdWordPlugin.__init__r   c                 C   s   dS rH   r!   r   r!   r!   r"   r#     s    zSuperWeirdWordPlugin.eligiblec                 C   s  |  r|  j|7  _t|r,|  jd7  _| jdu rt|du sJt|rt|du rt|du rt|du rt	|du rt
|du rd| _d S | jsd S | st|st|r| jr|  jd7  _t| j}|  j|7  _|dkr6| j| dkrd| _t| jd r6| jd  r6|  jd7  _d| _|dkr| jrdd	 t| jtd
|D }d}|rt|| dkrd}|s|  jd7  _d| _| jr|  jd7  _|  jt| j7  _d| _d| _d| _d
| _n6|dvr| du rt|rd| _|  j|7  _d S )Nr   FT   g(\?   c                 S   s   g | ]\}}|  r|qS r!   )rM   ).0cir!   r!   r"   
<listcomp>.  s   z-SuperWeirdWordPlugin.feed.<locals>.<listcomp>r   r>   rV   >   |~><-=_)rC   r]   r
   r^   r[   r   r   r   r   r   r   rQ   r   r   rW   lenr4   rZ   rM   rY   ziprangerX   r\   r;   r   )r    r   Zbuffer_lengthZcamel_case_dstZprobable_camel_casedr!   r!   r"   r$     s    





	

 
zSuperWeirdWordPlugin.feedc                 C   s4   d| _ d| _d| _d| _d| _d| _d| _d| _d S )NrV   Fr   )r]   rZ   r[   rX   rW   r4   r\   rY   r&   r!   r!   r"   r'   L  s    zSuperWeirdWordPlugin.resetc                 C   s$   | j dkr| jdkrdS | j| j S )N
   r   r=   )rW   rY   r\   r4   r&   r!   r!   r"   r(   V  s    zSuperWeirdWordPlugin.ratior?   r!   r!   r!   r"   rU      s   A
rU   c                   @   s^   e Zd ZdZddddZeedddZeddd	d
ZddddZ	e
edddZdS )CjkInvalidStopPluginu   
    GB(Chinese) based encoding often render the stop incorrectly when the content does not fit and
    can be easily detected. Searching for the overuse of '丅' and '丄'.
    Nr%   c                 C   s   d| _ d| _d S r<   _wrong_stop_count_cjk_character_countr&   r!   r!   r"   r6   d  s    zCjkInvalidStopPlugin.__init__r   c                 C   s   dS rH   r!   r   r!   r!   r"   r#   h  s    zCjkInvalidStopPlugin.eligiblec                 C   s4   |dv r|  j d7  _ d S t|r0|  jd7  _d S )N>   u   丄u   丅r   )rs   r   rt   r   r!   r!   r"   r$   k  s
    zCjkInvalidStopPlugin.feedc                 C   s   d| _ d| _d S r<   rr   r&   r!   r!   r"   r'   r  s    zCjkInvalidStopPlugin.resetc                 C   s   | j dk rdS | j| j  S )N   r=   )rt   rs   r&   r!   r!   r"   r(   v  s    
zCjkInvalidStopPlugin.ratio)r)   r*   r+   r,   r6   r-   r.   r#   r$   r'   r/   r0   r(   r!   r!   r!   r"   rq   ^  s   rq   c                   @   sZ   e Zd ZddddZeedddZedddd	Zddd
dZe	e
dddZdS )ArchaicUpperLowerPluginNr%   c                 C   s.   d| _ d| _d| _d| _d| _d | _d| _d S )NFr   T)_buf_character_count_since_last_sep_successive_upper_lower_count#_successive_upper_lower_count_finalr4   _last_alpha_seen_current_ascii_onlyr&   r!   r!   r"   r6   ~  s    z ArchaicUpperLowerPlugin.__init__r   c                 C   s   dS rH   r!   r   r!   r!   r"   r#     s    z ArchaicUpperLowerPlugin.eligiblec                 C   s$  |  ot|}|du }|r| jdkr| jdkrV| du rV| jdu rV|  j| j7  _d| _d| _d | _d| _|  j	d7  _	d| _d S | jdu r|
 du rd| _| jd ur| r| j s| r| j r| jdu r|  jd7  _d| _qd| _nd| _|  j	d7  _	|  jd7  _|| _d S )NFr   @   r   Tr:   )rC   r   rx   r;   r|   rz   ry   r{   rw   r4   isasciirM   islower)r    r   Zis_concernedZ	chunk_sepr!   r!   r"   r$     sF    


zArchaicUpperLowerPlugin.feedc                 C   s.   d| _ d| _d| _d| _d | _d| _d| _d S )Nr   FT)r4   rx   ry   rz   r{   rw   r|   r&   r!   r!   r"   r'     s    zArchaicUpperLowerPlugin.resetc                 C   s   | j dkrdS | j| j  S )Nr   r=   )r4   rz   r&   r!   r!   r"   r(     s    
zArchaicUpperLowerPlugin.ratior?   r!   r!   r!   r"   rv   }  s   *	rv      )maxsize)rS   rT   r   c                 C   s|  | du s|du rdS | |kr dS d| v r4d|v r4dS d| v sDd|v rHdS d| v sXd|v rld| v shd|v rldS |  d| d }}|D ]}|tv rq||v r dS q| dv |dv  }}|s|rd	| v sd	|v rdS |r|rdS d
| v sd
|v r d	| v sd	|v rdS | dks|dkr dS d	| v sHd	|v sH| dv rx|dv rxd| v s\d|v r`dS d| v std|v rxdS dS )za
    Determine if two Unicode range seen next to each other can be considered as suspicious.
    NTFZLatinZ	EmoticonsZ	Combining )HiraganaKatakanaCJKZHangulzBasic Latin)r   r   ZPunctuationZForms)splitr	   )rS   rT   Zkeywords_range_aZkeywords_range_belZrange_a_jp_charsZrange_b_jp_charsr!   r!   r"   rR     sh    rR   i   皙?F)decoded_sequencemaximum_thresholddebugr   c              	   C   sX  dd t  D }t| d }d}|dk r0d}n|dkr>d}nd	}t| d
 t|D ]d\}}|D ]}	|	|r`|	| q`|dkr|| dks||d krTtdd |D }||krT qqT|rNtd}
|
	t
d| d| d|  t| dkr(|
	t
d| dd   |
	t
d| dd   |D ] }|
	t
|j d|j  q,t|dS )zw
    Compute a mess ratio given a decoded bytes sequence. The maximum threshold does stop the computation earlier.
    c                 S   s   g | ]
}| qS r!   r!   )rb   Zmd_classr!   r!   r"   re     s   zmess_ratio.<locals>.<listcomp>r   r=   i       r   r}      
r   c                 s   s   | ]}|j V  qd S r7   )r(   )rb   dtr!   r!   r"   	<genexpr>/      zmess_ratio.<locals>.<genexpr>Zcharset_normalizerzIMess-detector extended-analysis start. intermediary_mean_mess_ratio_calc=z mean_mess_ratio=z maximum_threshold=ru   zStarting with: NzEnding with: iz:    )r   __subclasses__rm   rn   ro   r#   r$   sumr   logr   	__class__r(   round)r   r   r   Z	detectorslengthZmean_mess_ratioZ!intermediary_mean_mess_ratio_calcr   indexdetectorloggerr   r!   r!   r"   
mess_ratio  sR    


r   N)r   F))	functoolsr   loggingr   typingr   r   Zconstantr   r   r	   utilsr
   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r1   r@   rF   rI   rN   rU   rq   rv   r-   r.   rR   r0   r   r!   r!   r!   r"   <module>   s.   D"/%4eLF 