HEX
Server: Apache
System: Linux sh-pro142.hostgator.com.br 5.14.0-162.23.1.9991722448259.nf.el9.x86_64 #1 SMP PREEMPT_DYNAMIC Wed Jul 31 18:11:45 UTC 2024 x86_64
User: okform09 (1324)
PHP: 8.3.30
Disabled: NONE
Upload Files
File: //usr/libexec/oracle-cloud-agent/plugins/osms/charset_normalizer/utils.pyc
a

ٓ�fv.�@slddlZddlZddlZddlmZddlmZddlmZddl	m
Z
ddlmZm
Z
mZmZmZmZddlmZdd	lmZmZmZmZmZmZeed
�eed�dd
��Zeed
�eed�dd��Zeed
�eeed�dd��Zeed
�eed�dd��Z eed
�eed�dd��Z!eed
�eed�dd��Z"eed
�eed�dd��Z#eed
�eed�dd��Z$eed
�eed�dd��Z%eed
�eed�dd��Z&eed
�eed�d d!��Z'eed
�eed�d"d#��Z(eed
�eed�d$d%��Z)eed
�eed�d&d'��Z*eed
�eed�d(d)��Z+eed
�eed�d*d+��Z,ee-e�d
�eed,�d-d.��Z.eed
�eed�d/d0��Z/dSe0e1eed2�d3d4�Z2ed5d
�eed6�d7d8��Z3e0eeee0fd9�d:d;�Z4eed<�d=d>�Z5dTeeed@�dAdB�Z6ee
edC�dDdE�Z7eee8dF�dGdH�Z9eeedF�dIdJ�Z:dKej;dLfee1eddM�dNdO�Z<dUe0ee=e1eee0eeeeeddfdP�
dQdR�Z>dS)V�N)�IncrementalDecoder)�aliases)�	lru_cache)�findall)�	Generator�List�Optional�Set�Tuple�Union)�MultibyteIncrementalDecoder�)�ENCODING_MARKS�IANA_SUPPORTED_SIMILAR�RE_POSSIBLE_ENCODING_INDICATION�UNICODE_RANGES_COMBINED�UNICODE_SECONDARY_RANGE_KEYWORD�UTF8_MAXIMAL_ALLOCATION)�maxsize)�	character�returncCsdzt�|�}Wnty"YdS0d|vpbd|vpbd|vpbd|vpbd|vpbd|vpbd|vpbd	|vS)
NFz
WITH GRAVEz
WITH ACUTEzWITH CEDILLAzWITH DIAERESISzWITH CIRCUMFLEXz
WITH TILDEzWITH MACRONzWITH RING ABOVE��unicodedata�name�
ValueError�r�description�r�|/sparta/input/_build_configuration/image_build+validate/lib/bmcenv/lib64/python3.9/site-packages/charset_normalizer/utils.py�is_accentuateds&�������rcCs.t�|�}|s|S|�d�}tt|dd��S)N� r�)r�
decomposition�split�chr�int)r�
decomposed�codesrrr�
remove_accent(s


r(cCs.t|�}t��D]\}}||vr|SqdS)zK
    Retrieve the Unicode range official name from a single character.
    N)�ordr�items)r�
character_ord�
range_name�	ord_rangerrr�
unicode_range3s

r.cCs,zt�|�}Wnty"YdS0d|vS)NF�LATINrrrrr�is_latinAs
r0cCs2t�|�}d|vrdSt|�}|dur*dSd|vS)N�PTF�Punctuation�r�categoryr.�r�character_category�character_rangerrr�is_punctuationJs
r8cCsBt�|�}d|vsd|vrdSt|�}|dur2dSd|vo@|dkS)N�S�NTF�Forms�Lor3r5rrr�	is_symbolYs
r=cCs$t|�}|durdSd|vp"d|vS)NF�	Emoticons�Pictographs)r.)rr7rrr�is_emoticonhsr@cCs.|��s|dvrdSt�|�}d|vp,|dvS)N>�+u|�>�<T�Z>�Pd�Po�Pc)�isspacerr4)rr6rrr�is_separatorrs
rIcCs|��|��kS�N)�islower�isupper�rrrr�is_case_variable|srNcCs,zt�|�}Wnty"YdS0d|vS)NF�CJKr�r�character_namerrr�is_cjk�s
rRcCs,zt�|�}Wnty"YdS0d|vS)NF�HIRAGANArrPrrr�is_hiragana�s
rTcCs,zt�|�}Wnty"YdS0d|vS)NF�KATAKANArrPrrr�is_katakana�s
rVcCs,zt�|�}Wnty"YdS0d|vS)NF�HANGULrrPrrr�	is_hangul�s
rXcCs,zt�|�}Wnty"YdS0d|vS)NF�THAIrrPrrr�is_thai�s
rZcCs,zt�|�}Wnty"YdS0d|vS)NF�ARABICrrPrrr�	is_arabic�s
r\cCs4zt�|�}Wnty"YdS0d|vo2d|vS)NFr[z
ISOLATED FORMrrPrrr�is_arabic_isolated_form�s
r])r,rcst�fdd�tD��S)Nc3s|]}|�vVqdSrJr)�.0�keyword�r,rr�	<genexpr>��z-is_unicode_range_secondary.<locals>.<genexpr>)�anyrr`rr`r�is_unicode_range_secondary�srdcCs(|��duo&|��duo&|dko&|dkS)NF�u)rH�isprintablerMrrr�is_unprintable�s
���rg� )�sequence�search_zonercCs�t|t�st�t|�}tt|dt||��jddd��}t|�dkrHdS|D]N}|���	dd�}t
��D]0\}}||kr�|S||krh|SqhqLdS)zW
    Extract using ASCII-only decoder any specified encoding in the first n-bytes.
    N�ascii�ignore��errorsr�-�_)�
isinstance�bytes�	TypeError�lenrr�min�decode�lower�replacerr*)rirj�seq_len�results�specified_encoding�encoding_alias�
encoding_ianarrr�any_specified_encoding�s"
�r~�)rrcCs |dvptt�d�|��jt�S)zQ
    Verify is a specific encoding is a multi byte one based on it IANA name
    >	�	utf_16_le�	utf_32_le�utf_32�utf_16�utf_7�utf_8�	utf_16_be�	utf_32_be�	utf_8_sig�encodings.{})�
issubclass�	importlib�
import_module�formatrr)rrrr�is_multi_byte_encoding�s
��r�)rircCsJtD]@}t|}t|t�r |g}|D]}|�|�r$||fSq$qdS)z9
    Identify and extract SIG/BOM in given sequence.
    )Nrb)rrqrr�
startswith)ri�
iana_encoding�marks�markrrr�identify_sig_or_boms

r�)r�rcCs|dvS)N>r�r�r)r�rrr�should_strip_sig_or_bomsr�T)�cp_name�strictrcCsL|���dd�}t��D]\}}|||fvr|Sq|rHtd�|���|S)Nrorpz Unable to retrieve IANA for '{}')rwrxrr*rr�)r�r�r|r}rrr�	iana_name"s
r�)�decoded_sequencercCs4t�}|D] }t|�}|dur q
|�|�q
t|�SrJ)�setr.�add�list)r��rangesrr7rrr�
range_scan2sr�)�iana_name_a�iana_name_brc	Cs�t|�st|�rdSt�d�|��j}t�d�|��j}|dd�}|dd�}d}td�D]*}t|g�}|�|�|�|�krX|d7}qX|dS)	Ngr�rlrmr�r
�)r�r�r�r�r�rangerrrv)	r�r��	decoder_a�	decoder_b�id_a�id_b�character_match_count�i�
to_be_decodedrrr�
cp_similarity@s ��



r�cCs|tvo|t|vS)z�
    Determine if two code page are at least 80% similar. IANA_SUPPORTED_SIMILAR dict was generated using
    the function cp_similarity.
    )r)r�r�rrr�
is_cp_similarXs
�r��charset_normalizerz)%(asctime)s | %(levelname)s | %(message)s)r�level�
format_stringrcCs:t�|�}|�|�t��}|�t�|��|�|�dSrJ)�logging�	getLogger�setLevel�
StreamHandler�setFormatter�	Formatter�
addHandler)rr�r��logger�handlerrrr�set_logging_handlercs


r�)
�	sequencesr}�offsets�
chunk_size�bom_or_sig_available�strip_sig_or_bom�sig_payload�is_multi_byte_decoder�decoded_payloadrc	cs*|r6|dur6|D]"}	||	|	|�}
|
s,q4|
Vqn�|D]�}	|	|}|t|�dkrXq:||	|	|�}|r||dur|||}|j||r�dndd�}
|�r|	dk�rt|d�}
|�r|
d|
�|v�rt|	|	dd	�D]H}|||�}|r�|dur�||}|j|dd�}
|
d|
�|vr��qq�|
Vq:dS)
NF�rlr�rmrr!����)rtrvrur�)r�r}r�r�r�r�r�r�r�r��chunk�	chunk_end�cut_sequence�chunk_partial_size_chk�jrrr�cut_sequence_chunksps>

�
��r�)rh)T)N)?r�r�r�codecsr�encodings.aliasesr�	functoolsr�rer�typingrrrr	r
r�_multibytecodecr�constantrrrrrr�str�boolrr(r.r0r8r=r@rIrNrRrTrVrXrZr\r]rtrdrgrrr%r~r�r�r�r�r��floatr�r��INFOr�r�r�rrrr�<module>s�  


									
 ����