
    iP                        d Z ddlmZ ddlmZmZmZ eeef         Ze	Z
	 eee	         ee	         f         Z	 ee
ee
e
f         ee
         f         Z	 eeeeef         ee         f         Z	 ee
ef         Z	 eeef         Z	  G d de          Z G d de          Z G d d	e          Zd
dlmZmZmZmZmZmZmZmZmZmZmZmZm Z m!Z! d
dl"m#Z#m$Z$m%Z%m&Z&m'Z' dS )u  Tokenizers — fast, batteries-included tokenization library.

Free-threaded Python (3.14t) note:
    Wheels built against free-threaded CPython declare ``Py_MOD_GIL_NOT_USED``
    and use ``RwLock``-guarded interior mutability so component setters are
    safe to call from multiple threads. Compound mutations
    (``tokenizer.post_processor.special_tokens = …``) are still not atomic —
    use a Python lock if you need the read-then-write to be serialized.
    See ``docs/free-threading-audit.md`` for the full analysis.
    )Enum)ListTupleUnionc                       e Zd ZdZdZdS )OffsetReferentialoriginal
normalizedN)__name__
__module____qualname__ORIGINAL
NORMALIZED     T/usr/local/lib/hermes-agent/venv/lib/python3.11/site-packages/tokenizers/__init__.pyr   r   G   s        HJJJr   r   c                       e Zd ZdZdZdS )
OffsetTypebytecharN)r   r   r   BYTECHARr   r   r   r   r   L   s        DDDDr   r   c                   "    e Zd ZdZdZdZdZdZdS )SplitDelimiterBehaviorremovedisolatedmerged_with_previousmerged_with_next
contiguousN)r   r   r   REMOVEDISOLATEDMERGED_WITH_PREVIOUSMERGED_WITH_NEXT
CONTIGUOUSr   r   r   r   r   Q   s)        GH1)JJJr   r      )
AddedTokenEncodingNormalizedStringPreTokenizedStringRegexToken	Tokenizerdecodersmodelsnormalizerspre_tokenizers
processorstrainers__version__)BertWordPieceTokenizerByteLevelBPETokenizerCharBPETokenizerSentencePieceBPETokenizerSentencePieceUnigramTokenizerN)(__doc__enumr   typingr   r   r   intOffsetsstrTextInputSequencePreTokenizedInputSequenceTextEncodeInputPreTokenizedEncodeInputInputSequenceEncodeInputr   r   r   
tokenizersr&   r'   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   implementationsr4   r5   r6   r7   r8   r   r   r   <module>rG      sv  	 	       % % % % % % % % % % S/  5!$s)U3Z"78  	
.
./	
  	
#%>
>?	"#% 
 ')BBC O%<<=       
       
    T                                                  r   