
    yj                     r    d dl Z d dlZd dlmZmZ  G d de j                  Z G d dej                  Z	dS )    N)common_spec
model_specc                       e Zd ZdZdZdZdZdS )RotaryScalingTypezRoPE scaling type.r         N)__name__
__module____qualname____doc__LinearSuLlama3     a/usr/local/lib/hermes-agent/venv/lib/python3.11/site-packages/ctranslate2/specs/attention_spec.pyr   r   	   s#        F	
BFFFr   r   c                   <    e Zd Z	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddZdS )	MultiHeadAttentionSpecFNTr   '  r   c                    t           j        | _        |rt          j        |          | _        d t          |rdnd          D             | _        |r4t          j        |          | _        t          j        |          | _	        |rt          j        d          | _
        |rd | _        d | _        |rd | _        d | _        |rd | _        d | _        d | _        |dk    r,t%          j        d                              |          | _        |dk    r,t%          j        d                              |          | _        |t%          j        d                              |          | _        || _        t%          j        d                              |
          | _        |,t%          j        d	                              |          | _        |t6          j        u r-t%          j        d                              |	          | _        n9|t6          j        u rd | _        d | _         n|t6          j!        u rd | _"        d | _#        |,t%          j        d                              |          | _$        |,t%          j        d                              |          | _%        |.t%          j        d                              |          | _&        d S d S )
N)rms_normc                 4    g | ]}t          j                    S r   )r   
LinearSpec).0_s     r   
<listcomp>z3MultiHeadAttentionSpec.__init__.<locals>.<listcomp>,   s.     
 
 
)*K"$$
 
 
r   r      Tr   int32float32int8)'r   OPTIONALqueries_scaler   LayerNormSpec
layer_normrangelinearq_normk_normv_normrelative_position_keysrelative_position_valuesrelative_attention_biasrelative_attention_max_distance!relative_asymmetric_position_keysrelative_left_max_positionrelative_right_max_positionnpdtypetype original_max_position_embeddingsmax_position_embeddings
rotary_dimrotary_interleaverotary_baserotary_scaling_typer   r   rotary_scaling_factorr   rotary_scaling_long_factorrotary_scaling_short_factorr   rotary_low_freq_factorrotary_high_freq_factornum_heads_kvhead_dimsliding_window)selfself_attentionrelative_positionrelative_asymmetric_positionr,   r   r6   r7   r9   r:   r8   r4   r5   r?   r@   rA   qk_normqk_norm_rmsr)   has_norms                       r   __init__zMultiHeadAttentionSpec.__init__   s   , (0 	K)7JJJDO
 
.34NAAQ.O.O
 
 
  	J%3[IIIDK%3[IIIDK 	C%3TBBBDK 	1*.D',0D)" 	8+/D(37D0' 	459D2.2D+/3D,+q0046HW4E4E4J4J05 5D1 #a''+-8G+<+<+A+A', ,D( ! hw//44Z@@DO%6D"!x	2277DDD".+-8F+;+;+@+@AT+U+U("&7&>>>-/Xi-@-@-E-E). .** %(9(<<<26/3700$(9(@@@.2+/3,# " 1 1 6 6| D DDHW--228<<DM%"$(7"3"3"8"8"H"HD &%r   )FFFFFNTNr   r   r   r   NNNFTFT)r	   r
   r   rI   r   r   r   r   r      so         %* % )* !)TI TI TI TI TI TIr   r   )
enumnumpyr1   ctranslate2.specsr   r   IntEnumr   	LayerSpecr   r   r   r   <module>rO      s         5 5 5 5 5 5 5 5       UI UI UI UI UIZ1 UI UI UI UI UIr   