
    yjJ                       d dl Z d dlZd dlZd dlZd dlZd dlmZmZ d dlZ		 d dl
Z
d dlZd dlZn# e$ r Y nw xY wd dlmZ d dlmZ d dlmZmZmZmZmZmZmZ ej        j        ej        j        ej        j        ej        j        ej        j        ej        j        ej        j        ej        j        ej        j        d	Z ej!        j"        ej!        j#        ej!        j$        ej!        j#        dZ%ej&        j'        ej&        j(        dZ)i Z*d	 Z+ G d
 de          Z, G d de j-                  Z. e+d           G d de.                      Z/ e+d           G d de/                      Z0 e+d           G d de/                      Z1 e+d           G d de/                      Z2 e+d           G d de/                      Z3 e+d           G d de/                      Z4 e+d            G d! d"e.                      Z5 e+d#           G d$ d%e.                      Z6 e+d&           G d' d(e.                      Z7 e+d)           G d* d+e.                      Z8 e+d,           G d- d.e.                      Z9 e+d/           G d0 d1e/                      Z: e+d2           G d3 d4e/                      Z; e+d5           G d6 d7e/                      Z< e+d8           G d9 d:e.                      Z= e+d;           G d< d=e=                      Z> e+d>           G d? d@e.                      Z? e+dA           G dB dCe.                      Z@ e+dD           G dE dFe.                      ZA e+dG           G dH dIe.                      ZB e+dJ           G dK dLe.                      ZC e+dM           e+dN           G dO dPe.                                  ZD e+dQ           e+dR           G dS dTe.                                  ZE e+dU           G dV dWe.                      ZF e+dX           G dY dZe.                      ZG e+d[           G d\ d]e.                      ZH e+d^           G d_ d`e.                      ZI e+da           G db dce.                      ZJ e+dd           G de dfe.                      ZK e+dg           G dh die.                      ZL e+dj           G dk dleL                      ZM e+dm           G dn doe.                      ZN e+dp           G dq dre.                      ZO e+ds           G dt due.                      ZP e+dv           G dw dxe.                      ZQ e+dy           G dz d{e.                      ZRd| ZSeTd}k    r
 eS             g d~g dg dg dg dg dg dg dg dg dg ddZU e+d           G d de.                      ZVdS )    N)ListOptional)utils)	Converter)attention_speccommon_spec
model_spectransformer_specwav2vec2_specwav2vec2bert_specwhisper_spec)	gelu	gelu_fastgelu_newgelu_pythongelu_pytorch_tanh
quick_gelurelusiluswish)linearsullama3longrope)gemmgemvc                       fd}|S )z5Registers a model loader for this configuration name.c                 ,     |             t           <   | S N)_MODEL_LOADERS)clsconfig_names    d/usr/local/lib/hermes-agent/venv/lib/python3.11/site-packages/ctranslate2/converters/transformers.py	decoratorz"register_loader.<locals>.decorator<   s    &)cee{#
     )r"   r$   s   ` r#   register_loaderr'   9   s$         r%   c                       e Zd ZdZ	 	 	 	 	 	 ddedee         deee                  dedee         d	ed
efdZd Z	d Z
d Zd ZdS )TransformersConverterz/Converts models from Hugging Face Transformers.NFmodel_name_or_pathactivation_scales
copy_filesload_as_float16revisionlow_cpu_mem_usagetrust_remote_codec                 h    || _         || _        || _        || _        || _        || _        || _        dS )a  Initializes the converter.

        Arguments:
          model_name_or_path: Name of the pretrained model to download, or path to the
            directory containing the pretrained model.
          activation_scales: Path to the pre-computed activation scales. Models may
            use them to rescale some weights to smooth the intermediate activations
            and improve the quantization accuracy. See
            https://github.com/mit-han-lab/smoothquant.
          copy_files: List of filenames to copy from the Hugging Face model to the
            converted model directory.
          load_as_float16: Load the model weights as float16. More precisely, the model
            will be loaded with ``from_pretrained(..., dtype=torch.float16)``.
          revision: Revision of the model to download from the Hugging Face Hub.
          low_cpu_mem_usage: Enable the flag ``low_cpu_mem_usage`` when loading the model
            with ``from_pretrained``.
          trust_remote_code: Allow converting models using custom code.
        N)_model_name_or_path_activation_scales_copy_files_load_as_float16	_revision_low_cpu_mem_usage_trust_remote_code)selfr*   r+   r,   r-   r.   r/   r0   s           r#   __init__zTransformersConverter.__init__F   sA    8 $6 "3% /!"3"3r%   c                    t          j                    5  t          j                            | j        | j                  }|j        j        }t          
                    |          }|Mt          d|dd                    t          t                                                              d          t          t          |j                  }t#          |d          r|                    ||          }t          j        }d|i}t#          |d          r(|                    |                    |                     d	| j        rt           j        n!t          |d	d           pt          |d
d           i}| j        r
| j        |d<   | j        r
| j        |d<   | j        r
| j        |d<    | j        || j        fi ||}i }	| j        r
| j        |	d<    | j        || j        fi |	}
 |||
          }| j        r1t          j        | j        d          }|                    ||           | j        r2| j        D ]*}|                     | !                    |                     +|cd d d            S # 1 swxY w Y   d S )N)r0   z8No conversion is registered for the model configuration z  (supported configurations are: , )get_model_classconfigget_model_kwargsdtypetorch_dtyper.   r/   r0   cpu)map_location)"torchno_gradtransformers
AutoConfigfrom_pretrainedr2   r8   	__class____name__r    get
ValueErrorjoinsortedkeysgetattrarchitecture_namehasattrr>   AutoTokenizerupdater@   r5   float16r6   r7   
load_modelload_tokenizerr3   loadsmooth_activationr4   register_fileget_model_file)r9   r?   r"   loadermodel_classtokenizer_classextra_kwargskwargsmodeltokenizer_kwargs	tokenizerspecr+   filenames                 r#   _loadzTransformersConverter._loadj   sK   ]__ @	 @	!,<<(D<S =  F !*3K#''44F~ j #{{DIIf^5H5H5J5J.K.K$L$L$L$LN   ",0HIIKv011 J$44V[II*8O$f-Lv122 E##F$;$;F$C$CDDD ,<EMM $77 <v}d;;F ~ 4%)^z"& F.2.E*+& F.2.E*+#DOT5 9?CO E  "& P8<8O !45++!9 =M I 6%++D& B$)J+%% % %! ((/@AAA F $ 0 F FH&&t':':8'D'DEEEEA@	 @	 @	 @	 @	 @	 @	 @	 @	 @	 @	 @	 @	 @	 @	 @	 @	 @	s   II//I36I3c                      |j         |fi |S r   rI   )r9   r^   r*   ra   s       r#   rW   z TransformersConverter.load_model   s    *{*+=HHHHHr%   c                      |j         |fi |S r   ri   )r9   r_   r*   ra   s       r#   rX   z$TransformersConverter.load_tokenizer   s    ../ALLVLLLr%   c                    t           j                            | j                  r&t           j                            | j        |          }n9	 t          j        | j        |          }n# t
          j        j        $ r d }Y nw xY w|t           j        	                    |          st          d|d| j                  |S )N)repo_idrf   zFile z does not exist in model )ospathisdirr2   rN   huggingface_hubhf_hub_downloadr   EntryNotFoundErrorisfilerM   )r9   rf   rn   s      r#   r\   z$TransformersConverter.get_model_file   s    7==122 	7<< 8(CCDD&6 4x   #(;    <rw~~d33<*88T557  
 s   A( (B B)NNFNFF)rK   
__module____qualname____doc__strr   r   boolr:   rg   rW   rX   r\   r&   r%   r#   r)   r)   C   s        99
 ,0*. %"&"'"'"4 "4"4 $C="4 T#Y'	"4
 "4 3-"4  "4  "4 "4 "4 "4HA A AFI I IM M M    r%   r)   c                       e Zd ZdZed             Zej        d             Zd Z	d Z
d Zd Zd Zej        j        fd	Zd
 Zd Zd Zd ZdS )ModelLoaderzRBase class for loading Transformers models into a CTranslate2 model specification.c                     d S r   r&   r9   s    r#   rR   zModelLoader.architecture_name   s    tr%   c                     t                      r   NotImplementedErrorr9   rb   s     r#   get_model_speczModelLoader.get_model_spec   s    !###r%   c                     |                      |          }|                     |j        ||           |                     ||          }|                     ||           |S r   )r   
set_configr?   get_vocabularyset_vocabulary)r9   rb   rd   re   tokenss        r#   __call__zModelLoader.__call__   s]    ""5))UI666$$UI66D&)))r%   c                     d t          |                                                                d           D             S )Nc                     g | ]\  }}|S r&   r&   ).0token_s      r#   
<listcomp>z.ModelLoader.get_vocabulary.<locals>.<listcomp>   s,     
 
 
q 
 
 
r%   c                     | d         S N   r&   )items    r#   <lambda>z,ModelLoader.get_vocabulary.<locals>.<lambda>   s
    Q r%   )key)rO   	get_vocabitemsr9   rb   rd   s      r#   r   zModelLoader.get_vocabulary   sS    
 
"##%%++--3G3G  
 
 
 	
r%   c                     d S r   r&   r9   re   r   s      r#   r   zModelLoader.set_vocabulary       r%   c                     d S r   r&   r9   r?   rb   rd   s       r#   r   zModelLoader.set_config   r   r%   c                 6    |j         |_        |j        |_        d S r   weightgammabiasbetar9   re   modules      r#   set_layer_normzModelLoader.set_layer_norm   s    ]
K			r%   c                 T   |t           j        j        k    r|j        |_        n$|j        |_        |j        |_        |j        |_        t          |t          j                  r |j                            dd          |_        t          |d          r|j        |j        |_        d S d S d S )Nr   r   r   )r   QuantizationCT2r   qweightscalesweight_scaleqzerosweight_zero
isinstancerG   Conv1D	transposerS   r   )r9   re   r   
quant_types       r#   
set_linearzModelLoader.set_linear   s    1555 -DKK .DK &D%}Dfl122 	6+//155DK66"" 	$v{'>DIII	$ 	$'>'>r%   c                     |j         |_         d S r   )r   r   s      r#   set_embeddingszModelLoader.set_embeddings   s    mr%   c                 x    |j         |_        t          |dd          }|dk    r|j        |d          |_        d S d S )Noffsetr   r   	encodingsrQ   r9   re   r   r   s       r#   set_position_encodingsz"ModelLoader.set_position_encodings   sB    1--A::!^FGG4DNNN :r%   c                      t          d          )Nz7No activation smoothing logic is defined for this modelr~   )r9   re   r+   s      r#   rZ   zModelLoader.smooth_activation  s    !E
 
 	
r%   c           	         t          |dd           }|r|                    d          p|                    d          }|dk    rd }n[t                              |          }|?t          d|dd                    t                                                              |                    dd	          }|                    d
|          }nd }d	}t          |d
|          }|||fS )Nrope_scalingtype	rope_typedefaultRoPE scaling type 'T' is not yet implemented. The following RoPE scaling types are currently supported: r<   factorr   
rope_theta)rQ   rL   _SUPPORTED_ROPE_SCALINGr   rN   rP   )r9   r?   default_rope_thetar   r   rotary_scaling_typerotary_scaling_factorr   s           r#   get_rotary_paramszModelLoader.get_rotary_params  s   v~t<< 	K$((00QL4D4D[4Q4QII%%&*##&=&A&A)&L&L#&.-- %99dii0G0L0L0N0N&O&O&OQ  
 %1$4$4Xq$A$A!%)),8JKKJJ"&$%! 7IJJJ"$9:EEr%   N)rK   rt   ru   rv   propertyrR   abcabstractmethodr   r   r   r   r   r   r   r   r   r   r   r   rZ   r   r&   r%   r#   rz   rz      s        \\  X 	$ $ $  
 
 
          3>2J2N $ $ $ $$ $ $5 5 5
 
 

F F F F Fr%   rz   
BartConfigc                   `     e Zd Zed             Zd Z fdZd Zd Zd Z	d Z
dd	Zd
 Z xZS )
BartLoaderc                     dS )NBartForConditionalGenerationr&   r|   s    r#   rR   zBartLoader.architecture_name#  s    --r%   c                    t           j                            |j        j        |j        j        f|j        j        |j        j        t          |j        j	                 t          |j        dd                    }|                     |j        |j        j                   |                     |j        |j        j                   |                     |j        j        |j                   t          |dd           }|M|                                                                dk    r#|                                |j        j        _        |S )Nnormalize_embeddingTpre_norm
activationlayernorm_embeddingfinal_logits_biasr   )r
   TransformerSpecfrom_configr?   encoder_layersdecoder_layersencoder_attention_headsnormalize_before_SUPPORTED_ACTIVATIONSactivation_functionrQ   set_encoderencoderrb   set_decoderdecoderr   
projectionlm_headnonzeronumelsqueezer   )r9   rb   re   r   s       r#   r   zBartLoader.get_model_spec'  s   /;;\(%,*EFL0\2-el.NO '6KT R R < 
 
 	u{':;;;u{':;;;/???#E+>EE(->-F-F-H-H-N-N-P-PTU-U-U+<+D+D+F+FDL#(r%   c                     t                                          ||          }|j        j        t	          |          k     r|d |j        j                 }|S r   )superr   r?   
vocab_sizelenr9   rb   rd   r   rJ   s       r#   r   zBartLoader.get_vocabulary:  sL    ''y99<"S[[005el556Fr%   c                 Z    |                     |           |                    |           d S r   register_source_vocabularyregister_target_vocabularyr   s      r#   r   zBartLoader.set_vocabulary@  0    ''///''/////r%   c                     |j         |_         |j        |_        |j        |_        |                    |j        j                  |_        d S r   )	bos_token	eos_token	unk_tokenconvert_ids_to_tokensr?   decoder_start_token_iddecoder_start_tokenr   s       r#   r   zBartLoader.set_configD  sI    $.$.$.%.%D%DL/&
 &
"""r%   c                    |                      ||           t          |j        |j                  D ]\  }}|                     |j        |j        d           |                     |j        j        |j	                   | 
                    |j        j        |j                   | 
                    |j        j        |j                   |                     |j        j        |j                   d S NTself_attention)set_common_layersziplayerlayersset_attentionr   	self_attnr   
layer_normself_attn_layer_normr   ffnlinear_0fc1linear_1fc2final_layer_norm)r9   re   r   
layer_specr   s        r#   r   zBartLoader.set_encoderL  s    tW---!$TZ!@!@ 	S 	SJ)#    
 )4*  
 OOJN3UY???OOJN3UY???
 95;QRRRR	S 	Sr%   c                    |                      ||           t          |j        |j                  D ]\  }}|                     |j        |j        d           |                     |j        j        |j	                   t          |d          rG|                     |j        |j        d           |                     |j        j        |j                   |                     |j        j        |j                   |                     |j        j        |j                   |                     |j        j        |j                   d S )NTr   encoder_attnF)r   r   r   r  r  r   r  r   r  r  rS   	attentionr  encoder_attn_layer_normr   r  r  r  r	  r
  r  )r9   re   r   r  r   s        r#   r   zBartLoader.set_decoder^  s^   tW---!$TZ!@!@ 	S 	SJ)#    
 )4*  
 un-- 	""(&#( #   
 ##(31  
 OOJN3UY???OOJN3UY???
 95;QRRRR1	S 	Sr%   Fc                 0   d t          d          D             }|                     |d         |j                   |                     |d         |j                   |                     |d         |j                   |r!t          j        |j        d         |           nPt          j        |j        d         |d d                    t          j        |j        d         |dd                     |                     |j        d         |j                   d S )Nc                 4    g | ]}t          j                    S r&   r   
LinearSpecr   r   s     r#   r   z,BartLoader.set_attention.<locals>.<listcomp>|  !    CCCQ.00CCCr%      r   r      )	ranger   q_projk_projv_projr   fuse_linearr   out_projr9   re   r  r   split_layerss        r#   r  zBartLoader.set_attention{  s    CC%((CCCQ)9:::Q)9:::Q)9::: 	@dk!nl;;;;dk!nl2A2.>???dk!nl122.>???B);<<<<<r%   c                 8   dd l }t          |d          s.|j        j        r|                    |j        j                  nd}n|j        }||_        |                     |j	        |j
                   |                     t          |j        t                    r|j        d         n|j        |j                   t          |d          r |                     |j        |j                   t          |d          r"|                     |j        |j                   d S d S )Nr   embed_scale      ?r  r   )mathrS   r?   scale_embeddingsqrtd_modelr#  scale_embeddingsr   position_encodingsembed_positionsr   r   
embeddingslistembed_tokensr   r  r   )r9   re   r   r%  r#  s        r#   r   zBartLoader.set_common_layers  s1   v}-- 	- =0		&-/000 K !,K +##D$;V=STTT dot44%""_	
 	
 	
 6<(( 	D1BCCC6011 	V 8&:TUUUUU	V 	Vr%   F)rK   rt   ru   r   rR   r   r   r   r   r   r   r  r   __classcell__rJ   s   @r#   r   r   !  s        . . X.  &    0 0 0
 
 
S S S$S S S:= = = =V V V V V V Vr%   r   MarianConfigc                   T     e Zd Zed             Z fdZd Z fdZ fdZd Z	 xZ
S )MarianMTLoaderc                     dS )NMarianMTModelr&   r|   s    r#   rR   z MarianMTLoader.architecture_name  s    r%   c                     d|j         _        d|j         _        t                                          |          }|                     |           |S NF)r?   r   r   r   r   _remove_pad_weights)r9   rb   re   rJ   s      r#   r   zMarianMTLoader.get_model_spec  sG    (-%+0(ww%%e,,  &&&r%   c                 N    |j         |_         |j        |_        |j         |_        d S r   )r   r   r   r   s       r#   r   zMarianMTLoader.set_config  s+    $.$. &/%8"""r%   c                 Z    d|_         t                                          ||           d S NT)start_from_zero_embeddingr   r   r9   re   r   rJ   s      r#   r   zMarianMTLoader.set_decoder  s+    )-&D'*****r%   c                     t                                          ||          }|d         dk    r|                                 |S )Nr  z<pad>)r   r   popr   s       r#   r   zMarianMTLoader.get_vocabulary  s?     ''y99":  JJLLLr%   c                    |j         j        d         |j        j        |j        j        g}|d         j        j        d         dz
  }|D ]}|j        j        d         |dz   k    r|j        d d         |_        t          |t          j                  rA|	                                r-|j
        j        d         |dz   k    r|j
        d d         |_
        d S )Nr   r   r  )r   r,  r   r   r   shaper   r   r  has_biasr   )r9   re   vocab_specsnew_vocab_size
vocab_specs        r#   r9  z"MarianMTLoader._remove_pad_weights  s    L#A&L#L#
 %Q.4Q7!;% 	7 	7J &q)^a-???$.$5crc$:
!:{'=>>7''))7 O)!,0BBB",/#2#"6
	7 	7r%   )rK   rt   ru   r   rR   r   r   r   r   r9  r0  r1  s   @r#   r4  r4    s          X    9 9 9+ + + + +    7 7 7 7 7 7 7r%   r4  M2M100Configc                   D     e Zd Zed             Z fdZd Z fdZ xZS )M2M100Loaderc                     dS )NM2M100ForConditionalGenerationr&   r|   s    r#   rR   zM2M100Loader.architecture_name  s    //r%   c                 v    d|j         _        d|j         _        t                                          |          S )NTF)r?   r   r   r   r   )r9   rb   rJ   s     r#   r   zM2M100Loader.get_model_spec  s/    (,%+0(ww%%e,,,r%   c                 8    |j         |j        d          |_        d S r   )weightsr   r   r   s      r#   r   z#M2M100Loader.set_position_encodings  s    8r%   c                    t                                          ||          }|d         |j        k    r-|                    |j        |                                           |j                            dg           D ]}||vr|                    |           t          |d|j
        j        t          |          z
            }|dk    r|d t          |          D             z  }|S )Nr  additional_special_tokensnum_madeup_wordsr   c                     g | ]}d |z  S )zmadeupword%dr&   r   is     r#   r   z/M2M100Loader.get_vocabulary.<locals>.<listcomp>  s    KKKa~)KKKr%   )r   r   r   insertunk_token_idr@  special_tokens_maprL   appendrQ   r?   r   r   r  )r9   rb   rd   r   r   rQ  rJ   s         r#   r   zM2M100Loader.get_vocabulary  s    ''y99 ":,,,MM)0&**,,???1556QSUVV 	% 	%EF""e$$$")5<+BS[[+P
 
 aKK59I3J3JKKKKFr%   )	rK   rt   ru   r   rR   r   r   r   r0  r1  s   @r#   rI  rI    sy        0 0 X0- - - - -
9 9 9        r%   rI  MBartConfigc                   *    e Zd Zed             Zd ZdS )MBartLoaderc                     dS )NMBartForConditionalGenerationr&   r|   s    r#   rR   zMBartLoader.architecture_name  s    ..r%   c                     |j         |_         |j        |_        |j        |_        t          |j        dd           dv r	d |_        d S |j        |_        d S )Nr_   )MBartTokenizerN)r   r   r   rQ   r?   r   r   s       r#   r   zMBartLoader.set_config  s_    $.$.$. 5<!2D99=UUU)-F&&&)2)<F&&&r%   NrK   rt   ru   r   rR   r   r&   r%   r#   r[  r[    s<        / / X/	= 	= 	= 	= 	=r%   r[  PegasusConfigc                   *    e Zd Zed             Zd ZdS )PegasusLoaderc                     dS )NPegasusForConditionalGenerationr&   r|   s    r#   rR   zPegasusLoader.architecture_name      00r%   c                 f    |j         |_        |j        |_        |j        |_        |j         |_        d S r   )	pad_tokenr   r   r   r   r   s       r#   r   zPegasusLoader.set_config  s4    $.$.$.%.%8"""r%   Nr`  r&   r%   r#   rc  rc    s<        1 1 X19 9 9 9 9r%   rc  	OPTConfigc                   \     e Zd Zed             Zd Zd Zd Zd Z fdZ	d Z
 fdZ xZS )		OPTLoaderc                     dS )NOPTForCausalLMr&   r|   s    r#   rR   zOPTLoader.architecture_name       r%   c                 x   t           j                            |j        j        |j        j        |j        j        t          |j        j                 |j        j	        |j        j
        k              }|                     |j        |j        j                   |                     |j        j        |j                   |S )N)r   r   project_in_out)r
   TransformerDecoderModelSpecr   r?   num_hidden_layersnum_attention_headsdo_layer_norm_beforer   r   word_embed_proj_dimhidden_sizer   r   rb   r   r   r   r9   rb   re   s      r#   r   zOPTLoader.get_model_spec$  s    ;GGL*L,\6-el.NO <;u|?WW H 
 
 	u{':;;;/???r%   c                 &   t          |j        j                  D ]v\  }}d|z  }t          j        |j        j        |j        j        d         |d|z                      t          j        |j        j        |j        j	        |d|z                      wd S )Nzmodel.decoder.layers.%dr   z%s.self_attn.q_projz%s.fc1)
	enumerater   r   r   rZ   r   r  r   r  r  )r9   re   r+   rT  r   layer_scopes         r#   rZ   zOPTLoader.smooth_activation1  s    !$,"455 	 	HAu3a7K#$/$+A.!"7+"EF   #	$	"!(["89   	 	r%   c                 0    |                     |           d S r   register_vocabularyr   s      r#   r   zOPTLoader.set_vocabularyA        (((((r%   c                 N    |j         |_         |j        |_        |j        |_        d S r   r   r   r   r   s       r#   r   zOPTLoader.set_configD  )    $.$.$.r%   c                 :   t                                          ||           |j         |                     |j        |j                   |j         |                     |j        |j                   |j        "|                     |j        |j                   d S d S r   )r   r   
project_inr   project_outr  r   r  r>  s      r#   r   zOPTLoader.set_decoderI  s    D'***)OODOW-?@@@*OOD,g.ABBB#/1IJJJJJ 0/r%   c                     d|_         |                     |j        |j                   |                     |j        |j                   d S r8  )r)  r   r*  r+  r   r,  r.  r   s      r#   r   zOPTLoader.set_common_layersS  sG     %##D$;V=STTTDOV-@AAAAAr%   c                    t                                          ||          }d}t          |          dz  dk    rId                    |          }||vr|                    |           |dz  }t          |          dz  dk    I|S )Nr      zmadeupword{:04d}r   )r   r   r   formatrX  )r9   rb   rd   r   rT  symbolrJ   s         r#   r   zOPTLoader.get_vocabularyX  s    ''y99&kkAo""'..q11FV##f%%%FA	 &kkAo"" r%   )rK   rt   ru   r   rR   r   rZ   r   r   r   r   r   r0  r1  s   @r#   rk  rk    s            X      ) ) )/ / /
K K K K KB B B

 
 
 
 
 
 
 
 
r%   rk  GPTBigCodeConfigc                   L     e Zd Zed             Zd Zd Z fdZd Zd Z	 xZ
S )GPTBigCodeMHALoaderc                     dS )NGPTBigCodeForCausalLMr&   r|   s    r#   rR   z%GPTBigCodeMHALoader.architecture_nameg  s    &&r%   c                 *   t           j                            |j        j        |j        j        dt          |j        j                 d          }|                     |j	        |j
                   |                     |j	        j        |j                   |S )NT)r   r   multi_query_attentionr
   rq  r   r?   n_layern_headr   r   r   r   transformerr   r   r   rw  s      r#   r   z"GPTBigCodeMHALoader.get_model_speck  s    ;GGL L-el.NO"& H 
 
 	u'8999/???r%   c                 0    |                     |           d S r   r|  r   s      r#   r   z"GPTBigCodeMHALoader.set_vocabularyx  r~  r%   c                     t                                          ||          }|j        j        t	          |          z
  }t          |          D ]}|                    d|z             |S N<extra_id_%d>r   r   r?   r   r   r  rX  r9   rb   rd   r   	extra_idsrT  rJ   s         r#   r   z"GPTBigCodeMHALoader.get_vocabulary{  g    ''y99L+c&kk9	y!! 	/ 	/AMM/A-....r%   c                 N    |j         |_         |j        |_        |j        |_        d S r   r  r   s       r#   r   zGPTBigCodeMHALoader.set_config  r  r%   c                    d|_         |                     |j        |j                   |                     |j        |j                   |                     |j        |j	                   t          |j        |j                  D ]\  }}|                     |j        j        |j                   |                     |j        j        d         |j        j                   |                     |j        j        d         |j        j                   |                     |j        j        |j                   |                     |j        j        |j        j                   |                     |j        j        |j        j                   d S NFr   r   r)  r   r,  wter   r*  wper   r  ln_fr   r   hr   ln_1r   r   attnc_attnc_projr  ln_2r  mlpc_fcr	  r9   re   r   r  r   s        r#   r   zGPTBigCodeMHALoader.set_decoder  J    %DOVZ888##D$;VZHHHDOV[999!$TZ!:!: 	G 	GJ
 9 DejQQQOOJ5<Q?ARSSSOOJ5<Q?ARSSS
 95:FFFOOJN3UY^DDDOOJN3UY5EFFFF	G 	Gr%   )rK   rt   ru   r   rR   r   r   r   r   r   r0  r1  s   @r#   r  r  e  s        ' ' X'  ) ) )    / / /
G G G G G G Gr%   r  
GPT2Configc                   <    e Zd Zed             Zd Zd Zd Zd ZdS )
GPT2Loaderc                     dS )NGPT2LMHeadModelr&   r|   s    r#   rR   zGPT2Loader.architecture_name        r%   c                 (   t           j                            |j        j        |j        j        dt          |j        j                           }|                     |j	        |j
                   |                     |j	        j        |j                   |S )NT)r   r   r  rw  s      r#   r   zGPT2Loader.get_model_spec  s}    ;GGL L-el.NO	 H 
 
 	u'8999/???r%   c                 0    |                     |           d S r   r|  r   s      r#   r   zGPT2Loader.set_vocabulary  r~  r%   c                 N    |j         |_         |j        |_        |j        |_        d S r   r  r   s       r#   r   zGPT2Loader.set_config  r  r%   c                    d|_         |                     |j        |j                   |                     |j        |j                   |                     |j        |j	                   t          |j        |j                  D ]\  }}|                     |j        j        |j                   |                     |j        j        d         |j        j                   |                     |j        j        d         |j        j                   |                     |j        j        |j                   |                     |j        j        |j        j                   |                     |j        j        |j        j                   d S r  r  r  s        r#   r   zGPT2Loader.set_decoder  r  r%   N	rK   rt   ru   r   rR   r   r   r   r   r&   r%   r#   r  r    sn        ! ! X!
 
 
) ) )/ / /
G G G G Gr%   r  
GPTJConfigc                   <    e Zd Zed             Zd Zd Zd Zd ZdS )
GPTJLoaderc                     dS )NGPTJForCausalLMr&   r|   s    r#   rR   zGPTJLoader.architecture_name  r  r%   c           
      p   t           j                            |j        j        |j        j        dt          |j        j                 |j        j        ddd          }| 	                    |j
        |j        |j        j        |j        j                   |                     |j
        j        |j                   |S NTFr   r   
rotary_dimrotary_interleaveparallel_residualshared_layer_norm)r
   rq  r   r?   r  r  r   r   r  r   r   r  r   r   r   rw  s      r#   r   zGPTJLoader.get_model_spec  s    ;GGL L-el.NO|.#"" H 	
 	
 	LL#L		
 	
 	
 	/???r%   c                 0    |                     |           d S r   r|  r   s      r#   r   zGPTJLoader.set_vocabulary  r~  r%   c                 N    |j         |_         |j        |_        |j        |_        d S r   r  r   s       r#   r   zGPTJLoader.set_config  r  r%   c                 6   d|_         |                     |j        |j                   |                     |j        |j                   t          |j        |j	                  D ]5\  }}|                     |j
        |j                   |j        j        j        }|j        j        j        }|j        j        j        }	t#          j        |||          }t#          j        |||          }t'          j        |||	f          |j        j        d         _        |                     |j        j        d         |j        j                   |                     |j        j        |j        j                   |                     |j        j        |j        j                   7d S r  )r)  r   r,  r  r   r  r  r   r   r  r  r  r  r  r   r  r  r   permute_for_sliced_rotaryrE   catr   r   r   r  r  r  r  fc_inr	  fc_out)
r9   re   r   r  	num_headsr  r   qwkwvws
             r#   r   zGPTJLoader.set_decoder  s]    %DOVZ888DOV[999!$TZ!:!: 	G 	GJ
 <ejIII")B")B")B0Y
KKB0Y
KKB9>BB<9P9PJ%,Q/6OOJ5<Q?ATUUUOOJN3UY_EEEOOJN3UY5EFFFF	G 	Gr%   Nr  r&   r%   r#   r  r    sn        ! ! X!  *) ) )/ / /
G G G G Gr%   r  CodeGenConfigc                   L     e Zd Zed             Zd Z fdZd Zd Zd Z	 xZ
S )CodeGenLoaderc                     dS )NCodeGenForCausalLMr&   r|   s    r#   rR   zCodeGenLoader.architecture_name      ##r%   c           
         t           j                            |j        j        |j        j        dt          |j        j                 |j        j        ddd          }d}t          |j        d          r|j        j
        dv rd}|                     |j        |j        |j        j        |j        j        |j        j        |           |                     |j        j        |j                   |S )	NTFr     head_dim)      r  )mp_num)r
   rq  r   r?   r  r  r   r   r  rS   r  r   r   r  n_embdr   r   r   )r9   rb   re   r  s       r#   r   zCodeGenLoader.get_model_spec   s    ;GGL L-el.NO|.#"" H 	
 	
 5<,, 	1F*1T1T FLL#LL 	 	
 	
 	
 	/???r%   c                     t                                          ||          }|j        j        t	          |          z
  }t          |          D ]}|                    d|z             |S r  r  r  s         r#   r   zCodeGenLoader.get_vocabulary  sg    ''y99L+c&kk9	y!! 	/ 	/AMM/A-....r%   c                 0    |                     |           d S r   r|  r   s      r#   r   zCodeGenLoader.set_vocabulary(  r~  r%   c                 N    |j         |_         |j        |_        |j        |_        d S r   r  r   s       r#   r   zCodeGenLoader.set_config+  r  r%   c                 6   d|_         |                     |j        |j                   |                     |j        |j                   t          j        d|dz            	                    dd          j
                                                                        }||z  t          j        fd|D                       }t          |j        |j                  D ]:\  }	}
|                     |	j        |
j                   |
j        j        j        }||d d f         }|                    dd          \  }}}t1          j        |||          }t1          j        |||          }t          j        |||f          |	j        j        d         _        |                     |	j        j        d         |
j        j                   |                     |	j        j        |
j         j!                   |                     |	j        j"        |
j         j#                   <d S )NFr   r  r  c                 L    g | ] }t          j        |z  |d z   z            !S )r   )rE   arange)r   rT  	local_dims     r#   r   z-CodeGenLoader.set_decoder.<locals>.<listcomp>8  s2    XXX!U\!i-!a%9)<==XXXr%   dimr   )$r)  r   r,  r  r   r  r  npr  reshapeTflattentolistrE   r  r   r   r  r  r  r  qkv_projr   chunkr   r  r   r   r   r  r  r  r  r  r	  r  )r9   re   r   r  r  	embed_dimr  base_permutationpermutationr  r   r  new_qkv_projr  r  r  r  s                   @r#   r   zCodeGenLoader.set_decoder0  s    %DOVZ888DOV[9999Q
33;;BBBDLLNNUUWW'	iXXXXGWXXX
 
 "%TZ!:!: 	G 	GJ
 <ejIII z*1H $KN3L%++A1+55JBB 0Y
KKB0Y
KKB9>BB<9P9PJ%,Q/6OOJ5<Q?ATUUUOOJN3UY_EEEOOJN3UY5EFFFF-	G 	Gr%   rK   rt   ru   r   rR   r   r   r   r   r   r0  r1  s   @r#   r  r    s        $ $ X$  <    ) ) )/ / /
!G !G !G !G !G !G !Gr%   r  GPTNeoXConfigc                   L     e Zd Zed             Zd Z fdZd Zd Zd Z	 xZ
S )GPTNeoXLoaderc                     dS )NGPTNeoXForCausalLMr&   r|   s    r#   rR   zGPTNeoXLoader.architecture_nameV  r  r%   c                    t           j                            |j        j        |j        j        dt          |j        j                 t          |j        j	        |j        j
        |j        j        z  z            d|j        j        d          }|                     |j        |j        |j        j                   |                     |j        j        |j                   |S r  )r
   rq  r   r?   rr  rs  r   
hidden_actint
rotary_pctrv  use_parallel_residualr   r   gpt_neoxr   r   	embed_outrw  s      r#   r   zGPTNeoXLoader.get_model_specZ  s    ;GGL*L,-el.EF'<+u|/OOQ  $#l@# H 
 
 	u~u|7WXXX/AAAr%   c                     t                                          ||          }|j        j        t	          |          z
  }t          |          D ]}|                    d|z             |S r  r  r  s         r#   r   zGPTNeoXLoader.get_vocabularym  r  r%   c                 0    |                     |           d S r   r|  r   s      r#   r   zGPTNeoXLoader.set_vocabularyv  r~  r%   c                 N    |j         |_         |j        |_        |j        |_        d S r   r  r   s       r#   r   zGPTNeoXLoader.set_configy  r  r%   c                    d|_         |                     |j        |j                   |                     |j        |j                   t          |j        |j	                  D ]	\  }}t          |d          rA|                     |j        |j                   |                     |j        |j                   nJ|                     |j        j        |j                   |                     |j        j        |j                   |j        j        j        }|j        j        j        }|                    |dd|j        d                                       dd                              d|j        d                   }|                    |dd                              dd                              d          }||j        j        d         _        ||j        j        d         _        |                     |j        j        d         |j        j                   |                     |j        j        |j        j                   |                     |j        j        |j        j                   d S )NFinput_layer_normr  r  r   r   ) r)  r   r,  embed_inr   r  r  r   r   r  rS   r  input_layernormpost_attention_layer_normpost_attention_layernormr   r  r  query_key_valuer   r   r  rB  swapaxesr   r   denser  r  dense_h_to_4hr	  dense_4h_to_h)r9   re   r   r  r  r   qkv_wqkv_bs           r#   r   zGPTNeoXLoader.set_decoder~  s3    %DOV_===DOV-DEEE!$TZ!?!? 	N 	NJz#566 ##J$?AVWWW##8%:X    ##-8%:O   ##N-u/M   O3:EO38E iBB@@!QU[_-- 
 MM)Q33<<QBBJJ2NNE9>J%,Q/67<J%,Q/4OOJ5<Q?AVWWWOOJN3UY5LMMMOOJN3UY5LMMMM=	N 	Nr%   r  r1  s   @r#   r  r  T  s        $ $ X$  &    ) ) )/ / /
#N #N #N #N #N #N #Nr%   r  WhisperConfigc                   l     e Zd Zed             Zd Zd Zd Z fdZd Z	 fdZ
 fdZd	 Zd
 Z xZS )WhisperLoaderc                     dS )NWhisperForConditionalGenerationr&   r|   s    r#   rR   zWhisperLoader.architecture_name  rf  r%   c                 b   t          j        |j        j        |j        j        |j        j        |j        j                  }|                     |j        |j	        j                   | 
                    |j        |j	        j                   |                     |j        j        |j                   |S r   )r   WhisperSpecr?   r   r   r   decoder_attention_headsr   r   rb   r   r   r   r   proj_outrw  s      r#   r   zWhisperLoader.get_model_spec  s    'L'L0L'L0	
 
 	u{':;;;u{':;;;/@@@r%   c                 V    g dt          dg           }|sg S fd|D             S )N)z<|endoftext|>z<|startoftranscript|>z<|translate|>z<|transcribe|>z<|startoflm|>z<|startofprev|>z<|nocaptions|>z<|notimestamps|>rP  c                 B    g | ]}|v                     |          S r&   )convert_tokens_to_ids)r   r   non_lang_special_tokensrd   s     r#   r   z>WhisperLoader._get_lang_ids_from_tokenizer.<locals>.<listcomp>  s=     
 
 
333 ++E22333r%   )rQ   )r9   rd   additional_tokensr  s    ` @r#   _get_lang_ids_from_tokenizerz*WhisperLoader._get_lang_ids_from_tokenizer  sh    	#
 	#
 	#
 $I/JBOO  	I
 
 
 
 
*
 
 
 	
r%   c                    t          |dd           }|p|j        |_        |j        |_        t          |d          r|j        |_        t          |d          r+t          |j        	                                          |_
        nF|j        j        |_        |j        j        |_        t                              |j                  |_        t          |dd           |                     |          |_
        |j        _|j        j        }|j        j        }t%          t'          j        t+          |dz  |          t+          |                              |_        d S d S )Ngeneration_configalignment_heads
lang_to_idlang_idsr  )rQ   suppress_tokenssuppress_idsbegin_suppress_tokenssuppress_ids_beginrS   r  rO   r   valuesr!  r?   _WHISPER_ALIGNMENT_HEADSrL   name_or_pathr  r   r  r-  	itertoolsproductr  )r9   r?   rb   rd   
gen_config
num_layersr  s          r#   r   zWhisperLoader.set_config  sD   U$7>>
!","<F(2(HF%z#455 D)3)C&z<00 I"()>)E)E)G)G"H"H"',">F(-(JF%%=%A%A%BT%U%UF"6:t,,4"??	JJFO!)4J<I%)!*/:66)$$ & &F"""	 *)r%   c           	          t                                          ||          }|                    d t          |j        j        t          |          z
            D                        |S )Nc              3   &   K   | ]}d |dz  z  V  dS )z<|%.2f|>g{Gz?Nr&   rS  s     r#   	<genexpr>z/WhisperLoader.get_vocabulary.<locals>.<genexpr>  s?       
 
 !d(#
 
 
 
 
 
r%   )r   r   extendr  r?   r   r   r   s       r#   r   zWhisperLoader.get_vocabulary  so    ''y99 	 
 
5<2S[[@AA
 
 
 	
 	
 	

 r%   c                 0    |                     |           d S r   r|  r   s      r#   r   zWhisperLoader.set_vocabulary  r~  r%   c                     |                      |j        |j                   |                      |j        |j                   t                                          ||           d S r   )
set_conv1dconv1conv2r   r   )r9   re   r   rJ   s      r#   r   zWhisperLoader.set_encoder  sS    
GM222
GM222D'*****r%   c                     |                      |j        |j                   t                                          ||           d S r   )r   r,  r.  r   r   r>  s      r#   r   zWhisperLoader.set_decoder  s>    DOW-ABBBD'*****r%   c                     |                      |j        |j                   |                     |j        |j                   d S r   )r   r*  r+  r   r  r   s      r#   r   zWhisperLoader.set_common_layers  s?    ##D$;V=STTTDOV->?????r%   c                 6    |j         |_         |j        |_        d S r   r   r   r   s      r#   r3  zWhisperLoader.set_conv1d  s    mK			r%   )rK   rt   ru   r   rR   r   r  r   r   r   r   r   r   r3  r0  r1  s   @r#   r  r    s        1 1 X1  
 
 
,  :	 	 	 	 	) ) )+ + + + +
+ + + + +@ @ @             r%   r  Wav2Vec2Configc                   d     e Zd Zed             Zd Zd Zd Zd Zd Z	d Z
d Z fd	Zd
 Z xZS )Wav2Vec2Loaderc                     dS )NWav2Vec2ForCTCr&   r|   s    r#   rR   z Wav2Vec2Loader.architecture_name  rn  r%   c                    t          |j        j        dd          }t          j        |j        j        j        |j        j        j        j        |j        j        j        j        |j	        j
        j        d         |          }|j        j        j        D ]M}|j        |_        |j        |_        |j        j        |_        |j        j        |_        |j        j        |_        N|                     |j        ||j        j                   |S Nreturn_hiddenFr   )rQ   wav2vec2r?   r   Wav2Vec2Specnum_feat_extract_layersr   rr  rs  r   r   rB  r  r  r  r  r  feed_forwardintermediate_act_fnactivation_fnintermediate_denser  output_denser
  r   )r9   rb   rA  re   r   s        r#   r   zWav2Vec2Loader.get_model_spec  s     5NN)N!9N");N")=M &q)
 
 ^+2 	8 	8E#oEO).)9E&"'"4"HE*=EI*7EIIuen.CDDDr%   c                     d S r   r&   r   s       r#   r   zWav2Vec2Loader.set_config&      r%   c                 *    |                                 S r   r   r   s      r#   r   zWav2Vec2Loader.get_vocabulary)      ""$$$r%   c                 0    |                     |           d S r   r|  r   s      r#   r   zWav2Vec2Loader.set_vocabulary,  r~  r%   c                    |j         d         j        j        |j        j        _        |j         d         j        j        |j        j        _        |                     |j        j        |j         d         j                   t          |j        |j         dd                    D ]Q\  }}|j        j        |j        _        |j        j        |j        _        |                     |j        |j                   Rd S )Nr   r   )	conv_layersconvr   feat_layer0r   r   r  r   
feat_layer)r9   re   feature_extractor
spec_layermodule_layers        r#   set_feature_extractorz$Wav2Vec2Loader.set_feature_extractor/  s    '8'DQ'G'L'S$%6%B1%E%J%O"'):)Fq)I)T	
 	
 	
 ),O.:122>)
 )
 	P 	P$J &2%6%=JO"#/#4#9JO 
 5|7NOOOO	P 	Pr%   c                     |                      |j        |j                   |                     |j        |j                   d S r   r   fp_layer_normr  r   fp_projectionr   r9   re   feature_projections      r#   set_feature_projectionz%Wav2Vec2Loader.set_feature_projection<  @    D.0B0MNNN*,>,IJJJJJr%   c                 >   |j         j        j        j                                        |j         j        j        _        |j         j        j                                        |j         j        j        _        |j                                         D ] }|j                                        |_        !|                     t          j        dd|j	        f                     |j         j        j        |j         j        _        |j         j        j        |j         j        _        d S r   )
pos_conv_embedrR  r   datafloatr   
parametersrE   randnrv  )r9   re   r   r?   params        r#   set_pos_conv_embedz!Wav2Vec2Loader.set_pos_conv_embed@  s     "'.399;; 	#*/ 180F0K0P0V0V0X0X#(-+6688 	, 	,E))++EJJu{Aq&2D+EFFGGG*1*@*E*L '(/(>(C(H %%%r%   c                    |                      ||j        j                   |                     ||j        j                   |                     ||j        j        |           t                                          ||j        j                   t          |j        j
        dd          }|s"|                     |j        |j                   d S d S NrA  F)rX  rB  rU  r_  r^  rh  r   r   r   rQ   r?   r   r   )r9   re   rb   r?   rA  rJ   s        r#   r   zWav2Vec2Loader.set_encoderM  s    ""4)IJJJ##D%.*KLLLen&<fEEED%."8999 5NN 	9OODL%-88888	9 	9r%   c                 F    |                      |j        |j                   d S r   )r   r  r   s      r#   r   z Wav2Vec2Loader.set_common_layersV  s#    DOV->?????r%   )rK   rt   ru   r   rR   r   r   r   r   rX  r_  rh  r   r   r0  r1  s   @r#   r<  r<    s            X   *  % % %) ) )P P PK K KI I I9 9 9 9 9@ @ @ @ @ @ @r%   r<  Wav2Vec2BertConfigc                   j    e Zd Zed             Zd Zd Zd Zd Zd Z		 ddZ
d	 Zd
 Zd Zd Zd ZdS )Wav2Vec2BertLoaderc                     dS )NWav2Vec2BertForCTCr&   r|   s    r#   rR   z$Wav2Vec2BertLoader.architecture_name\  r  r%   c                    t          |j        j        dd          }t          j        |j        j        j        |j        j        j        |j        j        j	        d         |          }| 
                    |j        |           |S r@  )rQ   wav2vec2_bertr?   r   Wav2Vec2BertSpecnum_adapter_layersrr  r   r   rB  r   r   )r9   rb   rA  re   s       r#   r   z!Wav2Vec2BertLoader.get_model_spec`  su     3 :OUSS 1&9&8M &q)	
 
 	u---r%   c                     d S r   r&   r   s       r#   r   zWav2Vec2BertLoader.set_configk  rK  r%   c                 *    |                                 S r   rM  r   s      r#   r   z!Wav2Vec2BertLoader.get_vocabularyn  rN  r%   c                 0    |                     |           d S r   r|  r   s      r#   r   z!Wav2Vec2BertLoader.set_vocabularyq  r~  r%   c                     |                      |j        |j                   |                     |j        |j                   d S r   rZ  r]  s      r#   r_  z)Wav2Vec2BertLoader.set_feature_projectiont  r`  r%   Nc                 h   d t          d          D             }|                     |d         |j                   |                     |d         |j                   |                     |d         |j                   t          j        |j        d         |           |                     |j        d         |j                   |s|rk|j	        j
        |_        t          j        d                              |          |_        t          j        d                              |          |_        d S d S )Nc                 4    g | ]}t          j                    S r&   r  r  s     r#   r   z4Wav2Vec2BertLoader.set_attention.<locals>.<listcomp>{  r  r%   r  r   r   r  r  int32)r  r   linear_qlinear_klinear_vr   r  r   
linear_outdistance_embeddingr   !relative_asymmetric_position_keysr  rA   r   relative_left_max_positionrelative_right_max_position)r9   re   r  left_max_positionright_max_positionr!  s         r#   r  z Wav2Vec2BertLoader.set_attentionx  s    DC%((CCCQ);<<<Q);<<<Q);<<<$+a.,777B)=>>> 	 2 	5>5Q5XD2.0hw.?.?.D.DEV.W.WD+/1x/@/@/E/E"0 0D,,,	 	r%   c                 <   t          ||          D ]	\  }}|                     |j        |j                   |                     |j        j        |j        j                   |                     |j        j	        |j        j
                   |                     |j        |j        ||           |                     |j        |j                   |                     |j        |j        j                   |                     |j        |j        j                   |                     |j        |j        j                   |                     |j        |j        j                   |                     |j        |j        j                   |                     |j        |j                   |                     |j        j        |j        j                   |                     |j        j	        |j        j
                   |                     |j         |j!                   d S r   )"r   r   enc_ffn1_layer_normffn1_layer_normr   enc_ffn1r  ffn1rH  r	  rI  r  enc_attnr  enc_attn_layer_normr  enc_conv_layer_normconv_moduler  r3  enc_conv_pointwise_conv1pointwise_conv1enc_conv_depthwise_convdepthwise_convenc_conv_depthwise_layer_normdepthwise_layer_normenc_conv_pointwise_conv2pointwise_conv2enc_ffn2_layer_normffn2_layer_normenc_ffn2ffn2enc_final_layer_normr  )r9   spec_layersr  r  r  slayerr   s          r#   set_wav2vec2bert_encoderz+Wav2Vec2BertLoader.set_wav2vec2bert_encoder  s    !f55 	U 	UMFE :E<QRRROOFO4ej6STTTOOFO4ej6MNNN2CEW    :E<VWWW*E,=,H   OO/1B1R   OO.0A0P   4!6   OO/1B1R    :E<QRRROOFO4ej6STTTOOFO4ej6MNNN ;U=STTTT7	U 	Ur%   c                 ^   t          ||          D ]\  }}|                     |j        |j                   |                     |j        |j                   |                     |j        |j                   |                     |j	        |j
                   |                     |j        |j                   |                     |j        |j                   |                     |j        j        |j        j                   |                     |j        j        |j        j                   d S r   )r   r   adpt_residual_layer_normresidual_layer_normr3  adpt_residual_convresidual_convadpt_attn_layer_normr  adpt_attn_convself_attn_convr  adpt_attn_layerr  adpt_ffn_layer_normffn_layer_normr   adpt_ffnr  r  rH  r	  rI  )r9   r  r  r  r   s        r#   set_wav2vec2bert_adapterz+Wav2Vec2BertLoader.set_wav2vec2bert_adapter  s    f55 
	N 
	NMFE/1J   OOF5u7JKKK ;U=WXXXOOF153GHHHv5uGGG :E<PQQQOOFO4ei6RSSSOOFO4ei6LMMMM
	N 
	Nr%   c                    |                      ||j        j                   |                     |j        |j        j        j        |j        j        j        |j        j        j	                   | 
                    |j        |j        j        j                   t          |j        j        dd          }|s"|                     |j        |j                   d S d S rj  )r_  rr  r^  r  r   r   r  r?   left_max_position_embeddingsright_max_position_embeddingsr  adapter_layersadapterrQ   r   r   )r9   re   rb   rA  s       r#   r   zWav2Vec2BertLoader.set_encoder  s    ##D%*=*PQQQ%%'.&C&D		
 	
 	
 	%%!4!<!C	
 	
 	
   3 :OUSS 	9OODL%-88888	9 	9r%   c                 H    |j         |_         |j        |j        |_        d S d S r   r9  r   s      r#   r3  zWav2Vec2BertLoader.set_conv1d  s(    m;"DIII #"r%   c                 H    |j         |_        |j        |j        |_        d S d S r   r   r   s      r#   r   z!Wav2Vec2BertLoader.set_layer_norm  s(    ]
;"DIII #"r%   )NN)rK   rt   ru   r   rR   r   r   r   r   r_  r  r  r  r   r3  r   r&   r%   r#   rn  rn  Z  s        $ $ X$	 	 	  % % %) ) )K K K
 KO    U U U@N N N9 9 9$ $ $
$ $ $ $ $r%   rn  T5Configc                   n     e Zd Zed             Zd Z fdZd Zd ZddZ	d Z
d	 Zd
 ZddZd Z xZS )T5Loaderc                     dS )NT5ForConditionalGenerationr&   r|   s    r#   rR   zT5Loader.architecture_name  s    ++r%   c           	         t           j                            |j        j        |j        j        f|j        j        dt          |j        j                 |j        j	        dd          }| 
                    |j        |j                   | 
                    |j        |j        d           |                     |j        j        |j                   |j        j        r|j        j        dz  |j        _        |S )NT)r   r   ffn_glurelative_attention_biasrms_norm)
is_decoderg      )r
   r   r   r?   r,  num_decoder_layersr  r   dense_act_fnis_gated_act	set_stackr   r   r   r   r   tie_word_embeddingsr(  scale_outputsrw  s      r#   r   zT5Loader.get_model_spec  s    /;;\$el&EFL"-el.GHL-$( < 
 
 	t|U]333t|U]tDDD/???<+ 	D).)=t)CDL&r%   c                     t                                          ||          }|j        j        t	          |          z
  }t          |          D ]}|                    d|z             |S r  r  r  s         r#   r   zT5Loader.get_vocabulary  r  r%   c                 Z    |                     |           |                    |           d S r   r   r   s      r#   r   zT5Loader.set_vocabulary  r   r%   c                     |j         |_        |j        |_        |j        |_        t	          |j        d          r&|                    |j        j                  |_        d S |j         |_        d S )Nr   )	rh  r   r   r   rS   r?   r   r   r   r   s       r#   r   zT5Loader.set_config  sq    $.$.$.5<!9:: 	=)2)H)H3* *F&&& *3)<F&&&r%   Fc                    |                      |j        |j                   |                     t	          |j        t                    r|j        d         n|j        |j                   d|_        t          t          |j        |j                            D ]\  }\  }}|                     |j        |j        d                    |dk    r4|j        d         j        }|j        |j        _        |j        |j        _        |r&|                     |j        |j        d                    |                     |j        |j        d                    d S )Nr   Fr   r  )r   r  r  r   r   r,  r-  r.  r)  ry  r   r   blockset_self_attentionr   r  relative_attention_max_distanceset_cross_attentionr  set_ffnr  )r9   re   r   r  rT  r  r  first_self_attentions           r#   r  zT5Loader.set_stack  sI   DOV-DEEE dot44%""_	
 	
 	
 !&&/DJ0M0M&N&N 	: 	:"A"
E##J$=u{1~NNN1uu'+z!}'C$(@ )A )H )I  O(()=u{1~NNNLLR9999!	: 	:r%   c                    t          |d          rK|                     |j        |j        j                   |                     |j        |j        j                   n%|                     |j        |j        j                   |                     |j        |j        j	                   | 
                    |j        |j                   d S )Nlinear_0_noact)rS   r   r  DenseReluDensewi_0r  wi_1wir	  wor   r  r   s      r#   r  zT5Loader.set_ffn   s    4)** 	EOODM6+@+EFFFOOD/1F1KLLLLOODM6+@+CDDDv'<'?@@@DOV->?????r%   c                     |                      ||j        d           |                     |j        |j                   d S r   )r  SelfAttentionr   r  r   s      r#   r  zT5Loader.set_self_attention*  sA    4!5dKKKDOV->?????r%   c                 |    |                      ||j                   |                     |j        |j                   d S r   )r  EncDecAttentionr   r  r   s      r#   r  zT5Loader.set_cross_attention.  s<    4!7888DOV->?????r%   c                    d|_         d t          d          D             }|                     |d         |j                   |                     |d         |j                   |                     |d         |j                   |r!t          j        |j        d         |           nPt          j        |j        d         |d d                    t          j        |j        d         |dd                     |                     |j        d         |j	                   |j
        rD|j        j        |_        t          j        d                              |j                  |_        d S d S )	Nr$  c                 4    g | ]}t          j                    S r&   r  r  s     r#   r   z*T5Loader.set_attention.<locals>.<listcomp>5  r  r%   r  r   r   r  r  r{  )queries_scaler  r   qkvr   r  r   ohas_relative_attention_biasr  r   r  rA   r   r  r   s        r#   r  zT5Loader.set_attention2  sG    CC%((CCCQ555Q555Q555 	@dk!nl;;;;dk!nl2A2.>???dk!nl122.>???B5550 	+4+L+SD(358G3D3D3I3I94 4D000	 	r%   c                     |j         |_        d S r   r   r   r9   re   r  s      r#   r   zT5Loader.set_layer_normH      &


r%   r/  )rK   rt   ru   r   rR   r   r   r   r   r  r  r  r  r  r   r0  r1  s   @r#   r  r    s        , , X,  (    0 0 0	= 	= 	=: : : :>@ @ @@ @ @@ @ @   ,' ' ' ' ' ' 'r%   r  	MT5Configc                   $    e Zd Zed             ZdS )	MT5Loaderc                     dS )NMT5ForConditionalGenerationr&   r|   s    r#   rR   zMT5Loader.architecture_nameN  s    ,,r%   N)rK   rt   ru   r   rR   r&   r%   r#   r  r  L  s-        - - X- - -r%   r  BloomConfigc                   R     e Zd Zed             Zd Z fdZd Zd Zd Z	d Z
 xZS )BloomLoaderc                     dS )NBloomForCausalLMr&   r|   s    r#   rR   zBloomLoader.architecture_nameU      !!r%   c           	      "   t           j                            |j        j        |j        j        dt          j        j        ddd          }| 	                    |j
        |j                   |                     |j
        j        |j                   |S )NT)r   r   r   alibialibi_use_positive_positions)r
   rq  r   r?   r  r  r   
ActivationGELUTanhr   r   r  r   r   r   rw  s      r#   r   zBloomLoader.get_model_specY  s    ;GGL L"-6 $)- H 
 
 	u'8999/???r%   c                     t                                          ||          }|j        j        t	          |          z
  }t          |          D ]}|                    d|z             |S r  r  r  s         r#   r   zBloomLoader.get_vocabularyh  r  r%   c                 0    |                     |           d S r   r|  r   s      r#   r   zBloomLoader.set_vocabularyq  r~  r%   c                 N    |j         |_         |j        |_        |j        |_        d S r   r  r   s       r#   r   zBloomLoader.set_configt  r  r%   c                 *   d|_         |                     |j        |j                   |                     |j        |j                   |                     |j        |j                   t          |j
        |j                  D ]\  }}|                     |j        j        |j                   |                     |j        j        d         |j        j        |j        j                   |                     |j        j        d         |j        j                   |                     |j        j        |j                   |                     |j        j        |j        j                   |                     |j        j        |j        j                   d S r  )r)  r   r,  word_embeddingsr   r   word_embeddings_layernormr  r  r   r   r  r   r  set_qkv_linearr   r  r  r   r	  r  r  r  r  r
  r	  r  r  s        r#   r   zBloomLoader.set_decodery  s    %DOV-CDDDD4f6VWWWDOV[999!$TZ!:!: 	N 	NJ)4e6K   )03$4$.  
 OO)03U5I5O   )5+I   OOJN3UY5LMMMOOJN3UY5LMMMM#	N 	Nr%   c                 v   |j         }|                    |dd|j        d                   }|                    dd          }|                    d|j        d                   }|j        }|                    |dd          }|                    dd          }|                    d          }||_         ||_        d S )Nr  r  r   r   )r   r  rB  r   r   )r9   re   r   r  r   r   s         r#   r  zBloomLoader.set_qkv_linear  s    	1b&,r2BCC!!!Q''FL$455{||Iq"--~~a##||B			r%   )rK   rt   ru   r   rR   r   r   r   r   r   r  r0  r1  s   @r#   r  r  S  s        " " X"      ) ) )/ / /
N N N2      r%   r  	MPTConfigc                   R     e Zd Zed             Zd Z fdZd Zd Zd Z	d Z
 xZS )	MPTLoaderc                     dS NAutoModelForCausalLMr&   r|   s    r#   rR   zMPTLoader.architecture_name      %%r%   c                     t           j                            |j        j        |j        j        dt          j        j        d          }| 	                    |j
        |j                   |S )NT)r   r   r  )r
   rq  r   r?   n_layersn_headsr   r  GELUr   r   r  rw  s      r#   r   zMPTLoader.get_model_spec  sa    ;GGL!L "-2 H 
 
 	u'8999r%   c                     t                                          ||          }|j        j        t	          |          z
  }t          |          D ]}|                    d|z             |S r  r  r  s         r#   r   zMPTLoader.get_vocabulary  r  r%   c                 0    |                     |           d S r   r|  r   s      r#   r   zMPTLoader.set_vocabulary  r~  r%   c                 N    |j         |_         |j        |_        |j        |_        d S r   r  r   s       r#   r   zMPTLoader.set_config  r  r%   c                     |                      |j        |j                   |                     |j        |j                   d|_        |j        j        |j        _        t          |j
        |j                  D ]\  }}|                     |j        j        |j                   |                     |j        j        d         |j        j                   |                     |j        j        d         |j        j                   |                     |j        j        |j                   |                     |j        j        |j        j                   |                     |j        j        |j        j                   d S r  )r   r,  r  r   r  norm_fr)  r   r   r   r   blocksr   norm_1r   r   r  Wqkvr  r  norm_2r  up_projr	  	down_projr  s        r#   r   zMPTLoader.set_decoder  s>   DOVZ888DOV];;; %!%!7!$TZ!?!? 	J 	JJ
 9 DelSSSOOJ5<Q?QQQOOJ5<Q?ATUUU
 95<HHHOOJN3UY5FGGGOOJN3UY5HIIII	J 	Jr%   c                 Z    |j         |_        t          j        |j                  |_        d S r   )r   r   rE   
zeros_liker   r   s      r#   r   zMPTLoader.set_layer_norm  s#    ]
$TZ00			r%   )rK   rt   ru   r   rR   r   r   r   r   r   r   r0  r1  s   @r#   r  r    s        & & X&
 
 
    ) ) )/ / /
J J J 1 1 1 1 1 1 1r%   r  GemmaConfigc                   R     e Zd Zed             Zd Z fdZd Zd Zd Z	d Z
 xZS )GemmaLoaderc                     dS )NGemmaForCausalLMr&   r|   s    r#   rR   zGemmaLoader.architecture_name  r  r%   c                 @   |j         j        }|j         j        }t          |j         d|          }||k    rd }t          |j         dd          }t          j                            |||dk    rt          j        j	        nt          j        j
        dddddt          |j         dd	          ||j         j        
          }|                     |j        |j                   |                     |j        j        |j                   |j         j        dz  |j        j        _        |S )Nnum_key_value_headshidden_activationr   r   Tr   Fr   '  )	r   r   r  r  r  r  rotary_basenum_heads_kvr        ?r?   rr  rs  rQ   r
   rq  r   r   r  r  r  r  r   r   rb   r   r   r   rv  r,  multiply_by_sqrt_depthr9   rb   r,  r  r  activation_configre   s          r#   r   zGemmaLoader.get_model_spec  s   \3
L4	u|-BINN9$$L#L-/B
 
  ;GG %.. &++ +4#lEBB%\* H 
 
$ 	u{333/???9>9QSV9V6r%   c                 :   t                                          ||          }|j        j        t	          |          z
  }t          |          D ]}|                    d|z             |j        j        t	          |          k     r|d |j        j                 }|S r  r  r  s         r#   r   zGemmaLoader.get_vocabulary      ''y99L+c&kk9	y!! 	/ 	/AMM/A-....<"S[[005el556Fr%   c                 0    |                     |           d S r   r|  r   s      r#   r   zGemmaLoader.set_vocabulary  r~  r%   c                 p    |j         |_         |j        |_        |j        |_        |j        j        |_        d S r   r   r   r   r?   rms_norm_epslayer_norm_epsilonr   s       r#   r   zGemmaLoader.set_config  6    $.$.$.$)L$=!!!r%   c                 ,    |j         |_        d|_        d S r<  r   r   layer_norm_use_residualr  s      r#   r   zGemmaLoader.set_layer_norm      &
'+$$$r%   c                    d|_         d|_        |                     |j        |j                   |                     |j        |j                   t          |j	        |j
                  D ]\  }}|                     |j        j        |j                   |                     |j        j        |j                   |j        j        j        }|j        j        j        }|j        j        j        }|j        j        j        }t+          j        |||g          |j        j        d         _        ||j        j        d         _        |                     |j        j        |j        j                   |                     |j        j        |j        j                   |                     |j        j        |j        j                   tA          |d           tA          |d           tC          j"                     d S NTFr   r   r  r  )#r)  r=  r   r,  r.  r   r  normr   r   r  r   r  r  r  r  r  r   r  r  o_projrE   r  r   r   r  r  	gate_projr  r  r	  r  delattrgccollect	r9   re   r   r  r   wqwkwvr  s	            r#   r   zGemmaLoader.set_decoder  s    $).&DOV-@AAADOV[999!$TZ!?!? 	 	J)4e6K   )5+I   '.B'.B'.B'.B9>BB<9P9PJ%,Q/69;J%,Q/6OOJN3UY5HIIIOOJN959;LMMMOOJN3UY5HIIIE;'''E5!!!JLLLL-	 	r%   rK   rt   ru   r   rR   r   r   r   r   r   r   r0  r1  s   @r#   r  r    s        " " X"! ! !F	 	 	 	 	) ) )> > >, , ,      r%   r  Gemma2Configc                   R     e Zd Zed             Zd Z fdZd Zd Zd Z	d Z
 xZS )Gemma2Loaderc                     dS )NGemma2ForCausalLMr&   r|   s    r#   rR   zGemma2Loader.architecture_name;      ""r%   c                 B   |j         j        }|j         j        }t          |j         d|          }||k    rd }t          |j         dd          }t          j                            |||dk    rt          j        j	        nt          j        j
        dddddt          |j         dd	          ||j         j        d
          }|                     |j        |j                   |                     |j        j        |j                   |j         j        dz  |j        j        _        |S )Nr  r  r   r   Tr   Fr   r  )
r   r   r  r  r  r  r  r  r  pre_post_layer_normr  r   r"  s          r#   r   zGemma2Loader.get_model_spec?  s!   \3
L4	u|-BINN9$$L#L-/B
 
  ;GG %.. &++ +4#lEBB%\* $! H 
 
& 	u{333/???9>9QSV9V6r%   c                 :   t                                          ||          }|j        j        t	          |          z
  }t          |          D ]}|                    d|z             |j        j        t	          |          k     r|d |j        j                 }|S r  r  r  s         r#   r   zGemma2Loader.get_vocabularyc  r%  r%   c                 0    |                     |           d S r   r|  r   s      r#   r   zGemma2Loader.set_vocabularyn  r~  r%   c                 p    |j         |_         |j        |_        |j        |_        |j        j        |_        d S r   r(  r   s       r#   r   zGemma2Loader.set_configq  r+  r%   c                 ,    |j         |_        d|_        d S r<  r-  r  s      r#   r   zGemma2Loader.set_layer_normw  r/  r%   c                 V   d|_         d|_        |                     |j        |j                   |                     |j        |j                   t          |j	        |j
                  D ]\  }}|                     |j        |j                   |                     |j        |j                   |                     |j        |j                   |                     |j        |j                   |j        j        j        }|j        j        j        }|j        j        j        }|j        j        j        }t3          j        |||g          |j        j        d         _        ||j        j        d         _        |                     |j        j        |j         j!                   |                     |j        j"        |j         j#                   |                     |j        j$        |j         j%                   tM          |d           tM          |d           tO          j(                     d S r1  ))r)  r=  r   r,  r.  r   r  r2  r   r   r  r  r  r  r  pre_feedforward_layer_normpre_feedforward_layernormpost_feedforward_layer_normpost_feedforward_layernormr  r  r   r  r  r3  rE   r  r   r   r   r  r  r  r4  r  r  r	  r  r5  r6  r7  r8  s	            r#   r   zGemma2Loader.set_decoder{  s    $).&DOV-@AAADOV[999!$TZ!?!? 	 	J
 ;U=RSSS4e6T   5u7V   68X   '.B'.B'.B'.B9>BB<9P9PJ%,Q/69;J%,Q/6OOJN3UY5HIIIOOJN959;LMMMOOJN3UY5HIIIE;'''E5!!!JLLLL;	 	r%   r<  r1  s   @r#   r?  r?  9  s        # # X#" " "H	 	 	 	 	) ) )> > >, , ,# # # # # # #r%   r?  LlamaConfigc                   j     e Zd Zed             Zd Z fdZd Zd Zd Z	e
j        j        fdZ xZS )LlamaLoaderc                     dS )NLlamaForCausalLMr&   r|   s    r#   rR   zLlamaLoader.architecture_name  r  r%   c                    |j         j        }|j         j        }t          |j         d|          }||k    rd }|                     |j         d          \  }}}t          |j         dd           }|rd }	|j        dk    rt                              |j                  }	|	Dt          d|j        dd
                    t                                                              |j        }
|j        }nt          j        j        }	d }
d }t"          j                            ||t          j        j        dddd	d
|||||	|
|          }|                     |j        |j        |	           |                     |j        j        |j                   t          |j         dd           }|t8          j        j        k    r3|j        j        D ]&}|d         |j         _!        |d         |j         _"        '|S )Nr  r  quantization_configawqQuantization type 'T' is not yet implemented. The following Quantization types are currently supported: r<   Tr   Fr   r   r  r  r  r  r   r   r  r  r   quant_group_size
quant_bitsr   low_freq_factorhigh_freq_factor)#r?   rr  rs  rQ   r   quant_method_SUPPORTED_QUANTIZATIONrL   versionr   rN   rP   
group_sizebitsr   r   r   r
   rq  r   r  SWISHr   r   rb   r   r   r   r   RotaryScalingTypeLlama3r   r   rotary_low_freq_factorrotary_high_freq_factor)r9   rb   r,  r  r  r   r   r   rT  r   rY  rZ  re   r   r   s                  r#   r   zLlamaLoader.get_model_spec  s   \3
L4	u|-BINN9$$LAEAWAWL&B
 B
>2J &el4I4PP 	J"/5884889L9TUU
!)) ,888		"9">">"@"@AAA	    3=,1JJ$15J#J;GG"-3# 3"7"%!-! H 
 
$ 	u{J???/??? u|^TBB."B"III+  >J%?$; @L&@$<< r%   c                 :   t                                          ||          }|j        j        t	          |          z
  }t          |          D ]}|                    d|z             |j        j        t	          |          k     r|d |j        j                 }|S r  r  r  s         r#   r   zLlamaLoader.get_vocabulary  r%  r%   c                 0    |                     |           d S r   r|  r   s      r#   r   zLlamaLoader.set_vocabulary  r~  r%   c                     |j         |_         |j        |_        |j        |j        nd|_        |j        j        |_        d S N r(  r   s       r#   r   zLlamaLoader.set_config  sH    $.$.#,#6#BI 	 %*L$=!!!r%   c                     |j         |_        d S r   r  r  s      r#   r   zLlamaLoader.set_layer_norm  r  r%   c                 T   d|_         |                     |j        |j                   |                     |j        |j                   t          |j        |j	                  D ]D\  }}|                     |j
        j        |j                   |                     |j        j        |j                   d t          d          D             }|                     |d         |j        j        |           |                     |d         |j        j        |           |                     |d         |j        j        |           |t(          j        j        k    r&t/          j        |j
        j        d         |           n?|t(          j        j        k    rdnd}t/          j        |j
        j        d         ||           |                     |j
        j        d         |j        j        |           |                     |j        j        |j        j        |           |                     |j        j         |j        j!        |           |                     |j        j"        |j        j#        |           tI          |d           tI          |d	           tK          j&                     Fd S )
NFc                 4    g | ]}t          j                    S r&   r  r  s     r#   r   z+LlamaLoader.set_decoder.<locals>.<listcomp>  !    GGGK244GGGr%   r  r   r   r   r  r  r  'r)  r   r,  r.  r   r  r2  r   r   r  r   r  r  r  r  r   r  r  r  r  r   r   r   r   r  r   AWQ_GEMMfuse_linear_prequantr3  r  r  r4  r  r  r	  r  r5  r6  r7  r9   re   r   r   r  r   r!  cc_dims           r#   r   zLlamaLoader.set_decoder  s    %DOV-@AAADOV[999!$TZ!?!? ,	 ,	J)4e6K   )5+I   HGeAhhGGGLOOQ!7J     OOQ!7J     OOQ!7J     [5999!*";"B1"E|TTTT(K,D,MMMST*-4Q7v   OO)03&%     OO')<     OO-uy/@Z     OO')<     E;'''E5!!!JLLLLY,	 ,	r%   rK   rt   ru   r   rR   r   r   r   r   r   r   r   r   r   r0  r1  s   @r#   rP  rP    s        " " X"@ @ @D	 	 	 	 	) ) )> > >' ' ' 4?3K3O 1 1 1 1 1 1 1 1r%   rP  Gemma3TextConfigGemma3Configc                   p     e Zd Zed             Zd Zd Z fdZd Zd Z	d Z
ej        j        fdZ xZS )	Gemma3Loaderc                     dS )NGemma3ForCausalLMr&   r|   s    r#   rR   zGemma3Loader.architecture_name9  rB  r%   c                 >    |j         j        dk    rt          j        S |S )Nrx  )rJ   rK   rG   Gemma3ForConditionalGenerationr9   r?   default_classs      r#   r>   zGemma3Loader.get_model_class=  s$     $66>>r%   c                    t          |j        d|j                  }|j        }|j        }t          |d|          }||k    rd }|j        }t          |dd          }t          |dd          }t          |dd          }	t          |d	d
          }
t          |dd           }|.t          |dd           fdt          |          D             }t          |dd           }|rR|j        dk    rt                              |j	                  }|t          d|j        z            |j        }|j        }nt          j        j        }d }d }t           j                            |||dk    rt          j        j        nt          j        j        ddd|d|	|||
d|||d          }|| _        t/          |          D ]\  }}|j        j        |         }|dk    rct5          j        d                              |          |j        _        t5          j        d                              d          |j        _        |dk    rbt5          j        d                              |	          |j        _        t5          j        d                              |
          |j        _        t          |j         d|j                   }| !                    |j        ||           | "                    |j        j#        |j$                   |S )Ntext_configr  r  r   r   @B rope_local_base_freqr  sliding_windowi   layer_types_sliding_window_patternc                 0    g | ]}|d z   z  dk    rdndS )r   r   full_attentionsliding_attentionr&   r   rT  sliding_window_patterns     r#   r   z/Gemma3Loader.get_model_spec.<locals>.<listcomp>`  sH         A!771<< %$,  r%   rT  rU  .Quantization type '%s' is not yet implemented.r   TF)r   r   r  r  r  r  r  r  r  r  rD  r   rY  rZ  qk_normr  float32r{  r   r  language_model)%rQ   r?   rr  rs  r  r  r]  r^  rL   r_  r   r`  ra  r   r   r   r
   rq  r   r  r  r  _layer_typesry  r   r   r  rA   r   r   r  r  rb   r   r   r   r   )r9   rb   r  r,  r  r  r  r#  r   r  r  r  rT  r   rY  rZ  re   rT  
layer_typer   
text_modelr  s                        @r#   r   zGemma3Loader.get_model_specD  sV   elM5<HH 2
3	{,A9MM9$$L'#,.A
 

 [,	BB
&/ 
  

 !.>EEk=$??%,6& &" &1    #:..	   &k3H$OO 	"/5884889L9TUU
!)D)67    3=,1JJ$15J#J  ;GG %.. &++ +4#,%) $!-!+ H 
 
2 ( '{33 	 	MAzL&q)E---358I3F3F3K3KJ3W3W$068hw6G6G6L6LQ6O6O$33222358I3F3F3K3K(4 4$0 79hw6G6G6L6L"7 7$3 U[*:EKHH
z:>>>/???r%   c                 R   t                                          ||          }t          |j        d|j                  }|j        t          |          z
  }t          |          D ]}|                    d|z             |j        t          |          k     r|d |j                 }|S Nr  r  r   r   rQ   r?   r   r   r  rX  r9   rb   rd   r   r  r  rT  rJ   s          r#   r   zGemma3Loader.get_vocabulary  s    ''y99elM5<HH*S[[8	y!! 	/ 	/AMM/A-....!CKK//4k445Fr%   c                 0    |                     |           d S r   r|  r   s      r#   r   zGemma3Loader.set_vocabulary  r~  r%   c                     |j         |_         |j        |_        t          |d          r<t          |j        t
                    r"|j                                        r	d|_        d S |j        |_        d S Nchat_templatez<end_of_turn>r   r   rS   r   r  rw   stripr   r   s       r#   r   zGemma3Loader.set_config  s{    $.$. I//	392C88	3 '--//	3
  /F(2Fr%   c                 ,    |j         |_        d|_        d S r<  r-  r  s      r#   r   zGemma3Loader.set_layer_norm  r/  r%   c                 v   d|_         d|_        |                     |j        |j                   |                     |j        |j                   t          |j	        |j
                  D ]\  }}|                     |j        |j                   |                     |j        |j                   |                     |j        |j                   |                     |j        |j                   |                     |j        j        |j        j                   |                     |j        j        |j        j                   d t/          d          D             }|                     |d         |j        j        |           |                     |d         |j        j        |           |                     |d         |j        j        |           |t8          j        j        k    r&t?          j         |j        j!        d         |           n?|t8          j        j"        k    rdnd}t?          j#        |j        j!        d         ||           |                     |j        j!        d         |j        j$        |           |                     |j%        j&        |j'        j(        |           |                     |j%        j)        |j'        j*        |           |                     |j%        j+        |j'        j,        |           t[          |d	           t[          |d
           t]          j/                     d S )NTFc                 4    g | ]}t          j                    S r&   r  r  s     r#   r   z,Gemma3Loader.set_decoder.<locals>.<listcomp>  ro  r%   r  r   rp  r   r  r  r  )0r)  r=  r   r,  r.  r   r  r2  r   r   r  r  r  r  r  rJ  rK  rL  rM  r   q_normr  k_normr  r   r  r  r  r   r   r   r   r  r   rr  rs  r3  r  r  r  r4  r  r  r	  r  r5  r6  r7  rt  s           r#   r   zGemma3Loader.set_decoder  sV    $).&DOV-@AAADOV[999!$TZ!?!? >	 >	J
 ;U=RSSS4e6T   5u7V   68X  
 )0%/2H   )0%/2H  
 HGeAhhGGGLOOQ!7J     OOQ!7J     OOQ!7J     [5999!*";"B1"E|TTTT(K,D,MMMST*-4Q7v   OO)03&%     OO')<     OO-uy/@Z     OO')<     E;'''E5!!!JLLLL}>	 >	r%   rK   rt   ru   r   rR   r>   r   r   r   r   r   r   r   r   r   r0  r1  s   @r#   rz  rz  6  s         # # X#  ` ` `D
 
 
 
 
) ) )3 3 3, , , 4?3K3O D D D D D D D Dr%   rz  Gemma4TextConfigGemma4Configc                   p     e Zd Zed             Zd Zd Z fdZd Zd Z	d Z
ej        j        fdZ xZS )	Gemma4Loaderc                     dS )NGemma4ForCausalLMr&   r|   s    r#   rR   zGemma4Loader.architecture_name  rB  r%   c                 >    |j         j        dk    rt          j        S |S )Nr  )rJ   rK   rG   Gemma4ForConditionalGenerationr  s      r#   r>   zGemma4Loader.get_model_class  s"    $66>>r%   c                  
   t          |j        d|j                  }|j        }|j        }t          |d|          }||k    rd }t          |dd          }|dk    rt	          d|z            |j        }t          |d|          }t          |dd           }	t          |dd	          }
t          |d
d          }t          |dd           pi }|                    di           }|                    di           }t          |                    dd                    }t          |                    dd                    }t          |                    dd                    }t          ||z            }t          |dd          }t          |dd           }|dfdt          |          D             }t          |dd           }|rR|j
        dk    rt                              |j                  }|t	          d|j
        z            |j        }|j        }nt          j        j        }d }d }t$          j                            |||dk    rt          j        j        nt          j        j        ddd|d	||||d|||dd          }|| _        |
| _        t5          |          D ]\  }}|j        j        |         }t;          j        d                              d          |j         _!        |dk    rt;          j        d                               |          |j         _"        t;          j        d                              |          |j         _#        t;          j        d                               d          |j         _$        t;          j        d                               |          |j         _        |	1t;          j        d                               |	          |j         _%        H|dk    rbt;          j        d                              |          |j         _#        t;          j        d                               |          |j         _$        t          |j        d|j                  }t          |d!d           }|r1t;          j        d                              |          |j        _&        t          |j'        d"|j'                  }| (                    |j        ||           | )                    |j        j*        |j+                   |S )#Nr  r  num_kv_shared_layersr   ztGemma 4 KV-shared layers (num_kv_shared_layers=%d) are not yet supported. Use the 31B model which has no KV sharing.global_head_dimnum_global_key_value_headsattention_k_eq_vFr  r   rope_parametersr  r  r   r  r  partial_rotary_factorr$  r  i   r     c                 B    g | ]}t          |d z   z            rdndS )r   r  r  )rx   r  s     r#   r   z/Gemma4Loader.get_model_spec.<locals>.<listcomp>H  sK         Q"8899&##%  r%   rT  rU  r  r   T)r   r   r  r  r  r  r  r  r  r  rD  r   rY  rZ  r  v_normr  r{  final_logit_softcappingr  ),rQ   r?   rr  rs  r   r  rL   rd  r  r  r]  r^  r_  r`  ra  r   r   r   r
   rq  r   r  r  r  r  _attention_k_eq_vry  r   r   r  rA   r   r   r  r  r  r  r  r  rb   r   r   r   r   ) r9   rb   r  r,  r  r  r  r  r  num_global_kv_headsr  r#  rope_paramssliding_ropeglobal_roper  r   global_partial_factorglobal_rotary_dimr  r  rT  r   rY  rZ  re   rT  r  r   final_softcapr  r  s                                   @r#   r   zGemma4Loader.get_model_spec  s<   elM5<HH 2
3	{,A9MM9$$L  '{4JANN!##%H&'   '!+/@(KK &k3OQUVV #;0BEJJ#,.A
 

 k+<dCCIr"':B??!oo&6;;$\%5%5lF%K%KLL;??<CCDD
 !&koo6Ms&S&S T T2G GHH .>DDk=$??%&"    z**	  K &k3H$OO 	"/5884889L9TUU
!)D)67    3=,1JJ$15J#J  ;GG %.. &++ +4#,%) $!-!- H 
 
2 (!1 '{33 	 	MAzL&q)E13)1D1D1I1I#1N1NE .---24(72C2C2H2H%3 3$/ 468I3F3F3K3KJ3W3W$068hw6G6G6L6LQ6O6O$3020A0A0F0F0W0W$-&28:8I8I8N8N+9 9E(5 222358I3F3F3K3K(4 4$0 79hw6G6G6L6L"7 7$3 elM5<HH-FMM 	358I3F3F3K3K4 4DL0 U[*:EKHH
z:>>>/???r%   c                 R   t                                          ||          }t          |j        d|j                  }|j        t          |          z
  }t          |          D ]}|                    d|z             |j        t          |          k     r|d |j                 }|S r  r  r  s          r#   r   zGemma4Loader.get_vocabulary  s    ''y99elM5<HH*S[[8	y!! 	/ 	/AMM/A-....!CKK//4k445Fr%   c                 0    |                     |           d S r   r|  r   s      r#   r   zGemma4Loader.set_vocabulary  r~  r%   c                     |j         |_         |j        |_        t          |d          r<t          |j        t
                    r"|j                                        r	d|_        d S |j        |_        d S r  r  r   s       r#   r   zGemma4Loader.set_config  sy    $.$.I//	392C88	3 '--//	3
  /F(2Fr%   c                     |j         |_        d S r   r  r  s      r#   r   zGemma4Loader.set_layer_norm  r  r%   c                    d|_         d|_        |                     |j        |j                   |                     |j        |j                   t          | dd          }t          |j
        |j                  D ]\  }}|                     |j        |j                   |                     |j        |j                   |                     |j        |j                   |                     |j        |j                   |                     |j        j        |j        j                   |                     |j        j        |j        j                   t1          j        |j        j        j                                                                                  |j        j        _        |j        j        dk    }|o|}d tA          d          D             }	| !                    |	d         |j        j"        |           | !                    |	d	         |j        j#        |           |r)| !                    |	d
         |j        j#        |           n(| !                    |	d
         |j        j$        |           |tJ          j&        j'        k    r&tQ          j)        |j        j*        d         |	           n?|tJ          j&        j+        k    rd	nd}
tQ          j,        |j        j*        d         |	|
           | !                    |j        j*        d	         |j        j-        |           | !                    |j.        j/        |j0        j1        |           | !                    |j.        j2        |j0        j3        |           | !                    |j.        j4        |j0        j5        |           t          |dd           }|Ptm          j7        d          8                    |                                9                                          |_:        tw          |d           tw          |d           ty          j=                     d S )NTFr  r  c                 4    g | ]}t          j                    S r&   r  r  s     r#   r   z,Gemma4Loader.set_decoder.<locals>.<listcomp>  ro  r%   r  r   rp  r   r  layer_scalarr  r  r  )>r)  r=  r   r,  r.  r   r  r2  rQ   r   r   r  r  r  r  r  rJ  rK  rL  rM  r   r  r  r  rE   	ones_liker   rd  numpyr  r   r  r  r   r  r  r  r   r   r   r   r  r   rr  rs  r3  r  r  r  r4  r  r  r	  r  r  rA   r   r   r  r5  r6  r7  )r9   re   r   r   r  r  r   is_full_attn
use_k_as_vr!  ru  lss               r#   r   zGemma4Loader.set_decoder  sE    $).&DOV-@AAADOV[999"4)<eDD!$TZ!?!? J	 J	J
 ;U=RSSS4e6T   5u7V   68X   )0%/2H   )0%/2H    6 =>>DDFFLLNN %,2 !?59IIL):lJGGeAhhGGGLOOQ!7J     OOQ!7J       OU_%;
        OU_%;
      [5999!*";"B1"E|TTTT(K,D,MMMST*-4Q7v   OO)03&%     OO')<     OO-uy/@Z     OO')<     55B~*,(9*=*=*B*B288::??CTCT*U*U
'E;'''E5!!!JLLLLUJ	 J	r%   r  r1  s   @r#   r  r    s         # # X#  
E E EN    ) ) )
3 
3 
3' ' ' 4?3K3O R R R R R R R Rr%   r  MistralConfigc                   j     e Zd Zed             Zd Z fdZd Zd Zd Z	e
j        j        fdZ xZS )MistralLoaderc                     dS )NMistralForCausalLMr&   r|   s    r#   rR   zMistralLoader.architecture_name	  r  r%   c                 T   |j         j        }|j         j        }t          |j         d|          }||k    rd }t          |j         dd          }|                     |j         d          \  }}}t          |j         dd           }	|	r|	j        dk    rt                              |	j                  }
|
Dt          d|	j        dd	
                    t                                                              |	j        }|	j        }nt          j        j        }
d }d }t"          j                            ||t          j        j        d
d
d
dd||||||
|||j         j                  }|                     |j        |j        |
           |                     |j        j        |j                   |S )Nr  r  r   r  rT  rU  rV  rW  r<   TF)r   r   r  r  r  r  r   r   r  r  r  r   rY  rZ  r  rp  )r?   rr  rs  rQ   r   r]  r^  rL   r_  r   rN   rP   r`  ra  r   r   r   r
   rq  r   r  rb  r  r   r   rb   r   r   r   )r9   rb   r,  r  r  r  r   r   r   rT  r   rY  rZ  re   s                 r#   r   zMistralLoader.get_model_spec	  s   \3
L4	u|-BINN9$$L /?CCAEAWAWL&B
 B
>2J &el4I4PP 	"/5884889L9TUU
!)) ,888		"9">">"@"@AAA	    3=,1JJ$15J#J;GG"-3# 3"7"%)!-!\*# H 
 
( 	u{zJJJ/???r%   c                     t                                          ||          }|j        j        t	          |          z
  }t          |          D ]}|                    d|z             |S r  r  r  s         r#   r   zMistralLoader.get_vocabularyR	  r  r%   c                 0    |                     |           d S r   r|  r   s      r#   r   zMistralLoader.set_vocabulary[	  r~  r%   c                 p    |j         |_         |j        |_        |j        |_        |j        j        |_        d S r   r(  r   s       r#   r   zMistralLoader.set_config^	  r+  r%   c                     |j         |_        d S r   r  r  s      r#   r   zMistralLoader.set_layer_normd	  r  r%   c                 T   d|_         |                     |j        |j                   |                     |j        |j                   t          |j        |j	                  D ]D\  }}|                     |j
        j        |j                   |                     |j        j        |j                   d t          d          D             }|                     |d         |j        j        |           |                     |d         |j        j        |           |                     |d         |j        j        |           |t(          j        j        k    r&t/          j        |j
        j        d         |           n?|t(          j        j        k    rdnd}t/          j        |j
        j        d         ||           |                     |j
        j        d         |j        j        |           |                     |j        j        |j        j        |           |                     |j        j         |j        j!        |           |                     |j        j"        |j        j#        |           tI          |d           tI          |d	           tK          j&                     Fd S )
NFc                 4    g | ]}t          j                    S r&   r  r  s     r#   r   z-MistralLoader.set_decoder.<locals>.<listcomp>s	  ro  r%   r  r   rp  r   r  r  r  rq  rt  s           r#   r   zMistralLoader.set_decoderg	  s    %DOV-@AAADOV[999!$TZ!?!? +	 +	J)4e6K   )5+I   HGeAhhGGGLOOQ!7J     OOQ!7J     OOQ!7J     [5999!*";"B1"E|TTTT(K,D,MMMST*-4Q7v   OO)03&%     OO')<     OO-uy/@Z     OO')<     E;'''E5!!!JLLLLW+	 +	r%   rv  r1  s   @r#   r  r  	  s        $ $ X$8 8 8t    ) ) )> > >' ' ' 4?3K3O 0 0 0 0 0 0 0 0r%   r  Qwen2Configc                   j     e Zd Zed             Zd Z fdZd Zd Zd Z	e
j        j        fdZ xZS )Qwen2Loaderc                     dS )NQwen2ForCausalLMr&   r|   s    r#   rR   zQwen2Loader.architecture_name	  r  r%   c                    |j         j        }|j         j        }t          |j         d|          }||k    rd }|                     |j         d          \  }}}t          |j         dd           }|rd }	|j        dk    rt                              |j                  }	|	Dt          d|j        dd
                    t                                                              |j        }
|j        }nt          j        j        }	d }
d }t"          j                            ||t          j        j        dddd	d
|||||	|
|          }|                     |j        |j        |	           |                     |j        j        |j                   |S )Nr  r  rT  rU  rV  rW  r<   Tr   FrX  )r?   rr  rs  rQ   r   r]  r^  rL   r_  r   rN   rP   r`  ra  r   r   r   r
   rq  r   r  rb  r   r   rb   r   r   r   )r9   rb   r,  r  r  r   r   r   rT  r   rY  rZ  re   s                r#   r   zQwen2Loader.get_model_spec	  s   \3
L4	u|-BINN9$$LAEAWAWL&B
 B
>2J
 &el4I4PP 	J"/5884889L9TUU
!)) ,888		"9">">"@"@AAA	    3=,1JJ$15J#J;GG"-3# 3"7"%!-! H 
 
$ 	u{J???/???r%   c                     t                                          ||          }|j        j        t	          |          z
  }t          |          D ]}|                    d|z             |S r  r  r  s         r#   r   zQwen2Loader.get_vocabulary	  sg    ''y99L+c&kk9	y!! 	/ 	/AMM/A-....r%   c                 0    |                     |           d S r   r|  r   s      r#   r   zQwen2Loader.set_vocabulary	  r~  r%   c                     |j         |j         n|j        |_         |j        |_        |j        |j        nd|_        |j        j        |_        d S rj  r   rh  r   r   r?   r)  r*  r   s       r#   r   zQwen2Loader.set_config	  c     ". $ 	
 %.#,#6#BI 	 %*L$=!!!r%   c                     |j         |_        d S r   r  r  s      r#   r   zQwen2Loader.set_layer_norm	  r  r%   c                 T   d|_         |                     |j        |j                   |                     |j        |j                   t          |j        |j	                  D ]D\  }}|                     |j
        j        |j                   |                     |j        j        |j                   d t          d          D             }|                     |d         |j        j        |           |                     |d         |j        j        |           |                     |d         |j        j        |           |t(          j        j        k    r&t/          j        |j
        j        d         |           n?|t(          j        j        k    rdnd}t/          j        |j
        j        d         ||           |                     |j
        j        d         |j        j        |           |                     |j        j        |j        j        |           |                     |j        j         |j        j!        |           |                     |j        j"        |j        j#        |           tI          |d           tI          |d	           tK          j&                     Fd S )
NFc                 4    g | ]}t          j                    S r&   r  r  s     r#   r   z+Qwen2Loader.set_decoder.<locals>.<listcomp>	  ro  r%   r  r   rp  r   r  r  r  rq  rt  s           r#   r   zQwen2Loader.set_decoder	  s    %DOV-@AAADOV[999!$TZ!?!? -	 -	J)4e6K   )5+I   HGeAhhGGGLOOQ!7J     OOQ!7J     OOQ!7J     [5999!*";"B1"E|TTTT(K,D,MMMST*-4Q7v   OO)03&%     OO')<     OO-uy/@Z     OO')<     E;'''E5!!!JLLLL[-	 -	r%   rv  r1  s   @r#   r  r  	  s        " " X"6 6 6p    ) ) )
> 
> 
>' ' ' 4?3K3O 2 2 2 2 2 2 2 2r%   r  Qwen3Configc                   j     e Zd Zed             Zd Z fdZd Zd Zd Z	e
j        j        fdZ xZS )Qwen3Loaderc                     dS )NQwen3ForCausalLMr&   r|   s    r#   rR   zQwen3Loader.architecture_name)
  r  r%   c                 p   |j         j        }|j         j        }t          |j         d|          }t          |j         d|j         j        |z            }||k    rd }|                     |j         d          \  }}}t          |j         dd           }	|	rd }
|	j        dk    rt                              |	j	                  }
|
Dt          d|	j        dd                    t                                                              |	j        }|	j        }nt          j        j        }
d }d }t$          j                            ||t          j        j        d	d	d	|j         j        d
|||||d	|
||          }|                     |j        |j        |
           |                     |j        j        |j                   |S )Nr  r  r  rT  rU  rV  rW  r<   TF)r   r   r  r  r  r  r   r   r  r  r  r  r   rY  rZ  )r?   rr  rs  rQ   rv  r   r]  r^  rL   r_  r   rN   rP   r`  ra  r   r   r   r
   rq  r   r  rb  r  r   r   rb   r   r   r   )r9   rb   r,  r  r  r  r   r   r   rT  r   rY  rZ  re   s                 r#   r   zQwen3Loader.get_model_spec-
  s   \3
L4	u|-BINNL*el&>)&K
 
 9$$LAEAWAWL)B
 B
>2J &el4I4PP 	J"/5884889L9TUU
!)) ,888		"9">">"@"@AAA	    3=,1JJ$15J#J;GG"-3|,# 3"7"%!-!# H 
 
( 	u{J???/???r%   c                     t                                          ||          }|j        j        t	          |          z
  }t          |          D ]}|                    d|z             |S r  r  r  s         r#   r   zQwen3Loader.get_vocabularyi
  sg    ''y99L+c&kk9	y!! 	/ 	/AMM/A-....r%   c                 0    |                     |           d S r   r|  r   s      r#   r   zQwen3Loader.set_vocabularyp
  r~  r%   c                     |j         |j         n|j        |_         |j        |_        |j        |j        nd|_        |j        j        |_        d S rj  r  r   s       r#   r   zQwen3Loader.set_configs
  r  r%   c                     |j         |_        d S r   r  r  s      r#   r   zQwen3Loader.set_layer_norm
  r  r%   c                    d|_         |                     |j        |j                   |                     |j        |j                   t          t          |j	        |j
                            D ]\  }\  }}|                     |j        j        |j                   |                     |j        j        |j                   |                     |j        j        |j        j                   |                     |j        j        |j        j                   d t%          d          D             }|                     |d         |j        j        |           |                     |d         |j        j        |           |                     |d         |j        j        |           |t.          j        j        k    r&t5          j        |j        j        d         |           n?|t.          j        j        k    rdnd}t5          j        |j        j        d         ||           |                     |j        j        d         |j        j        |           |                     |j        j         |j!        j"        |           |                     |j        j#        |j!        j$        |           |                     |j        j%        |j!        j&        |           tO          |d           tO          |d	           tQ          j)                     d S )
NFc                 4    g | ]}t          j                    S r&   r  r  s     r#   r   z+Qwen3Loader.set_decoder.<locals>.<listcomp>
  ro  r%   r  r   rp  r   r  r  r  )*r)  r   r,  r.  r   r  r2  ry  r   r   r  r   r  r  r  r  r  r  r  r   r  r  r  r   r   r   r   r  r   rr  rs  r3  r  r  r4  r  r  r	  r  r5  r6  r7  )	r9   re   r   r   	layer_idxr  r   r!  ru  s	            r#   r   zQwen3Loader.set_decoder
  s    %DOV-@AAADOV[999.7DJ8V8V.W.W 4	 4	*I*
E)4e6K   )5+I   )0%/2H   )0%/2H   HGeAhhGGGLOOQ!7J     OOQ!7J     OOQ!7J     [5999!*";"B1"E|TTTT(K,D,MMMST*-4Q7v   OO)03&%     OO')<     OO-uy/@Z     OO')<     E;'''E5!!!JLLLLi4	 4	r%   rv  r1  s   @r#   r  r  '
  s        " " X": : :x    ) ) )
> 
> 
>' ' ' 4?3K3O 9 9 9 9 9 9 9 9r%   r  MixFormerSequentialConfigc                   L     e Zd Zed             Zd Z fdZd Zd Zd Z	 xZ
S )MixFormerSequentialLoaderc                     dS r   r&   r|   s    r#   rR   z+MixFormerSequentialLoader.architecture_name
  r  r%   c           
      Z   t           j                            |j        j        |j        j        dt          |j        j                 |j        j        ddd          }| 	                    |j
        |j                   |                     |j
        j        |j        d         j                   |S )NTFr,  r  r   r   r  r  r  r  r  )r
   rq  r   r?   r  r  r   r   r  r   r   r  r   r   r   rw  s      r#   r   z(MixFormerSequentialLoader.get_model_spec
  s    ;GG|+l)-el.NO|.#"" H 	
 	
 	u|444/b1A1HIIIr%   c                     t                                          ||          }|j        j        t	          |          z
  }t          |          D ]}|                    d|z             |S r  r  r  s         r#   r   z(MixFormerSequentialLoader.get_vocabulary
  r  r%   c                 0    |                     |           d S r   r|  r   s      r#   r   z(MixFormerSequentialLoader.set_vocabulary
  r~  r%   c                 N    |j         |_         |j        |_        |j        |_        d S r   r  r   s       r#   r   z$MixFormerSequentialLoader.set_config
  r  r%   c                    d|_         |                     |j        |d         j                   |                     |j        |d         j                   t          |j        |dd                   D ]\  }}|                     |j	        |j                   | 
                    |j        j        d         |j        j                   | 
                    |j        j        d         |j        j                   | 
                    |j        j        |j        j                   | 
                    |j        j        |j        j                   d S )NFr   r  r   )r)  r   r,  r  r   r  lnr   r   r  r   r   r   mixerr  r  r  r  r  r  r	  r
  r  s        r#   r   z%MixFormerSequentialLoader.set_decoder
  s    %DOVAY];;;DOVBZ];;;!$TZ"!>!> 	D 	DJ
 <ehGGGOOJ5<Q?AQRRROOJ5<Q?AUVVVOOJN3UY]CCCOOJN3UY]CCCC	D 	Dr%   r  r1  s   @r#   r  r  
  s        & & X&       ) ) )/ / /

D 
D 
D 
D 
D 
D 
Dr%   r  	PhiConfigc                   L     e Zd Zed             Zd Z fdZd Zd Zd Z	 xZ
S )	PhiLoaderc                     dS r   r&   r|   s    r#   rR   zPhiLoader.architecture_name
  r  r%   c           
         t           j                            |j        j        |j        j        dt          |j        j                 |j        j        ddd          }| 	                    |j
        |j                   |                     |j
        j        |j        j                   |                     |j
        j        |j        j                   |S )NTFr  )r
   rq  r   r?   r  r  r   r   r  r   r   r  r   r   r   r   r   r  r  rw  s      r#   r   zPhiLoader.get_model_spec
  s    ;GG|+l)-el.NO|.#"" H 	
 	
 	u'8999/1EFFFDL3U]5EFFFr%   c                     t                                          ||          }|j        j        t	          |          z
  }t          |          D ]}|                    d|z             |S r  r  r  s         r#   r   zPhiLoader.get_vocabulary	  r  r%   c                 0    |                     |           d S r   r|  r   s      r#   r   zPhiLoader.set_vocabulary  r~  r%   c                 N    |j         |_         |j        |_        |j        |_        d S r   r  r   s       r#   r   zPhiLoader.set_config  r  r%   c                 F   d|_         |                     |j        |j        j                   t          |j        |j                  D ]\  }}|                     |j	        |j
                   |                     |j        j        d         |j        j                   |                     |j        j        d         |j        j                   |                     |j        j        |j        j                   |                     |j        j        |j        j                   d S r  )r)  r   r,  embdr  r   r   r  r   r  r  r   r   r   r  r  r  r  r  r  r  r	  r
  r  s        r#   r   zPhiLoader.set_decoder  s     %DOV[_===!$TZ!:!: 	D 	DJ
 <ehGGGOOJ5<Q?AQRRROOJ5<Q?AUVVVOOJN3UY]CCCOOJN3UY]CCCC	D 	Dr%   r  r1  s   @r#   r   r   
  s        & & X&  "    ) ) )/ / /
	D 	D 	D 	D 	D 	D 	Dr%   r   
Phi3Configc                   p     e Zd Zed             Zd Z fdZd Zd Zd Z	d Z
ej        j        fdZ xZS )	
Phi3Loaderc                     dS r   r&   r|   s    r#   rR   zPhi3Loader.architecture_name(  r  r%   c                    |j         j        }|j         j        }t          |j         d|          }||k    rd }t          |j         dd          }t          |j         dd          }t          |j         dd           }|r~t                              |d                   }|                    dd          }	|Et          d	|d         d
d                    t                                                              nd }d}	t          |j         dd           }
|
rd }|
j	        dk    rt                              |
j                  }|Dt          d|
j	        dd                    t                                                              |
j        }|
j        }nt          j        j        }d }d }t"          j                            ||t          j        j        ddddd||	t          |j         dd          ||||||          }|                     |j        |j        |           |                     |j        j        |j                   |S )Nr   original_max_position_embeddingsr   max_position_embeddingsr   r   r   r   r   r   r<   rT  rU  rV  rW  TFr   r  )r   r   r  r  r  r  r   r   r  r  r  r  r   rY  rZ  )r?   rr  rs  rQ   r   rL   r   rN   rP   r]  r^  r_  r`  ra  r   r   r   r
   rq  r   r  rb  r   r   rb   r   r   r   )r9   rb   r,  r  r  r  r  r   r   r   rT  r   rY  rZ  re   s                  r#   r   zPhi3Loader.get_model_spec,  s|   \3
L4	u|-BINN9$$L+2L<a,
 ,
( #*%,8QST"U"Uu|^TBB 	&"9"="=l6>R"S"S$0$4$4Xq$A$A!"*)) $F+++TYY7N7S7S7U7U-V-V-VX   + #'$%! &el4I4PP 	J"/5884889L9TUU
!)) ,888		"9">">"@"@AAA	    3=,1JJ$15J#J;GG"-3# 3"7lEBB-M$;%!-!# H 
 
( 	u{J???/???r%   c                     t                                          ||          }|j        j        t	          |          z
  }t          |          D ]}|                    d|z             |S r  r  r  s         r#   r   zPhi3Loader.get_vocabularyu  r  r%   c                 0    |                     |           d S r   r|  r   s      r#   r   zPhi3Loader.set_vocabulary~  r~  r%   c                 N    |j         |_         |j        |_        |j        |_        d S r   r  r   s       r#   r   zPhi3Loader.set_config  r  r%   c                     |j         |_        d S r   r  r  s      r#   r   zPhi3Loader.set_layer_norm  r  r%   c                     t          j        |t           j                  |_        t          j        |t           j                  |_        d S )N)rA   )rE   tensorr  rotary_scaling_long_factorrotary_scaling_short_factor)r9   re   r  r  s       r#   set_rotary_embeddingsz Phi3Loader.set_rotary_embeddings  sL     +0,&em+
 +
 +
' ,1<'u},
 ,
 ,
(((r%   c                 .   d|_         |                     |j        |j                   |                     |j        |j                   t          |j        |j	                  D ]\  }}|                     |j
        j        |j                   |                     |j        j        |j                   |                     |j
        j        d         |j        j        |           |                     |j
        j        d         |j        j        |           |j        j        j        K|j        j        j        :|                     |j
        |j        j        j        |j        j        j                   |t.          j        j        k    rL|j        j        j                            dd          \  }}||j        j        _        ||j        j        _        n|j        j        j                             dd          \  }}	|j        j        j!                            dd          \  }
}|j        j        j"                            dd          \  }}||j        j        _        |
|j        j        _#        ||j        j        _$        |	|j        j        _        ||j        j        _#        ||j        j        _$        |                     |j        j%        |j        j&        |           tO          |d           tO          |d           tQ          j)                     d S )	NFr   rp  r   r  r  r  r  )*r)  r   r,  r.  r   r  r2  r   r   r  r   r  r  r  r   r   r  r  r3  
rotary_emblong_factorshort_factorr  r   r   r   r  gate_up_projr   r  r  r  r   r   r   r   r   r	  r  r5  r6  r7  )r9   re   r   r   r  r   r4  r  gate_qweight
up_qweightgate_scales	up_scalesgate_qzeros	up_qzeross                 r#   r   zPhi3Loader.set_decoder  s    %DOV-@AAADOV[999!$TZ!?!? 7	 7	J)4e6K   )5+I   OO)03(%    
 OO)03&%     *6BO.;G**-O.:O.;   [5999%*Y%;%B%H%HPQ%H%R%R"	71:
'.7>
-44 ,19+A+I+O+O1 ,P , ,(j */)?)F)L)LQTU)L)V)V&Y).)?)F)L)LQTU)L)V)V&Y1=
'.7B
'46A
'37A
-4=F
-:<E
-9OO')<     E;'''E5!!!JLLLLo7	 7	r%   )rK   rt   ru   r   rR   r   r   r   r   r   r  r   r   r   r   r0  r1  s   @r#   r
  r
  &  s        & & X&G G GR    ) ) )/ / /
' ' '
 
 
 4?3K3O < < < < < < < <r%   r
  RWConfigc                   Z     e Zd Zed             Zd Zd Z fdZd Zd Z	d Z
d
d	Z xZS )RWLoaderc                     dS r   r&   r|   s    r#   rR   zRWLoader.architecture_name  r  r%   c                     |j         j        | _        |j         j        | _        t          |j         dd           | _        d| _        d S )N	n_head_kvnum_kv)r?   r  _num_layersr  
_num_headsrQ   _num_heads_kv_num_kv_attrr   s     r#   get_falcon_speczRWLoader.get_falcon_spec  s?     </,-$U\;EE$r%   c                    |                      |           t          |j        dd          rd}n| j        }t          j                            | j        | j        dt          j
        j        |j        j        dd|j        j        rdnd d|j        j        |dk    |          }|                     |j        |j                   |                     |j        j        |j                   |S )Nmulti_queryFr   Tr   )
r   r   r  r  scale_alibir  r  r  r  r  )r.  rQ   r?   r,  r
   rq  r   r*  r+  r   r  r  r  rotaryparallel_attnr   r   r  r   r   r   )r9   rb   r  re   s       r#   r   zRWLoader.get_model_spec  s    U###5<66 	.LL-L;GGO"-2,$)-!L/9qqT##l8*a/% H 
 
 	u'8999/???r%   c                     t                                          ||          }|j        j        t	          |          z
  }t          |          D ]}|                    d|z             |S r  r  r  s         r#   r   zRWLoader.get_vocabulary  r  r%   c                 0    |                     |           d S r   r|  r   s      r#   r   zRWLoader.set_vocabulary  r~  r%   c                 N    |j         |_        |j         |_         |j         |_        d S r   )r   r   r   r   s       r#   r   zRWLoader.set_config  r  r%   c                    d|_         |                     |j        |j                   |                     |j        |j                   t          |j        |j	                  D ]\  }}t          |d          rA|                     |j        |j                   |                     |j        |j                   n{t          |d          r!|                     |j        |j                   nJ|                     |j        j        |j                   |                     |j        j        |j                   t)          |j        | j                  }|dk    r1|                     |j        j        d         |j        j                   nN|                     |j        j        d         |j        j        |j        j        ||j        j        k     r|nd            |                     |j        j        d         |j        j                   |                     |j        j        |j        j                   |                     |j        j        |j        j                    d S )NFln_attnr  r   r   )!r)  r   r,  r  r   r  r  r   r   r  rS   r  r8  r  ln_mlpr  r  r   r  r  rQ   r-  r   r   r  r  r  r	  r  r  r
  r	  r  )r9   re   r   r  r   r)  s         r#   r   zRWLoader.set_decoder
  sL    %DOV-CDDDDOV[999!$TZ!:!: !	N !	NJui(( ##J$?OOO##J$H%,WWWW%899 ##J$@%BWXXXX##-8%:O   ##N-u/M   U143DEEF{{-4Q7(8   
 ##-4Q7(8(2$u';'EEEFF4	   OO)03U5I5O   OOJN3UY5LMMMOOJN3UY5LMMMMC!	N !	Nr%   Nc                 :   |j         }|[|                    |dd|j        d                   }|                    dd          }|                    d|j        d                   }n|j        d         ||dz  z   z  }|                    d||z  dz   ||j        d                   }|                    ||z  ddgd          \  }}}	t          j        |                    ||z  d          |                    ||z  d          |	                    ||z  d          g          }||_         |j        |j        }
|C|
                    |dd          }
|
                    dd          }
|
                    d          }
n|
                    d||z  dz   |          }
|
                    ||z  ddgd          \  }}}	t          j        |                    ||z            |                    ||z            |	                    ||z            g          }
|
|_        d S d S )Nr  r  r   r   r  r  )r   r  rB  r   splitrE   r  r   )r9   re   r   r  r)  r   r  r  r  r  r   s              r#   r  zRWLoader.set_qkv_linear2  s)   >^^Iq"fl26FGGF%%a++F^^BR(899FF|A9vz+ABH^^I'!+Xv|B7G F llI$7A#>AlFFGAq!YIIi(2B77IIfx/44IIfx/44 F ;";D~||Iq"55~~a++||B''||B	V(;a(?JJ**i6&91a%@a*HH1ay		)h"677		&8"344		&8"344  DIII% #"r%   r   )rK   rt   ru   r   rR   r.  r   r   r   r   r   r  r0  r1  s   @r#   r%  r%    s        & & X&% % %  6    ) ) )/ / /
&N &N &NP) ) ) ) ) ) ) )r%   r%  FalconConfigc                       e Zd Zd ZdS )FalconLoaderc                     |j         j        | _        |j         j        | _        t          |j         dd           | _        d| _        d S )Nnum_kv_heads)r?   rr  r*  rs  r+  rQ   r,  r-  r   s     r#   r.  zFalconLoader.get_falcon_spec`  s?     <9,:$U\>4HH*r%   N)rK   rt   ru   r.  r&   r%   r#   r>  r>  ^  s#        + + + + +r%   r>  DistilBertConfigc                   6    e Zd Zed             Zd Zd Zd ZdS )DistilBertLoaderc                     dS )NDistilBertModelr&   r|   s    r#   rR   z"DistilBertLoader.architecture_namei  r  r%   c                 &   t          j        |j        j        |j        j        dt
          |j        j                 d          }t          j        |          }d|j        _	        | 
                    |j        j        d         |j        j                   |                     |j        j        |j        j                   |                     |j        j        |j        j                   t'          |j        j        |j        j                  D ]\  }}d t-          d          D             }|                     |d         |j        j                   |                     |d         |j        j                   |                     |d         |j        j                   t9          j        |j        j        d         |           |                     |j        j        d         |j        j                    |                     |j        j!        |j"                   |                     |j#        j$        |j#        j%                   |                     |j#        j&        |j#        j'                   |                     |j#        j!        |j(                   |S )	NFTr   r   c                 4    g | ]}t          j                    S r&   r  r  s     r#   r   z3DistilBertLoader.get_model_spec.<locals>.<listcomp>  ro  r%   r  r   r  ))r
   TransformerEncoderSpecr?   r  r  r   r   TransformerEncoderModelSpecr   r)  r   r,  r  r   r*  position_embeddingsr   r   	LayerNormr   r   r  r  r   r  q_link_linv_linr   r  r   r   out_linr  sa_layer_normr  r  lin1r	  lin2output_layer_normr9   rb   encoder_specre   r  r   r!  s          r#   r   zDistilBertLoader.get_model_specm  sP   '>L!L -el.EF $
 
 
  ;
 
 ).%L#A&(8(H	
 	
 	
 	##L+U-=-Q	
 	
 	
 	L,e.>.H	
 	
 	
 "%T\%79J9P!Q!Q 	T 	TJGGeAhhGGGLOOLOU_-BCCCOOLOU_-BCCCOOLOU_-BCCCj7>qA<PPPOO)03U_5L   )4e6I   OOJN3UY^DDDOOJN3UY^DDD
 95;RSSSSr%   c                 0    |                     |           d S r   r|  r   s      r#   r   zDistilBertLoader.set_vocabulary  r~  r%   c                 ,    |j         |_         d|_        d S )Ng-q=)r   r*  r   s       r#   r   zDistilBertLoader.set_config  s    $.$)!!!r%   N)rK   rt   ru   r   rR   r   r   r   r&   r%   r#   rC  rC  g  s[        ! ! X!* * *X) ) )* * * * *r%   rC  
BertConfigc                   F     e Zd Zed             Zd Z fdZd Zd Z xZ	S )
BertLoaderc                     dS )N	BertModelr&   r|   s    r#   rR   zBertLoader.architecture_name  s    {r%   c           	         |j         j        dk    sJ t          j        |j         j        |j         j        dt          |j         j                 ddt          j	        j
                  }t          j        |dt          j        j                  }d|j        _        |                     |j        j        d         |j        j                   |                     |j        j        d         |j        j                   |                     |j        j        |j        j                   |                     |j        j        |j        j                   |                     |j        |j        j                   t=          |j        j        |j        j                  D ]\  }}d	 tA          d
          D             }|                     |d         |j!        j"        j#                   |                     |d         |j!        j"        j$                   |                     |d         |j!        j"        j%                   tM          j'        |j(        j)        d         |           |                     |j(        j)        d         |j!        j*        j                   |                     |j(        j+        |j!        j*        j                   |                     |j,        j-        |j.        j                   |                     |j,        j/        |j*        j                   |                     |j,        j+        |j*        j                   |S )NabsoluteFTr  r   r   r   num_source_embeddingsembeddings_mergepooling_layerpooling_activationr   r   c                 4    g | ]}t          j                    S r&   r  r  s     r#   r   z-BertLoader.get_model_spec.<locals>.<listcomp>  ro  r%   r  )0r?   position_embedding_typer
   rH  rr  rs  r   r  r   EmbeddingsMergeADDrI  r  Tanhr   r)  r   r,  r  token_type_embeddingsr   r*  rJ  r   r   rK  r   pooler_densepoolerr	  r   r   r  r  r9   queryr   valuer   r  r   r   outputr  r  r  intermediater	  rT  s          r#   r   zBertLoader.get_model_spec  s   |3zAAAA'>L*L,-el.EF $"#(8<
 
 
  ;*5:
 
 
 ).%L#A&(8(H	
 	
 	
 	L#A&(8(N	
 	
 	
 	##L+U-=-Q	
 	
 	
 	L,e.>.H	
 	
 	
 	)5<+=>>>!$T\%79L!M!M 	S 	SJGGeAhhGGGLOOLOU_-A-GHHHOOLOU_-A-EFFFOOLOU_-A-GHHHj7>qA<PPPOO)03U_5K5Q   )4eo6L6V   OOJN3U5G5MNNNOOJN3U\5GHHH
 95<;QRRRRr%   c                     t                                          ||          }|j        j        t	          |          z
  }t          |          D ]}|                    d|z             |S r  r  r  s         r#   r   zBertLoader.get_vocabulary  r  r%   c                 0    |                     |           d S r   r|  r   s      r#   r   zBertLoader.set_vocabulary  r~  r%   c                 @    |j         |_         |j        j        |_        d S r   r   r?   layer_norm_epsr*  r   s       r#   r   zBertLoader.set_config       $.$)L$?!!!r%   )
rK   rt   ru   r   rR   r   r   r   r   r0  r1  s   @r#   rZ  rZ    s          X6 6 6p    ) ) )@ @ @ @ @ @ @r%   rZ  XLMRobertaConfigc                   <    e Zd Zed             Zd Zd Zd Zd ZdS )XLMRobertaLoaderc                     dS )N#XLMRobertaForSequenceClassificationr&   r|   s    r#   rR   z"XLMRobertaLoader.architecture_name  s    44r%   c           	         |j         j        dk    sJ t          j        |j         j        |j         j        dt          |j         j                 ddt          j	        j
                  }|j        j        d}nd}t          j        ||t          j        j                  }d|j        _        |                     |j        j        d         |j        j        j                   |                     |j        j        d         |j        j        j                   |                     |j        j        |j        j        j                   |                     |j        j        |j        j        j                   |r*|                     |j        |j        j        j                   t?          |j        j         |j        j        j                   D ]\  }}d	 tC          d
          D             }|                     |d         |j"        j#        j$                   |                     |d         |j"        j#        j%                   |                     |d         |j"        j#        j&                   tO          j(        |j)        j*        d         |           |                     |j)        j*        d         |j"        j+        j                   |                     |j)        j,        |j"        j+        j                   |                     |j-        j.        |j/        j                   |                     |j-        j0        |j+        j                   |                     |j-        j,        |j+        j                   |S )Nr^  FTr  r_  rb  r   r   c                 4    g | ]}t          j                    S r&   r  r  s     r#   r   z3XLMRobertaLoader.get_model_spec.<locals>.<listcomp>!  ro  r%   r  )1r?   rf  r
   rH  rr  rs  r   r  r   rg  rh  robertarl  rI  r  ri  r   r)  r   r,  r  rj  r   r*  rJ  r   r   rK  r   rk  r	  r   r   r  r  r9   rm  r   rn  r   r  r   r   ro  r  r  r  rp  r	  r9   rb   rU  rc  re   r  r   r!  s           r#   r   zXLMRobertaLoader.get_model_spec  s   |3zAAAA'>L*L,-el.EF $"#(8<
 
 
 ='!MM M;'*5:
 
 
 ).%L#A&(@(P	
 	
 	
 	L#A&(@(V	
 	
 	
 	##L+M$8	
 	
 	
 	L,em.F.P	
 	
 	
  	KOOD-u}/C/IJJJ!$T\%79N9T!U!U 	S 	SJGGeAhhGGGLOOLOU_-A-GHHHOOLOU_-A-EFFFOOLOU_-A-GHHHj7>qA<PPPOO)03U_5K5Q   )4eo6L6V   OOJN3U5G5MNNNOOJN3U\5GHHH
 95<;QRRRRr%   c                 0    |                     |           d S r   r|  r   s      r#   r   zXLMRobertaLoader.set_vocabulary4  r~  r%   c                 @    |j         |_         |j        j        |_        d S r   rt  r   s       r#   r   zXLMRobertaLoader.set_config7  rv  r%   c                 ~    |j         |_        t          |dd          }|dk    r|j        |dz   d          |_        d S d S Npadding_idxr   r   r   r   s       r#   r   z'XLMRobertaLoader.set_position_encodings;  F    22A::!^FQJLL9DNNN :r%   N	rK   rt   ru   r   rR   r   r   r   r   r&   r%   r#   ry  ry    sm        5 5 X5< < <|) ) )@ @ @: : : : :r%   ry  RobertaConfigc                   <    e Zd Zed             Zd Zd Zd Zd ZdS )RobertaLoaderc                     dS )NRobertaModelr&   r|   s    r#   rR   zRobertaLoader.architecture_nameD  s    ~r%   c           	         |j         j        dk    sJ t          j        |j         j        |j         j        dt          |j         j                 ddt          j	        j
                  }|j        d}nd}t          j        ||t          j        j                  }d|j        _        |                     |j        j        d         |j        j                   |                     |j        j        d         |j        j                   |                     |j        j        |j        j                   |                     |j        j        |j        j                   |r%|                     |j        |j        j                   t=          |j        j        |j        j                  D ]\  }}d	 tA          d
          D             }|                     |d         |j!        j"        j#                   |                     |d         |j!        j"        j$                   |                     |d         |j!        j"        j%                   tM          j'        |j(        j)        d         |           |                     |j(        j)        d         |j!        j*        j                   |                     |j(        j+        |j!        j*        j                   |                     |j,        j-        |j.        j                   |                     |j,        j/        |j*        j                   |                     |j,        j+        |j*        j                   |S )Nr^  FTr  r_  rb  r   r   c                 4    g | ]}t          j                    S r&   r  r  s     r#   r   z0RobertaLoader.get_model_spec.<locals>.<listcomp>s  ro  r%   r  0r?   rf  r
   rH  rr  rs  r   r  r   rg  rh  rl  rI  r  ri  r   r)  r   r,  r  rj  r   r*  rJ  r   r   rK  r   rk  r	  r   r   r  r  r9   rm  r   rn  r   r  r   r   ro  r  r  r  rp  r	  r  s           r#   r   zRobertaLoader.get_model_specH     |3zAAAA'>L*L,-el.EF $"#(8<
 
 
 <!MM M;'*5:
 
 
 ).%L#A&(8(H	
 	
 	
 	L#A&(8(N	
 	
 	
 	##L+0	
 	
 	
 	L,e.>.H	
 	
 	
  	COOD-u|/ABBB!$T\%79L!M!M 	S 	SJGGeAhhGGGLOOLOU_-A-GHHHOOLOU_-A-EFFFOOLOU_-A-GHHHj7>qA<PPPOO)03U_5K5Q   )4eo6L6V   OOJN3U5G5MNNNOOJN3U\5GHHH
 95<;QRRRRr%   c                 0    |                     |           d S r   r|  r   s      r#   r   zRobertaLoader.set_vocabulary  r~  r%   c                 @    |j         |_         |j        j        |_        d S r   rt  r   s       r#   r   zRobertaLoader.set_config  rv  r%   c                 ~    |j         |_        t          |dd          }|dk    r|j        |dz   d          |_        d S d S r  r   r   s       r#   r   z$RobertaLoader.set_position_encodings  r  r%   Nr  r&   r%   r#   r  r  B  sm          X< < <|) ) )@ @ @: : : : :r%   r  CamembertConfigc                   <    e Zd Zed             Zd Zd Zd Zd ZdS )CamembertLoaderc                     dS )NCamembertModelr&   r|   s    r#   rR   z!CamembertLoader.architecture_name  rn  r%   c           	         |j         j        dk    sJ t          j        |j         j        |j         j        dt          |j         j                 ddt          j	        j
                  }|j        d}nd}t          j        ||t          j        j                  }d|j        _        |                     |j        j        d         |j        j                   |                     |j        j        d         |j        j                   |                     |j        j        |j        j                   |                     |j        j        |j        j                   |r%|                     |j        |j        j                   t=          |j        j        |j        j                  D ]\  }}d	 tA          d
          D             }|                     |d         |j!        j"        j#                   |                     |d         |j!        j"        j$                   |                     |d         |j!        j"        j%                   tM          j'        |j(        j)        d         |           |                     |j(        j)        d         |j!        j*        j                   |                     |j(        j+        |j!        j*        j                   |                     |j,        j-        |j.        j                   |                     |j,        j/        |j*        j                   |                     |j,        j+        |j*        j                   |S )Nr^  FTr  r_  rb  r   r   c                 4    g | ]}t          j                    S r&   r  r  s     r#   r   z2CamembertLoader.get_model_spec.<locals>.<listcomp>  ro  r%   r  r  r  s           r#   r   zCamembertLoader.get_model_spec  r  r%   c                 0    |                     |           d S r   r|  r   s      r#   r   zCamembertLoader.set_vocabulary  r~  r%   c                 @    |j         |_         |j        j        |_        d S r   rt  r   s       r#   r   zCamembertLoader.set_config  rv  r%   c                 ~    |j         |_        t          |dd          }|dk    r|j        |dz   d          |_        d S d S r  r   r   s       r#   r   z&CamembertLoader.set_position_encodings  r  r%   Nr  r&   r%   r#   r  r    sm            X < < <|) ) )@ @ @: : : : :r%   r  c            	      P   t          j        t           j                  } |                     ddd           |                     dd           |                     d	d
d           |                     dd           |                     ddd           |                     ddd           t	          j        |            |                                 }t          |j        |j	        |j
        |j        dv |j        |j        |j                  }|                    |           d S )N)formatter_classz--modelTzaName of the pretrained model to download, or path to a directory containing the pretrained model.)requiredhelpz--activation_scaleszPath to the pre-computed activation scales. Models may use them to rescale some weights to smooth the intermediate activations and improve the quantization accuracy. See https://github.com/mit-han-lab/smoothquant.)r  z--copy_files+zWList of filenames to copy from the Hugging Face model to the converted model directory.)nargsr  z
--revisionz<Revision of the model to download from the Hugging Face Hub.z--low_cpu_mem_usage
store_truezNEnable the flag low_cpu_mem_usage when loading the model with from_pretrained.)actionr  z--trust_remote_codez*Allow converting models using custom code.)rV   int8_float16)r+   r,   r-   r.   r/   r0   )argparseArgumentParserArgumentDefaultsHelpFormatteradd_argumentr   declare_arguments
parse_argsr)   rb   r+   r,   quantizationr.   r/   r0   convert_from_args)parserargs	converters      r#   mainr    s   $ >  F F	     :     	     K     ]    
 9     '''D%
0?)-HH00  I %%%%%r%   __main__))r   r   )r  r   )r     r  r   r  r   r  r  r  r  r  r  ))r  r  r  r  r  r  )r  r  )r  r     r  r   )r  r  )r  r  )r  )r  r  )r  r  r  r  )r  r  )r  r  )r  r  ))r  r  r  r   )r  r  )r  r  )r  r  )r  r  r  r  	   r   )r  r  )r  r  )r  
   )r  r   )r  r   )r  r  )r  r  )r  r  )r     r  r  r  r  )
)r  r  )r  r  )r  r   )r  r  r  )r  r  r  )r  r  )r  r  )r  r  )r  )   r   )r     )r  r     r  )   r   )r  r  )r  r  )   r  r  r  )   r  )r  r  )r  r     r   )r  r  )r  r  )r  r  )   r  ))   r  r  )r  r  r  r   r  )   r  )	)r     r  r  )r  r  )   r  )r  r  )r  r  )r  r  )r  r  ))r  r  )r  r  )r  r  )r  r  )r  r  )r  r  )r  r  )r  r  )r  r  )r  r  r  r  )r  r  )r  r  )r  r  )r  r  )r  r  )r  r  )r  r  )r  r  )   r  )   r   )r  r  )   r  )
r  )r  r  )r  r  )r  r  r  r  r  )r  r  )   r   )r  r  )zopenai/whisper-tiny.enzopenai/whisper-tinyzopenai/whisper-base.enzopenai/whisper-basezopenai/whisper-small.enzopenai/whisper-smallzopenai/whisper-medium.enzopenai/whisper-mediumzopenai/whisper-largezopenai/whisper-large-v2zopenai/whisper-large-v3T5GemmaConfigc                   x    e Zd Zed             Zd Zd Zd Zd Ze	j
        j        fdZe	j
        j        fdZdS )	T5GemmaLoaderc                     dS )NT5GemmaForConditionalGenerationr&   r|   s    r#   rR   zT5GemmaLoader.architecture_name  rf  r%   c                 .    |j         j        dz   |_        d S )Nr$  )r   rc  r   r  s      r#   r   zT5GemmaLoader.set_layer_norm  s    &+c1


r%   c                 v   |j         j        }|j         j        }t          |j         dd          }|j        }t          |d|          }||k    rd }t          j        |j        |j        dt          |j	                 dd|j
        dt          |dd          |d||j
                  }|j        }t          |d|          }	|	|k    rd }	t          j        |j        |j        dt          |j	                 ddd|j
        dt          |dd          |dd|	|j
        	          }
t          j        ||
          }|                     |j        |j        j        |           |                     |j        |j        j        |t           j        j                   |                     |j        j        |j        j        j                   |S )
Nr  i   r  TFr   r  )r   r   r  r  r  r  r  r  rD  r  r  )r   r   r  r  with_encoder_attentionr  r  r  r  rD   external_pre_post_encoder_layersr  r  )r?   r   r   rQ   rs  r
   rH  rr  r   r  r  TransformerDecoderSpecr   r   rb   r   r   r   r   r   r   r.  )r9   rb   encoder_configdecoder_configr  encoder_num_headsencoder_num_heads_kvr   decoder_num_headsdecoder_num_heads_kvr   re   s               r#   r   zT5GemmaLoader.get_model_spec  s   -- /?FF*>&13D 
  
  #444#' "9,.-n.NO%.#eDD) $-#,
 
 
  +>&13D 
  
  #444#' "9,.-n.NO#'%.#eDD) $-1-#,
 
 
$  /AAu{':NKKKLK$(		
 	
 	
 	/1D1QRRRr%   c                 Z    |                     |           |                    |           d S r   r   r   s      r#   r   zT5GemmaLoader.set_vocabulary  r   r%   c                    |j         |_         |j        |_        |j        |_        t          |j        d          r|j        j        j        |_        n.t          |j        d          r|j        j        |_        nd|_        |j         |_        d S )Nr   r)  gư>)	r   r   r   rS   r?   r   r)  r*  r   r   s       r#   r   zT5GemmaLoader.set_config  s    $.$.$.5<++ 	-(-(<(IF%%U\>22 	-(-(AF%%(,F%%.%8"""r%   c                    d|_         t          |j        t                    r|j        d         n|j        }|                     ||j                   |j        dz  |_        |                     |j	        |j
                   |}t          t          |j        |j                            D ](\  }\  }}	|                     |j        |	j                   |                     |j        |	j                   d t'          d          D             }
|                     |
d         |	j        j        |           |                     |
d         |	j        j        |           |                     |
d         |	j        j        |           t3          j        |j        j        d         |
           |                     |j        j        d         |	j        j        |           |                     |j        |	j                   |                     |j         |	j!                   |                     |j"        j#        |	j$        j%        |           |                     |j"        j&        |	j$        j'        |           |                     |j"        j(        |	j$        j)        |           tU          |	d	           tU          |	d
           tW          j,                     *d S )NTr   r  c                 4    g | ]}t          j                    S r&   r  r  s     r#   r   z-T5GemmaLoader.set_encoder.<locals>.<listcomp>  !    KKKQ 6 8 8KKKr%   r  rp  r   r  r  r  )-r)  r   r,  r-  r   r.  rv  r!  r   r  r2  ry  r   r   r  r  pre_self_attn_layernormr  post_self_attn_layernormr  r   r  r  r  r  r   r  r   r   r3  rJ  rK  rL  rM  r  r  r  r4  r  r  r	  r  r5  r6  r7  )r9   re   r   r  r   encoder_emb_specr   rT  r  r   qkv_split_layerss              r#   r   zT5GemmaLoader.set_encoder  s     !% #-T_d"C"CXDOA 	 	,g.BCCC2@2Lc2Q/DOW\:::&/DJ0N0N&O&O 1	 1	"A"
E+U-J   4e6T  
  LK%((KKKOO #U_%;
     OO #U_%;
     OO #U_%;
     j7>qACSTTTOO)03&%     5u7V   68X  
 OO')<     OO-uy/@Z     OO')<    
 E;'''E5!!!JLLLLc1	 1	r%   c                    d|_         d|_        |                     |j        |j                   |j        dz  |j        _        |                     |j        |j	                   t          t          |j        |j                            D ]j\  }\  }}|                     |j        |j                   |                     |j        |j                   d t%          d          D             }|                     |d         |j        j        |           |                     |d         |j        j        |           |                     |d	         |j        j        |           t1          j        |j        j        d         |           |                     |j        j        d         |j        j        |           |                     |j        |j                   |                     |j        |j                    |                     |j!        j        d         |j"        j        |           d
 t%          d	          D             }	|                     |	d         |j"        j        |           |                     |	d         |j"        j        |           t1          j        |j!        j        d         |	           |                     |j!        j        d	         |j"        j        |           |                     |j#        |j$                   |                     |j%        |j&                   |                     |j'        j(        |j)        j*        |           |                     |j'        j+        |j)        j,        |           |                     |j'        j-        |j)        j.        |           t_          |d           t_          |d           t_          |d           ta          j1                     ld S )NTFr  c                 4    g | ]}t          j                    S r&   r  r  s     r#   r   z-T5GemmaLoader.set_decoder.<locals>.<listcomp>\  r  r%   r  r   rp  r   r  c                 4    g | ]}t          j                    S r&   r  r  s     r#   r   z-T5GemmaLoader.set_decoder.<locals>.<listcomp>  s!    JJJA{577JJJr%   r  
cross_attnr  )2r)  r=  r   r,  r.  rv  r!  r   r  r2  ry  r   r   r  r  r  r  r  r  r   r  r  r  r  r   r  r   r   r3  )external_pre_encoder_attention_layer_normpre_cross_attn_layernorm*external_post_encoder_attention_layer_normpost_cross_attn_layernormr  r  rJ  rK  rL  rM  r  r  r  r4  r  r  r	  r  r5  r6  r7  )
r9   re   r   r  r   rT  r  r   r  kv_split_layerss
             r#   r   zT5GemmaLoader.set_decoderH  sD    !%).&DOV-@AAA1?1KS1P.DOV[999&/DJ0N0N&O&O Y	 Y	"A"
E+U-J   4e6T  
  LK%((KKKOO #U_%;
     OO #U_%;
     OO #U_%;
     j7>qACSTTTOO)03&%     D.  
 E/   OO$+A. '%     KJqJJJOOO" '%    
 OO" '%    
 j29!<oNNN OO$+A. '%     5u7V   68X  
 OO')<     OO-uy/@Z     OO')<    
 E;'''E<(((E5!!!JLLLLsY	 Y	r%   N)rK   rt   ru   r   rR   r   r   r   r   r   r   r   r   r   r&   r%   r#   r  r    s        1 1 X12 2 2B B BH0 0 09 9 9 9D8P8T? ? ? ?D 8C7O7Sc c c c c cr%   r  )Wr   r  r6  r)  rm   typingr   r   r  r  rp   rE   rG   ImportErrorctranslate2.convertersr    ctranslate2.converters.converterr   ctranslate2.specsr   r   r	   r
   r   r   r   r  r  r  GELUSigmoidRELUrb  r   rc  LinearSurd  r   r   rr  AWQ_GEMVr^  r    r'   r)   ABCrz   r   r4  rI  r[  rc  rk  r  r  r  r  r  r  r<  rn  r  r  r  r  r  r?  rP  rz  r  r  r  r  r  r   r
  r%  r>  rC  rZ  ry  r  r  r  rK   r'  r  r&   r%   r#   <module>r     s   



  				     				 ! ! ! ! ! ! ! !    	LLL 	 	 	D	 ) ( ( ( ( ( 6 6 6 6 6 6                  "''0&/).$/8(4"'"(#)
 
  .5

*
-.503	   $-$-  
   A A A A AI A A AHWF WF WF WF WF#' WF WF WFt @V @V @V @V @V @V @V @VF   47 47 47 47 47Z 47 47 ! 47n       :   ! B = = = = =* = =  =" !!	9 	9 	9 	9 	9J 	9 	9 "!	9 C C C C C
 C C CL #$$/G /G /G /G /G+ /G /G %$/Gd %G %G %G %G %G %G %G %GP 5G 5G 5G 5G 5G 5G 5G 5Gp !!VG VG VG VG VGK VG VG "!VGr !!LN LN LN LN LNK LN LN "!LN^ !!c  c  c  c  c J c  c  "!c L !""K@ K@ K@ K@ K@Z K@ K@ #"K@\ %&&q$ q$ q$ q$ q$ q$ q$ '&q$h y' y' y' y' y'{ y' y' y'x - - - - - - - - J J J J J+ J J  JZ 41 41 41 41 41 41 41 41n \ \ \ \ \+ \ \  \~   d d d d d; d d ! dN Q Q Q Q Q+ Q Q  Qh #$$  R R R R R; R R !  %$Rj #$$  @ @ @ @ @; @ @ !  %$@F !!D D D D DK D D "!DN I I I I I+ I I  IX S S S S S+ S S  Sl ,--0D 0D 0D 0D 0D 0D 0D .-0Df 0D 0D 0D 0D 0D 0D 0D 0Df h h h h h h h hV H H H H H{ H H HV   + + + + +8 + + ! + #$$6* 6* 6* 6* 6*{ 6* 6* %$6*r K@ K@ K@ K@ K@ K@ K@ K@\ #$$N: N: N: N: N:{ N: N: %$N:b !!N: N: N: N: N:K N: N: "!N:b "##N: N: N: N: N:k N: N: $#N:b7& 7& 7&t zDFFF	 	 	 LKKFFF	 	 	     *  ! ! !( VUU
 
 
     2     c} } B !!B B B B BK B B "!B B Bs   / 77