
    qiB                        U d Z ddlmZ ddlZddlmZmZ ddlZddlm	Z
 ddlZddlmZ ddlmc mZ ddlmc mZ ddlmc mZ dZej         G d d                      Zed         Zd	ed
efdZej         G d de
j                              Z ej         G d de
j                              Z!ej         G d de
j                              Z"ej         G d de
j                              Z#ej         G d de
j                              Z$e%ej&        ej'        df         ej&        ej'        df         f         Z(ee)d<   ej         G d de
j                              ZdddZ*d Z+d Z,dS ) zGemma adaptation for Pi, taken from big_vision.

We follow this einsum axis naming convention:
  B: batch
  T: query length
  S: k/v length
  N: num query heads
  K: num k/v heads
  G: num query heads per k/v head
  H: head dim
  D: d_model ("features")
    )SequenceN)Literal	TypeAliasi c                       e Zd ZU eed<   eed<   eed<   eed<   eed<   eed<    ej        e          Zee	e
j        f         ed<   d	S )
Configwidthdepthmlp_dim	num_headsnum_kv_headshead_dim)default_factorylora_configsN)__name__
__module____qualname__int__annotations__dataclassesfielddictr   strlora
LoRAConfig     4/home/robot-lab/Pi0.5_yam/src/openpi/models/gemma.pyr   r   ,   sx         JJJJJJLLLNNNMMM/@{/@QU/V/V/VL$sDO+,VVVVVr   r   )dummy
gemma_300mgemma_300m_loragemma_2bgemma_2b_loravariantreturnc                    | dk    rt          dddddd          S | d	k    rt          d
ddddd          S | dk    rt          dddddd          S | dk    rAt          ddddddt          j        dd          t          j        dd          d          S | dk    rAt          d
dddddt          j        dd          t          j        dd          d          S t          d|            )z+Returns config for specified gemma variant.r   @                  )r   r	   r
   r   r   r   r   i      i      r!   i   i @  r"   g      0@)rankalpha)attnffn)r   r	   r
   r   r   r   r   r        g      @@zUnknown variant: )r   r   r   
ValueError)r#   s    r   
get_configr4   :   s~   '
 
 
 	
 ,
 
 
 	
 *
 
 
 	
 /!!"&/r"F"F"FtdfnrOsOsOstt
 
 
 	
 ###"&/r"F"F"FtdfnrOsOsOstt
 
 
 	
 222
3
33r   c                   .    e Zd Zej        d             ZdS )RMSNormc           	         |j         }t          j        t          j        |                    t          j                            dd          }t          j        |t          j        t          j        |dz                       z            }|]| 	                    dt          j                                        |j        d                   }|d|z   z  }|                    |          d fS  t          j        |j        d         dz  t          j        j        |          |          }t          j        |d d d d d f         dd	          \  }}}	|d|z   z  |z   }|                    |          |	fS )
NT)axiskeepdimsgư>scaler*      )kernel_initdtyper9   )r>   jnpmeansquareastypefloat32asarray
reciprocalsqrtparamnninitializers
zeros_initshapeDensezerossplit)
selfxcondr>   varnormed_inputsr;   
modulationshiftgates
             r   __call__zRMSNorm.__call__r   sM   hsz!((3;"7"788rDQQQAsxe7L7L(M(M$MNN<JJw(B(B(D(Dqwr{TTE)E	M !''..44 _RXagbkAo2?;PX]^^^_cdd
 Yz!!!T111*'=qrJJJud%U3e;##E**D00r   N)r   r   r   rI   compactrX   r   r   r   r6   r6   p   s/        Z1 1 Z1 1 1r   r6   c                   :    e Zd ZU dZeed<   eed<   d Zd Zd ZdS )EmbedderzEmbedder module.
vocab_size	embed_dimc                     |                      dt          j                                        | j        | j        f          | _        d S )Ninput_embedding)rH   rI   rJ   normalr\   r]   input_embedding_table)rP   s    r   setupzEmbedder.setup   s>    %)ZZO""$$_dn-&
 &
"""r   c                     | j         |f         }|t          j        | j                                      |j                  z  }|S N)ra   r@   rG   r]   rC   r>   rP   rQ   s     r   encodezEmbedder.encode   s<    &t,	SXdn%%,,QW555r   c                 @    t          j        || j        j                  S rd   )r@   dotra   Tre   s     r   decodezEmbedder.decode   s    wq$46777r   N)	r   r   r   __doc__r   r   rb   rf   rj   r   r   r   r[   r[      s[         OOONNN
 
 
  
8 8 8 8 8r   r[   c                   J    e Zd ZU dZee         ed<   ej        d             Z	dS )	AttentionzAttention module.configsc           
      
    t           fd j        D                       sJ t           fd j        D                       sJ t           fd j        D                       sJ t          d |D                       }g }t          t	          | j        d                    D ]\  }\  }}	||	j        |	j        k    rt          j        d|	j        |	j	        |	j
        ft          d|          t          j                            d	d
d          |	j                            d                    }
|                     |
d|                     t          j        |	j        |	j	        |	j
        ft          d|          t          j                            d	d
d          |	j                            d                    } |d|          }t          j        d|	j        |	j	        |	j
        ft          d|          t          j                            d	d
d          |	j                            d                    } |d|          \  }}|                    |||f           d t	          |ddiD             \  }}}t%          ||          }| j        d         j
        dz  z  }t%          ||          }|j        |j        cxk    r|j        cxk    r|k    sn J |5|\  }}t)          j        ||gd          }t)          j        ||gd          }t-          j        |d j        d         j                  }t)          j        d||t(          j                   }|j        |j        d         d|j        d         |j        d         fk    r't7          d!|j         d"|j         d#|j                   d$}t)          j        |d d d d d d d d d f         ||          }t:          j                            |d
                              |          }t)          j        d%||          }t-          j        |d&          }g }d}t          t	          | j        d                    D ]\  }\  }}	|||j        d         z   }t          j        |	j        |	j
        |	j	        ft          d'|          t          j                            d(d
)          |	j                            d                    }|                     |d*|d d ||f                              |}|                    d            |||ffS )+Nc              3   N   K   | ]}|j         j        d          j         k    V   dS r   N)r   rn   .0configrP   s     r   	<genexpr>z%Attention.__call__.<locals>.<genexpr>   s3      ZZ66?dl1o&>>ZZZZZZr   c              3   N   K   | ]}|j         j        d          j         k    V   dS rq   )r   rn   rr   s     r   ru   z%Attention.__call__.<locals>.<genexpr>   s4      \\V6#t|A'@@\\\\\\r   c              3   N   K   | ]}|j         j        d          j         k    V   dS rq   )r   rn   rr   s     r   ru   z%Attention.__call__.<locals>.<genexpr>   s4      bb66&$,q/*FFbbbbbbr   c              3   (   K   | ]}||j         V  d S rd   r>   )rs   rQ   s     r   ru   z%Attention.__call__.<locals>.<genexpr>   s$      ::AMQWMMMM::r   Tstrictr<   
qkv_einsumr8   )r   r*   in_axisout_axis
batch_axisr0   )rL   nameinit_fnlora_configzBSD,3KDH->3BSKHq_einsumr   zBTD,NDH->BTNH   	kv_einsumzBSD,2KDH->2BSKHc              3   B   K   | ]}t          j        |d           V  dS )r*   r?   N)r@   concatenate)rs   ys     r   ru   z%Attention.__call__.<locals>.<genexpr>   s1      OO!3?11---OOOOOOr   r{   )	positionsr   g      r*   r?   zB T (K G) H -> B T K G H)KzBTKGH,BSKH->BKGTS)preferred_element_typezAttention mask with shape z but shapes for q and k are: z and g<ffzBKGTS,BSKH->BTKGHzB T K G H -> B T (K G) Hattn_vec_einsum)r}   r   r   zBTNH,NHD->BTD) allrn   next	enumeratezipr   r   r   Einsumr   r   _namerI   rJ   lecun_normalr   getappend_apply_roper>   r@   r   einops	rearrangeeinsumrD   rL   r3   wherejaxsoftmaxrC   )rP   xsr   	attn_maskkv_cacher>   qkvsirQ   rt   r|   r   qr   kvcache_kcache_vlogitsbig_negmasked_logitsprobsencodedoutstartend
out_einsums   `                          r   rX   zAttention.__call__   s    ZZZZT\ZZZZZZZZ\\\\t|\\\\\\\\bbbbUYUabbbbbbbb::b:::::'BT(J(J(JKK 	' 	'NA{6y"f&666![f.foN|Q//O88b]c8dd & 3 7 7 ? ?	  
 JJ'8!<<====;!+V\6?Kz1--O88b]a8bb & 3 7 7 ? ?	   H_a00 Kf16<Q{A..O88b]c8dd & 3 7 7 ? ?	  	 !y!2A661Q1I&&&&OOsD7N7N7NOOO1aY///	T\!_%--Y/// w!'5555QW5555555555'GW!1555A!1555AQ :dl1o>Z[[[/AckZZZ?qwqz1agaj!'!*EEErY_rr[\[brrijiprr  
  	)AAAqqq$111,<"=vwOO}266==eDD*0%;;"7,FGG'BT(J(J(JKK 	! 	!NA{6}agaj(![!+V_flK0!44O88TV8WW & 3 7 7 ? ?	  
 

::owqqq%)|7LMMNNN

4    QF{r   N)
r   r   r   rk   r   r   r   rI   rY   rX   r   r   r   rm   rm      sM         fZU U ZU U Ur   rm   c                   H    e Zd ZU dZeed<   eed<   ej        d             ZdS )FeedForwardzFeed forward module.features
hidden_dimc                 N   |j         }|                     dt          j                            ddd          d| j        | j        f                              |          }t          j	        ||d                   }t          j
        |          }t          j	        ||d                   }||z  }|                     d	t          j                            dd
          | j        | j        f                              |          }t          j	        ||          }	|	j         |k    sJ |	S )Ngating_einsumr}   r8   r   r~   r   r   r*   linearr   )r>   rH   rI   rJ   r   r   r   rC   r@   rh   gelu)
rP   rQ   r>   w_gatingff_gate
gate_valueff1activationsw_linearoutputss
             r   rX   zFeedForward.__call__  s   ::O((bT(RRt/
 
 &--	 	
 '!Xa[))WW%%
ga!%% 3&::O((b(AA_dm,
 
 &--	 	
 '+x00}%%%%r   N)	r   r   r   rk   r   r   rI   rY   rX   r   r   r   r   r      sI         MMMOOOZ  Z  r   r   c                   |    e Zd ZU dZeedf         ed<   dZeed<   dZ	ee
df         ed<   ej        dd	            Zd
S )BlockzTransformer block..rn           dropoutr   dropout_bdimsTc           
      \   t          j        |          }| j        rt          j        | j        | j                  nd t          | j        d          }g }g }	t          |          D ]f\  }
}|1 t          t          d|
                    |||
                   \  }}|                    |           |	                    ||nd            gt          j        |          } |||||          \  }}t          j                            fd|          }t          j        |          }d t          |||	d	          D             }t          j        |          }g }g }	t          t          || j        d	                    D ]\  }
\  }}| t          t          d
|
                    |||
                   \  }} t!          j        |j        |j        t          d|
          |j                            d                    |          }|                    |           |	                    ||nd            t          j        |          }t          j                            fd|          }d t          |||	d	          D             }t          j        |          }||fS )Nc                     | S rd   r   )rQ   _s     r   <lambda>z Block.__call__.<locals>.<lambda>'  s    ^_ r   r0   )rn   r   pre_attention_normr   c                      |           S rd   r   rQ   deterministicdrops    r   r   z Block.__call__.<locals>.<lambda>5  s    44=+A+A r   c                 8    g | ]\  }}}t          |||          S r   _gated_residualrs   rQ   r   rW   s       r   
<listcomp>z"Block.__call__.<locals>.<listcomp>7  s*    cccjaDoaD))cccr   Trz   pre_ffw_normmlpr1   )r   r   r   r   c                      |           S rd   r   r   s    r   r   z Block.__call__.<locals>.<lambda>I  s    TT!]%;%; r   c                 8    g | ]\  }}}t          |||          S r   r   r   s       r   r   z"Block.__call__.<locals>.<listcomp>J  s*    ]]]jaDoaD))]]]r   )shardingactivation_sharding_constraintr   rI   Dropoutr   rm   rn   r   r6   r   r   r   treemapr   r   r   r   r
   r   r   )rP   r   r   r   r   adarms_condr   r0   pre_attngatesr   rQ   rW   	post_attnr   rt   r   s         `         @r   rX   zBlock.__call__$  s   4R88?C|_rz$,(:;;;Q_Q_F;;;bMM 	: 	:DAq}F'u-A1'E'EFFFq+VW.YY4OOALLD9999:8DD"d8Y	8LL	8HLL!A!A!A!A!A9MM	;IFF	ccCIu]a<b<b<bccc4R88'BT(J(J(JKK 
	: 
	:NA{6}@'u^Q'?'?@@@KPQNSS4D$#\%~ua & 3 7 7 > >	  
   JJqMMMLLD99995c::hll;;;;;SAA]]CCW[<\<\<\]]]4R888|r   N)T)r   r   r   rk   tupler   r   r   floatr   r   rI   rY   rX   r   r   r   r   r     sy         63;GU%'M5c?'''Z( ( ( Z( ( (r   r   zl b _t _k _hzl b _t _v _hKVCachec                   N   e Zd ZU dZee         ed<   eed<   dZe	ed<   dZ
eedf         ed<   d	Zeed
<   d Zej        dej        ej        df         dej        ej        df         fd            Zej        	 d ddddeej        ej        df         dz           dej        ej        df         dej        ej        df         deej        ej        df         dz           dz  dedz  dedeeej        ej        df         dz           ef         fd            Zdee         fdZdS )!ModulezRTransformer model, supporting a mixture of different weights for different tokens.rn   embed_dtyper   r   r   .r   Fadarmsc                 R    t           fd j        D                       sJ t          t           j        d         j        d           _        t          j        t          ddt          j
        j                  } t          j        |ddid	d	d
dt          j        t          j        t          j        t          j        f j        d         j                   j         j         j                   _        d t%          t'           j                            D              _        d S )Nc              3   N   K   | ]}|j         j        d          j         k    V   dS rq   )r	   rn   rr   s     r   ru   zModule.setup.<locals>.<genexpr>`  s3      TTV6<4<?#88TTTTTTr   r   embedder)r\   r]   r   F)   )prevent_csestatic_argnumspolicyparamsT)r   r   )variable_axes
split_rngsin_axeslength)rn   r   r   c                 J    g | ] }t          t          d |                    !S )
final_normr   )r6   r   )rs   r   s     r   r   z Module.setup.<locals>.<listcomp>~  s-    cccQG|Q)?)?@@@cccr   )r   rn   r[   PALIGEMMA_VOCAB_SIZEr   r   rI   rematr   r   checkpoint_policiesnothing_saveablescan	broadcastr	   r   r   layersrangelenfinal_norms)rP   	block_clss   ` r   rb   zModule.setup^  s0   TTTTt|TTTTTTTT +l1o+
 
 

 H*;	
 
 
	
bg#Q-"&488 <?(
 
 
 LL,
 
 
" dc%PSTXT`PaPaJbJbcccr   tokenszb tr$   zb t dc                 f    | j                             |                              | j                  S rd   )r   rf   rC   r   )rP   r  s     r   embedzModule.embed  s)    }##F++2243CDDDr   NT)r   r   embeddedzb _t _dr   maskzb t sr   zb _dr   r   c                    t           j                             fd|          }t          j        |          d d d d d d d f         }|d gt           j                  z  }                     ||||||          \  }}t           fd|D                       sJ d t           j
        ||d          D             |fS )Nc                 8    |                      j                  S rd   )rC   r   )erP   s    r   r   z!Module.__call__.<locals>.<lambda>  s    !((43C*D*D r   c              3   `   K   | ](}||j         t          j         j                  k    V  )d S rd   )r>   r@   r   )rs   r  rP   s     r   ru   z"Module.__call__.<locals>.<genexpr>  s7      ]]aq}17ci(8999}}}}]]r   c                 B    g | ]\  }}}| |||          d         n|S )Nr   r   )rs   fr  as       r   r   z#Module.__call__.<locals>.<listcomp>  sA     
 
 
3:1a!-AAaGGAJJQ
 
 
r   Trz   )r   r   r   r@   rE   r  rn   r   r   r   r  )rP   r  r   r	  r   r   r   s   `      r   rX   zModule.__call__  s     8<< D D D DhOO{4  D!!!QQQ/&3t|#4#44K![[8Yk[hii(]]]]8]]]]]]]]
 
>A$BRT\^irv>w>w>w
 
 
 	r   
use_adarmsc                    |                      t          j        dt          j                              | d | j        D             t          j        dt          | j                  ft          j                  t          j        dt          | j                  t          | j                  ft                    d t          || j        d          D                        d	S )
zYConvenience method for initializing all parameters, necessary due to the quirks of linen.)r*   r*   ry   c                 F    g | ]}t          j        d d |j        f          S )r*   r@   rN   r   )rs   cs     r   r   zModule.init.<locals>.<listcomp>  s*    >>>ASY1ag''>>>r   r*   c                 R    g | ]$\  }}|rt          j        d |j        f          nd%S )r*   Nr  )rs   ur  s      r   r   zModule.init.<locals>.<listcomp>  s6    wwwDAqA?Aqw<0004wwwr   Trz   )r   N)r  r@   rN   int32rn   r  boolr   )rP   r  s     r   initzModule.init  s    

39V39555666>>>>>Iq#dl++,CI>>>Iq#dl++S->->?tLLLwwCPZ\`\hquLvLvLvwww		
 	
 	
 	
 	
 	
r   rd   )r   r   r   rk   r   r   r   r   r   r   r   r   r   r   r  rb   at	typecheckIntArrayFloatr  Boolr   rX   r  r   r   r   r   r   S  s        \\fGU%'M5c?'''FD d  d  dD \EBF28U?3 E7AR8S E E E \E \ KO $("   28BHi$784?@ 6"(E/*	
 gbh'( bhrx'784?@4G D.  
x9!45<=wF	G   \0
x~ 
 
 
 
 
 
r   r   i'  )max_wavelengthc                8   d| j         d         z  t          j        | j         d         dz  t          j                  z  }||z  }|d         |ddddf         z  }|ddddf         }|j        t          j        k    sJ t          j        |          t          j        |          }}t          j        | dd          \  }}	t          j        ||z  |	|z  z
  |	|z  ||z  z   gd          }
|
j        t          j        k    sJ |
	                    | j                  S )	z0Applies RoPE positions [B, L] to x [B, L, H, D].g       @r8   r   ry   ).NN.r?   )
rL   r@   arangerD   r>   sincosrO   r   rC   )rQ   r   r!  freq_exponents	timescaleradiansr$  r%  x1x2ress              r   r   r     s   AGBK'3:agbkQ6Fck+Z+Z+ZZN.I	"YtT111}%==Gc4l#G=CK''''ww!1!1CYq!"%%%FB
/28b3h.S280CD2
N
N
NC9####
 ::agr   c                      |dk    r| S |  d| S )Nr   r   r   )r   r   s     r   r   r     s"    
 	Avv==Q==r   c                 @    | d u |d u k    sJ | d S || |z   S | ||z  z   S rd   r   )rQ   r   rW   s      r   r   r     sA    I19%%%%yt|1uq4x<r   )-rk   collections.abcr   r   typingr   r   r   
flax.linenlinenrI   r   	jax.numpynumpyr@   openpi.models.loramodelsr   openpi.shared.array_typingsharedarray_typingr  openpi.training.shardingtrainingr   r   	dataclassr   Variantr4   r  r   r6   r[   rm   r   r   r   r  r  r   r   r   r   r   r   r   r   <module>r=     s7     % $ $ $ $ $     % % % % % % % %        



       ! ! ! ! ! ! ! ! ! ' ' ' ' ' ' ' ' ' + + + + + + + + +  W W W W W W W W W
X34 34F 34 34 34 34l 1 1 1 1 1bi 1 1 1* 8 8 8 8 8ry 8 8 8, [ [ [ [ [	 [ [ [|     ")   < 1 1 1 1 1BI 1 1 1h 28BHn$<=rxR`H`?aab b b b Q
 Q
 Q
 Q
 Q
RY Q
 Q
 Q
h 17     &      r   