
    qi=                        U d Z ddlZddlmZmZ ddlZddlmZ ddl	Z	ddl
mZ ddlZddlmc mZ ddlmc mZ ed         Zd Zej         G d dej                              Zej         G d d	ej                              Zej         G d
 dej                              Zej         G d dej                              Zej         G d dej                              Zeej        ej         df         ej!        ej         df         ej!        ej         df         f         Z"ee#d<   ej         G d dej                              ZdddZ$dS )z
Gemma model implementation from big_vision/models/ppp/gemma.py (with small modifications for NNX compatibility)
Used for FAST autoregressive policies.
    N)Literal	TypeAlias)gemma_2bgemma_2b_lorac                    | dk    r t          j        | dddddddd	d
dd          S | dk    rLt          j        | dddddddd	d
dt          j        dd          t          j        dd          dd          S t	          d|            )z+Returns config for specified gemma variant.r   i      i @           ư>i Tnothing_saveable)variantwidthdepthmlp_dim	num_headsnum_kv_headshead_dimnorm_eps
vocab_sizescanremat_policyr      g      0@)rankalpha)attnffn)r   r   r   r   r   r   r   r   r   r   r   lora_configszUnknown variant: )ml_collections
ConfigDictlora
LoRAConfig
ValueError)r   s    9/home/robot-lab/Pi0.5_yam/src/openpi/models/gemma_fast.py
get_configr%   #   s    *("! ! % 2 
 
 	
 /!!("! ! % 2 O4@@@?$???! ! 
 
 	
& 222
3
33    c                   J    e Zd ZU eedf         ed<   ej        d             ZdS )Einsum.shapec                     |j         }|                     dt          j                                        | j                                      |          }t          j        |||          S )Nw)	dtypeparamnninitializers
zeros_initr)   astypejnpeinsum)selfeqnxr,   r+   s        r$   __call__zEinsum.__call__P   sP    JJsBO6688$*EELLUSSz#q!$$$r&   N)	__name__
__module____qualname__tupleint__annotations__r.   compactr7    r&   r$   r(   r(   L   sE         c?Z% % Z% % %r&   r(   c                   .    e Zd Zej        d             ZdS )RMSNormc           	         |j         }|                     dt          j                                        |j        d                   }t          j        t          j        |	                    t          j
                            dd          }t          j        |t          j        t          j        |dz                       z            }|d|z   z  }|	                    |          S )NscaleT)axiskeepdimsr   r
   )r,   r-   r.   r/   r0   r)   r2   meansquarer1   float32asarray
reciprocalsqrt)r4   r6   r,   rC   varnormed_inputss         r$   r7   zRMSNorm.__call__Y   s    

7BO$>$>$@$@172;PPhsz!((3;"7"788rDQQQAsxe7L7L(M(M$MNN%I
 ##E***r&   N)r8   r9   r:   r.   r>   r7   r?   r&   r$   rA   rA   W   s/        Z+ + Z+ + +r&   rA   c                   :    e Zd ZU dZeed<   eed<   d Zd Zd ZdS )EmbedderzEmbedder module.r   	embed_dimc                     |                      dt          j                                        | j        | j        f          | _        d S )Ninput_embedding)r-   r.   r/   r0   r   rQ   input_embedding_tabler4   s    r$   setupzEmbedder.setupl   s>    %)ZZO&&((_dn-&
 &
"""r&   c                     | j         |f         }|t          j        | j                                      |j                  z  }|S N)rT   r2   rL   rQ   r1   r,   r4   r6   s     r$   encodezEmbedder.encodes   s<    &t,	SXdn%%,,QW555r&   c                 @    t          j        || j        j                  S rX   )r2   dotrT   TrY   s     r$   decodezEmbedder.decodex   s    wq$46777r&   N)	r8   r9   r:   __doc__r<   r=   rV   rZ   r^   r?   r&   r$   rP   rP   e   s[         OOONNN
 
 
  
8 8 8 8 8r&   rP   c                       e Zd ZU dZeed<   eed<   eed<   eed<   dZedz  ed<   dZe	j
        dz  ed<   d	 Zd
 Zd Zej        dd            ZdS )	AttentionzAttention module.r   r   featuresr   Ncache_dtypelora_configc           	         | j         | j        k    rVt          j        d| j        | j        | j        fdt          j                            ddd          | j	                  | _
        nt          j        | j        | j        | j        fdt          j                            ddd	          | j	                  | _        t          j        d
| j         | j        | j        fdt          j                            ddd          | j	                  | _        t          j        | j        | j        | j        fdt          j                            ddd	          | j	                  | _        d S )N   
qkv_einsumrD   )r   r
   )in_axisout_axis
batch_axis)r)   nameinit_fnrd   q_einsum)r      	kv_einsumattn_vec_einsum)r   r   r!   r(   rb   r   r.   r/   lecun_normalrd   rg   rn   rp   rq   rU   s    r$   rV   zAttention.setup   sV   .."k$.$-G!44R"Y_4`` ,	  DOO !K~t}dmD44R"Y]4^^ ,	  DM "[$+T]DMJ 44R"Y_4`` ,	  DN  ${>4=$-@"O00bUY0ZZ(	 
  
  
r&   c                 V   |j         d         }dd||z
  fddf}| j        p|j        }t          j        |                    |          |          }t          j        |                    |          |          }t          j        |j         d         ft          j                  |z   }	|	||fS )zInitialize KV cacher
   )r   r   r   r,   )r)   rc   r,   r2   padr1   zerosint32)
r4   kv
cache_sizeprefill_len	pad_widthrc   k_cachev_cacheidxs
             r$   _init_cachezAttention._init_cache   s    gajak!9:FFK	&1!''!((;//;;'!((;//;;iSY777+EGW$$r&   c                 P   |j         d         dk    s
J d            d|d         ddf}| j        p|j        }t          j                            ||                    |          |          }t          j                            ||                    |          |          }	|dz   }
|
||	fS )zUpdate KV cache with new valuesr
   z)Only support kv-cache updates of length 1r   )r)   rc   r,   jaxlaxdynamic_update_slicer1   )r4   rx   ry   r   r}   r~   indicesrc   k_newv_newidx_news              r$   _update_cachezAttention._update_cache   s    wqzQ Kc!fa#&1!',,Wahh{6K6KWUU,,Wahh{6K6KWUU'u$$r&   Tc           	      :   |j         }| j        | j        k    r|                     d|          \  }}	}
n/|                     d|          }|                     d|          \  }	}
t          ||          }|| j        dz  z  }t          |	|          }	|'|                     |	|
|j	        d                   \  }}}n#|\  }}}| 
                    |	|
|||          \  }}}||}
}	|||f}t          j        |d| j                  }t          j        d	||	t          j        
          }|j	        |j	        d         d|j	        d         |	j	        d         fk    r't!          d|j	         d|j	         d|	j	                   d}t          j        |d d d d d d d d d f         ||          }t$          j                            |d                              |          }t          j        d||
          }t          j        |d          }|                     d|          |fS )NzBSD,3KDH->3BSKHzBTD,NDH->BTNHzBSD,2KDH->2BSKH)	positionsg      rD   zB T (K G) H -> B T K G H)KzBTKGH,BSKH->BKGTS)preferred_element_typer   r
   zAttention mask with shape z but shapes for q and k are: z and g<ffǩrE   zBKGTS,BSKH->BTKGHzB T K G H -> B T (K G) HzBTNH,NHD->BTD)r,   r   r   rg   rn   rp   _apply_roper   r   r)   r   einops	rearranger2   r3   rI   r#   wherer   r.   softmaxr1   rq   )r4   r6   r   	attn_maskkv_cacher^   deterministicr,   qrx   ry   r   r}   r~   logitsbig_negmasked_logitsprobsencodeds                      r$   r7   zAttention.__call__   s;   ..oo&7;;GAq!!oq11A>>"3Q77DAqY///	T]D  Y///$($4$4Q9?2;N$O$O!C''$,!C'$($6$6q!S'7$S$S!C'1'*Q :d>OPPP/AckZZZ?qwqz1agaj!'!*EEErY_rr[\[brrijiprr  
  	)AAAqqq$111,<"=vwOO}266==eDD*0%;;"7,FGG##OW==xGGr&   T)r8   r9   r:   r_   r<   r=   rc   strrd   r!   r"   rV   r   r   r.   r>   r7   r?   r&   r$   ra   ra   |   s         NNNMMMMMM"Kt"""*.K4'...
 
 
8% % %% % % Z&H &H &H Z&H &H &Hr&   ra   c                       e Zd ZU dZeed<   eed<   eed<   eed<   eed<   dZeed<   d	Ze	ed
f         ed<   dZ
edz  ed<    ej        ej                  Zej        ed<   d ZddZdS )BlockzTransformer block.r   r   rQ   r   
hidden_dim        dropoutr?   .dropout_bdimsNrc   default_factoryr   c           
         t                      | _        t          | j        | j        | j        | j        | j        | j        	                    d                    | _
        t                      | _        t          j        | j        | j        d| j        	                    d                    | _        | j        r&t#          j        | j        | j                  | _        d S d | _        d S )Nr   )r   r   rb   r   rc   rd   mlpr   )rb   r   rl   rd   c                     | S rX   r?   )r6   _s     r$   <lambda>zBlock.setup.<locals>.<lambda>  s    Q r&   )rA   pre_attention_normra   r   r   rQ   r   rc   r   getr   pre_ffw_normr!   FeedForwardr   r   r   r.   Dropoutr   droprU   s    r$   rV   zBlock.setup   s    ")))n*^]()--f55
 
 
	 $II#^eY]YjYnYnotYuYu
 
 
 < 	'
4<1CDDDIII&DIIIr&   Tc                 \   t          j        |d          }|                     |          }|                     ||||||          \  }}|                     ||          }||z  }|}	|                     |          }|                     |          }
|                     |
|          }
|	|
z   }
|
|fS )N)	act_batchact_lenact_emb)r.   with_logical_constraintr   r   r   r   r   )r4   r6   r   r   r   r^   r   inputs_normalizedattn_outputresidualoutputss              r$   r7   zBlock.__call__  s    &q*MNN 33A66 $		*;Y	S[]cer s sXii];;q''44((;''))G]33W$  r&   r   )r8   r9   r:   r_   r<   r=   r   floatr   r;   rc   r   dataclassesfieldr   r    r   rV   r7   r?   r&   r$   r   r      s         NNNNNNMMMOOOGU%'M5c?'''"Kt""".?k.?P^Pi.j.j.jL.+jjj' ' '&! ! ! ! ! !r&   r   z bz
b _t _k _hz
b _t _v _hKVCachec                   P   e Zd ZU dZeed<   eed<   eed<   eed<   eed<   eed<   eed<   eed	<   eed
<   eed<   dZeed<   dZ	e
edf         ed<   dZedz  ed<   dZeed<   dZeed<    ej        ej                  Zej        ed<   ej        	 	 	 	 	 	 	 	 	 	 dd            Zd ZdS )Modulezgemma model.r   r   r   r   r   r   r   r   r   embed_dtyper   r   r?   .r   Nrc   Fr   noner   r   r   Tc                    i }t          | j        | j        d          }|%|x}|d<   |                    |          x}|d<   ||fS g }||                    |           |(|                    |                    |                     t          j        |d          }|                    | j	                  }|j
        \  }}}|r|S |r||
J d            |;t          j        |                              t          j                  dddf         }|j
        d	         |j
        d	         k    sJ |j
        |j
        f            |3t          j                            t          j        ||g                    }|j        d
k    r|dddddddf         }t%          ||j
        d                   }|j
        |d	||fk    sJ |j
                    | j        dk    rt(          }n@t          j        t(          | j         dt/          t0          j        | j                            }| j        | j        | j        || j        | j        | j        | j         | j!        d	}| j"        #                    d          } t          j        |ddiddddt          j$        t          j$        t          j$        t          j$        f| j%                  dd|i|g}|D ]} |||||||	          \  }}|j&        t          j&        | j	                  k    sJ ||d<    tO          d          |          }||d<   |
r|||fS |                    |          }||d<   |||fS )a  Embed only, or complete forward pass.

        Args:
          tokens: Embedded, then and appended to `embedded_prefix`. Can be None.
          embedded_prefix: Optional prefix that is already embedded.
          embed_only: Whether to compute embeddings only.
          pre_logits: If present computes logits from pre_logits and returns.
          positions: Optional `[B, T]` allows to specify the absolute position of
            the tokens.
          mask: Optional attention mask `[B, T, S]`.
          decode: Whether to use kv-cache. Caller must pass masks and positions.
          deterministic: Forwarded to all dropout layers.
          return_prelogits: Whether to return the pre-logits.

        Returns:
          If `embed_only=False`, then `(logits, out)` will be returned.
          If `embed_only=True`, then the embeddings will be returned.
          If `return_prelogits=True`, then the pre-logits will be returned.
        embedder)r   rQ   rl   N
pre_logitsr   rh   r   z5Must explicitly pass positions and mask for decoding.r
   rf   rD   r   )      )prevent_csestatic_argnumspolicy)	r   r   r   rQ   r   r   r   rc   r   layersparamsr   T)r   r   )variable_axes
split_rngsin_axeslengthparentr   
final_norm)rl   r?   )(rP   r   r   r^   appendrZ   r2   concatenater1   r   r)   arangerw   r.   	attentionmake_causal_maskonesndimmaxr   r   rematr   getattrr   checkpoint_policiesr   r   r   r   r   r   rc   r   scopepush	broadcastr   r,   rA   )r4   tokensembedded_prefix
embed_onlyr   r   maskr^   r   r   return_prelogitsoutr   r6   r   
batch_sizeseq_lenr   rz   	block_clsblock_kwr   blocksblocks                           r$   r7   zModule.__call__.  s   B t$*S]^^^!$..AL!%-__Q%7%77FS]3;&HH_%%%HHX__V,,---OAB'''HHT%&&%&W"
GU 	H 	(T-=-=G .>-== 
7++2239==dAAAgFIq!QWQZ///)/171K///<<00:w:O1P1PQQD9>>4AAA&D$*R.11
zj!WjAAAA4:AAA&&II $	M%s68IJJ	  I  -,|!/+ -

 

 **BG'm&*t<<BL",blSz   ) ) )  () )
  	U 	UE%8YfmTTKAxxw#)D$4555555I&G&&&q))L 	$h##OOAH(Cr&   c                 X     | t          j        dt           j                             dS )zYConvenience method for initializing all parameters, necessary due to the quirks of linen.)r
   r
   rt   N)r2   rv   rw   rU   s    r$   initzModule.init  s)    SYvSY///00000r&   )
NNFNNNFNTF)r8   r9   r:   r_   r   r=   r<   r   r   r   r;   rc   r   boolr   r   r   r   r    r   r.   r>   r7   r   r?   r&   r$   r   r     sS        LLLJJJJJJLLLNNNMMMOOOOOOGU%'M5c?'''"Kt"""D$L#.?k.?P^Pi.j.j.jL.+jjjZ s  s  s  Zs j1 1 1 1 1r&   r   i'  )max_wavelengthc                   d| j         d         z  t          j        | j         d         dz  t          j                  z  }||z  }|d         |ddddf         z  }|ddddf         }|j        t          j        k    sJ t          j        |          t          j        |          }}t          j        | dd          \  }}	t          j        ||z  |	|z  z
  |	|z  ||z  z   gd          }
|
j        t          j        k    sJ |
S )	z0Applies RoPE positions [B, L] to x [B, L, H, D].g       @rD   ro   rt   ).NN.r   )	r)   r2   r   rI   r,   sincossplitr   )r6   r   r   freq_exponents	timescaleradiansr   r   x1x2ress              r$   r   r     s   AGBK'3:agbkQ6Fck+Z+Z+ZZN.I	"YtT111}%==Gc4l#G=CK''''ww!1!1CYq!"%%%FB
/28b3h.S280CD2
N
N
NC9####Jr&   )%r_   r   typingr   r   r   
flax.linenlinenr.   r   	jax.numpynumpyr2   r   openpi.models.loramodelsr!   openpi.shared.array_typingsharedarray_typingatVariantr%   	typecheckr   r(   rA   rP   ra   r   r;   IntArrayFloatr   r=   r   r?   r&   r$   <module>r     s    
     % % % % % % % %        



           ! ! ! ! ! ! ! ! ! ' ' ' ' ' ' ' ' '
-
.&4 &4 &4R % % % % %RY % % % 
+ 
+ 
+ 
+ 
+bi 
+ 
+ 
+ 8 8 8 8 8ry 8 8 8, cH cH cH cH cH	 cH cH cHL ,! ,! ,! ,! ,!BI ,! ,! ,!^ 26"(D.128BHl<R3SUWU]^`^fht^tUuuv v v v O1 O1 O1 O1 O1RY O1 O1 O1d 17       r&   