
    qi3                        d dl Z d dlZd dlmZ d dlZd dlmZ d dlmc mZ	 d dl
Z
d dlmZ d dlmZ d dlmZ d dlmc mZ d dlmc mZ d dlmZ d dlmc mZ  ej         d          Z!dZ"d Z#e
j$        d	             Z%d
 Z& e j'        d           G d dej(                              Z) G d dej*                  Z+dS )    N)Any)override)model)array_typingopenpi   c                 
   t          j        || j                  }t          j        |d          }|dddddf         |dddddf         k    }| dddddf         | dddddf         z  }t          j        ||          S )a  Adapted from big_vision.

    Tokens can attend to valid inputs tokens which have a cumulative mask_ar
    smaller or equal to theirs. This way `mask_ar` bool[?B, N] can be used to
    setup several types of attention, for example:

      [[1 1 1 1 1 1]]: pure causal attention.

      [[0 0 0 1 1 1]]: prefix-lm attention. The first 3 tokens can attend between
          themselves and the last 3 tokens have a causal attention. The first
          entry could also be a 1 without changing behaviour.

      [[1 0 1 0 1 0 0 1 0 0]]: causal attention between 4 blocks. Tokens of a
          block can attend all previous blocks and all tokens on the same block.

    Args:
      input_mask: bool[B, N] true if its part of the input, false if padding.
      mask_ar: bool[?B, N] mask that's true where previous tokens cannot depend on
        it and false where it shares the same attention mask as the previous token.
    r   axisN)jnpbroadcast_toshapecumsumlogical_and)
input_maskmask_arr   	attn_mask
valid_masks        7/home/robot-lab/Pi0.5_yam/src/openpi/models/pi0_fast.pymake_attn_maskr      s    * w
(899GZa(((Fqqq$z"fQQQ4Z&88IAAAtQQQJ'*QQQ4Z*@@J?9j111    c                    | j         dk    sJ |j         dk    sJ |j         dk    sJ | j        d         |j        d         k    sJ |j        d         |j        d         k    sJ |j                    t          j        |t          j        |j        d                   z            dz   }t          j        | | d          } t          j        || d          }t          j        || d          }| ||fS )z0Converts input from left-align to right-aligned.   r   r   r
   )r   r   )ndimr   r   maxarangeroll)xr   r   seqlens       r   left_to_right_alignr    3   s    6Q;;;;?a>Q71:)!,,,,,?1!3333Y_333WZ#*Z-=a-@"A"AABBQFFVG!$$$A*vgA666JVG&999Ij)##r   c                    | j         |j         cxk    r|j         k    sn J | j         |j         |j         f            t          j                            || j        d         |j                  }t          j        dt          j        |j        t          j	                  |          }t          j        d||          }t          j
        |||           S )z>Like np.put_along_axis(..., axis=-1), since jax is missing it.)dtypez...i,...in->...n)r   jaxnnone_hotr   r#   r   einsumonesint32where)arrindicesvaluesonehotput_mask
put_valuess         r   put_along_last_axisr1   C   s    8w|2222v{22222SXw|V[4Y222V^^GSYr]&,^GGFz,chv|SY.O.OQWXXH.??J9Xz3///r   T)frozenc                   b   e Zd ZU dZeed<   dZej        ed<   dZ	e
ed<   dZe
ed<   dZe
ed	<   d
Zed
z  ed<   d
Zeeef         d
z  ed<   eedej        fd                        Zedej        ddfd            Zeddde
deej        ej        f         fd            Zdej        j        fdZ d
S )Pi0FASTConfigbfloat16r#   gemma_2bpaligemma_variant    
action_dimaction_horizon   max_token_lenNfast_model_tokenizerfast_model_tokenizer_kwargsreturnc                 $    t           j        j        S N)_model	ModelTypePI0_FASTselfs    r   
model_typezPi0FASTConfig.model_type[   s     ((r   rngPi0FASTc                 H    t          | t          j        |                    S )N)rngs)rI   nnxRngs)rF   rH   s     r   createzPi0FASTConfig.create`   s    t#(3--0000r   r   )
batch_sizerO   c                   t          j        |gt          j        dt          j                  }t          j        |gt          j                  }t          j                    5  t          j	        |||d|||dt          j        || j
        gt          j                  t          j        || j        gt          j                  t          j        || j        gt                    t          j        || j        gt          j                  t          j        || j        gt          j                            }d d d            n# 1 swxY w Y   t          j        || j        | j
        gt          j                  }||fS )N   )
base_0_rgb
base_1_rgbwrist_0_rgb)imagesimage_masksstatetokenized_prompttokenized_prompt_masktoken_ar_masktoken_loss_mask)r$   ShapeDtypeStructrB   IMAGE_RESOLUTIONr   float32bool_atdisable_typecheckingObservationr9   r<   r)   boolr:   )rF   rO   
image_specimage_mask_specobservation_specaction_specs         r   inputs_speczPi0FASTConfig.inputs_specd   s   ):*S8O*SQR*SUXU`aa
.
|SYGG$&& 	 	%1",",#-  #2"1#2 
 *J+H#+VV!$!5z4CU6VX[Xa!b!b&)&:JHZ;[]a&b&b!2J@R3SUXU^__ # 4j$BT5UWZW` a a     	 	 	 	 	 	 	 	 	 	 	 	 	 	 	$ *J8KT_+]_b_jkk,,s   "CEEEc                     d| j         v rKt          j        t          j        d          t          j        t          j        d                              S t          j        S )z4Returns the freeze filter based on the model config.loraz.*llm.*z.*lora.*)r7   rL   All	nnx_utils	PathRegexNotNothingrE   s    r   get_freeze_filterzPi0FASTConfig.get_freeze_filter   sK    T+++79.y99379CVWaCbCb;c;cddd{r   )!__name__
__module____qualname__r#   str__annotations__r7   _gemmaVariantr9   intr:   r<   r=   r   r>   dictpropertyr   rB   rC   rG   r`   KeyArrayLikerN   tuplerb   Actionsrh   rL   	filterlibFilterrp    r   r   r4   r4   L   sl        E3(2v~222 JNCM3 (,#*+++9=c3h$!6===)F, ) ) ) X X) 1"/ 1i 1 1 1 X1 /0 - - - -U6;Mv~;]5^ - - - X-43=#7      r   r4   c                       e Zd Zdedej        f fdZej        de	j
        deej        ej        df         ej        ej        df         ej        ej        df         f         fd            Zed	d
dej        de	j
        de	j        dedej        ej        df         f
d            Zeddddej        de	j
        deej        ej        df         z  dede	j        f
d            Z xZS )rI   configrK   c           	         t                                          |j        |j        |j                   t          j        |j                  }t          j	        t          j
        di ||j        |j        d          }|                    |d           t          j	        t          j
        |j        ddd|j                            }|                    t          t!          |                                j                                                            d|	           t)          j        ||
          | _        d S )N)embed_dtypecache_dtypeinit)rK   methodz	So400m/14noneT)num_classesvariant	pool_typescandtype_mmF)trainrK   )llmimgr   )super__init__r9   r:   r<   rv   
get_configr7   
nnx_bridgeToNNXModuler#   	lazy_init_siglipwidthnextiterfake_obsrU   r-   rL   Dict	PaliGemma)rF   r   rK   paligemma_configr   r   	__class__s         r   r   zPi0FAST.__init__   s3   *F,A6CWXXX!,V-EFFM  ""L"L   
 
 	4///N,2#   
 
 	d4 1 1 8 ? ? A ABBCC5W[\\\cs333r   obsr?   zb s embzb sc           	         g }g }g }|j         D ]}| j                            |j         |         d          \  }}|                    |           |                    t	          j        |j        |         d|j        d                              |                    d|d         z             |j        
J d            |j	        
J d	            |j
        
J d
            | j                            |j        d          }|                    |           |                    |j	                   |                    |j
                   t          j        |d          t          j        |d          t          j        |d          fS )NFr   zb -> b sr   )sr   r"   zTokenized prompt is requiredz!Tokenized prompt mask is requiredz&Token auto-regressive mask is requiredT
embed_onlyr
   )rU   r   r   appendeinopsrepeatrV   r   rX   rY   rZ   r   r   concatenate)	rF   r   r   ar_masktoken_embeddingsnameimage_token_embeddings_tokenized_inputs_embeddingss	            r   embed_inputszPi0FAST.embed_inputs   s    
J 	/ 	/D(,(:(:3:d;KSX(:(Y(Y%"A##$:;;;OD),215     NN1z"~-.... #//1O///(446Y444 ,,.V,,,&*n&8&89MZ^&8&_&_# ;<<<#3444s())) O,1555OJQ///OG!,,,
 	
r   Fr   rH   observationactionsr   z*b ahc          	      |   t          j        |||t          |j                                                            }|                     |          \  }}}t          ||          }t          j        	                    |j
        d d dd f         | j        j        j        j                  }	| j                            |d d d df         |d d d dd df         d          \  }
}}| j                            |
d d |	j        d          d f                   \  }}t          j                            |d          }|j        
J d            |j        d d dd f         }t%          j        |	|z  d          }t%          j        ||z  d           t%          j        t%          j        |d          d          z  S )	Nr   
image_keysr   r"   T)embedded_prefixmaskreturn_prelogits)
pre_logitsr
   zToken loss mask is required)rB   preprocess_observationlistrU   keysr   r   r$   r%   r&   rX   r   r   module
vocab_sizer   log_softmaxr[   r   sumclip)rF   rH   r   r   r   input_token_embeddingsr   r   r   targetsr   r   logitslogp	loss_mask
token_pplxs                   r   compute_losszPi0FAST.compute_loss   s    3Ed;;M;R;R;T;T6U6U
 
 

 7;6G6G6T6T3
G":w77	 &..(ABB/N%0
 
  >--2111crc6:111crc3B3;'! . 
 

Aq N&&!!!!gmA&6%6%8%8"89 ' 
 
	 v!!&r!22 *668U666/1226	WWt^"555

Y.R888838CGIWYDZDZ\];^;^^^r              )max_decoding_stepstemperaturer    r   c          
          t          j        d |dt          |j                                                            }                     |          \  }}}t          ||          }t          |||          \  }}}|j        d         t          j
        |d          z
  t          j        |dddff          }t          j        |d          dz
  }	 j                            |||	d	          \  }
}}|
d d dd f         }t          j        |j        d         f          } fd
}fd}t           j                            ||||||ddf          \  }}}}}}|S )NFr   r   r"   r
   )r   r   r   T)r   r   	positionsdecodec           
      Z   | \  }}}}}t           j                            |          \  }t           j                            dk    fdfdd           }t          |t          j        ||j        d         df          |          }t          j	        |t          k    d          }t          j        |          }j                            |d	
          }	d d d f         |z   dz   }
t          j        t          j        z             d d d d f         d d d d f         k    t          j        z             d d d d f         t          j        |z   dz   j        d         ddf          k               }j                            |	||
d	|          \  }}|||||dz   fS )Nr   c                 N    t           j                            z  d          S Nr"   r
   )r$   randomcategorical)r   
last_logitrng_stepr   s    r   <lambda>z6Pi0FAST.sample_actions.<locals>.step.<locals>.<lambda>  s$    #*00:;SZ\0]] r   c                 0    t          j        d          S r   )r   argmax)r   r   s    r   r   z6Pi0FAST.sample_actions.<locals>.step.<locals>.<lambda>  s    #*Zb999 r   )operandr   r   r"   r
   Tr   )r   r   r   r   kv_cache)r$   r   splitlaxcondr1   r   r   r   anyPALIGEMMA_EOS_TOKENallr   r   r   r   )carryrH   output_tokenscacher   steptokenhas_eosall_eostoken_embeddingr   r   r   r   r   r   prefill_lenprefill_sizeprefix_startrF   r   s                @@r   r   z$Pi0FAST.sample_actions.<locals>.step  s   =B:C]E1d  J,,S11MCGLLc!]]]]]]9999	 !  E 0s?OPTW\WbcdWeghVi?j?jlqrrM ge'::DDDGgg&&G #n0040HHO#AAAtG,t3a7I?
<*<<==dD!!!mLP\]^]^]^`dfj]jPkk
<*<<==dD!!!mL#L4$7!$;l>PQR>SUVXY=Z[[] D
 '+n&8&8 /diX\gl '9 ' '#J! 
M8WdQhNNr   c                 *    | \  }}}}}}| |k     z  S rA   r   )r   r   r   r   r   s       r   r   z$Pi0FAST.sample_actions.<locals>.cond1  s(    (-%Aq!QH(:!:;;r   )rB   r   r   rU   r   r   r   r    r   r   r   padr   r   r   zerosr$   r   
while_loop)rF   rH   r   r   r   prefix_token_embeddingsprefix_maskprefix_ar_maskprefix_attn_maskprefix_positionsprefix_logitsr   r   r   r   r   r   r   r   r   s   `  ``            @@@r   sample_actionszPi0FAST.sample_actions   s    3+UtK<N<S<S<U<U7V7V
 
 

 @D?P?PQ\?]?]<n)+~FF BU#[2BB
 B
>.> /4Q7gk333#k1 7#3ffqJ\F]5^__:k;;;a?%)^%7%73:JVfos &8 &
 &
"x
 #111bcc6*
	:#3A#68J"KLL	O 	O 	O 	O 	O 	O 	O 	O 	O 	O@	< 	< 	< 	< 	<
 (+w'9'9$j-5!L(
 (
$1mQ1 r   )rq   rr   rs   r4   rL   rM   r   r`   	typecheckrB   rb   r|   FloatArrayBoolIntr   r   r{   r}   rc   r   rx   floatr   __classcell__)r   s   @r   rI   rI      s       4} 4CH 4 4 4 4 4 40 \#
%#
	rx)+,bgbho.FrxY^H__	`#
 #
 #
 \#
J ot#_ #_ #_?#_171C#_NTn#_hl#_	"(G#	$#_ #_ #_ X#_J  := M M M_M 'M
  "&2"66M M 
M M M XM M M M Mr   rI   ),dataclassesloggingtypingr   r   flax.nnxrL   flax.nnx.bridgebridger   r$   	jax.numpynumpyr   typing_extensionsr   openpi.modelsr   rB   openpi.models.gemma_fastmodels
gemma_fastrv   openpi.models.siglipsiglipr   openpi.sharedr   r`   openpi.shared.nnx_utilssharedrl   	getLoggerloggerr   r   vmapr    r1   	dataclassBaseModelConfigr4   	BaseModelrI   r   r   r   <module>r     s                     $ $ $ $ $ $ $ $ $ 



       & & & & & & ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) & & & & & & & & & , , , , , , + + + + + + + + +		8	$	$ 2 2 28 $ $ 
$0 0 0 d###6 6 6 6 6F* 6 6 $#6rs s s s sf s s s s sr   