
    qi./                     L   d Z ddlmZ ddlmZ ddlZddlmZ	 ddlZ
ddlmc mZ de	j        fdZe	j        fdZ G d dej                  Z G d	 d
ej                  Z G d dej                  Z G d dej                  Z G d dej                  ZddddZd ZdS )zIA refactored and simplified ViT adoptation for Pi, taken from big_vision.    )SequenceNg     @c                 V   t           j        d| d|f         \  }}|dz  dk    s
J d            t          j        |dz            |dz  dz
  z  }d||z  z  }t          j        d|                                |          }t          j        d|                                |          }t          j        t          j        |          t          j        |          t          j        |          t          j        |          gd          }t          j        ||          dddddf         S )	zFollows the MoCo v3 logic.N   r   z)Width must be mult of 4 for sincos posemb   g      ?zm,d->mdaxis)	jnpmgridarangeeinsumflattenconcatenatesincosasarray)	hwwidthtemperaturedtypeyxomegapes	            5/home/robot-lab/Pi0.5_yam/src/openpi/models/siglip.pyposemb_sincos_2dr      s    9RaR!VDAq19>>>F>>>Juz""eqj1n5E;%&E
9aiikk511A
9aiikk511A	#'!**cgajj#'!**cgajjIPQ	R	R	RB;r5!!$111*--    c           	          |dk    r`|                      |t          j                            dt	          j        |          z            dt	          j        |          |f|          S |dk    rt          g ||R d|iS t          d|           )Nlearnr   stddevsincos2dr   zUnknown posemb type: )	paramnninitializersnormalnpsqrtprodr   
ValueError)selftypseqshaper   namer   s         r   
get_posembr/   (   s    
g~~zzO""!bgenn*<"==!!5)	
 
 	
 j>>5>>>>>>
2S22
3
33r   c                   f    e Zd ZU dZdZedz  ed<   dZeed<   dZ	e
ed<   ej        d
d	            ZdS )MlpBlockz%Transformer MLP / feed-forward block.Nmlp_dim        dropoutfloat32dtype_mmTc                    t           j                                        t           j                            d          d}|j        \  }}} t          j        | j        pd|z  fd| j        i||          }t          j        |          } t          j	        | j
                  ||          } t          j        |fd| j        i||          S )z$Applies Transformer MlpBlock module.gư>r    )kernel_init	bias_initr   r   rate)r$   r%   xavier_uniformr&   shapeDenser2   r6   geluDropoutr4   )r+   r   deterministicinits_ds         r   __call__zMlpBlock.__call__<   s     ?99;;//t/<<
 

 '1aIBHT\*QUII$-I5II!LLGAJJ)BJDL)))!];;8rx888%88;;;r   T)__name__
__module____qualname____doc__r2   int__annotations__r4   floatr6   strr$   compactrE    r   r   r1   r1   5   sp         //GS4ZGUHcZ< < < Z< < <r   r1   c                   t    e Zd ZU dZdZedz  ed<   dZeed<   dZe	ed<   dZ
eed	<   ej        dd            ZdS )Encoder1DBlockz.Single transformer encoder block (MHSA + MLP).Nr2      	num_headsr3   r4   r5   r6   Tc                    i }t          j        |          } t          j        | j                  |          } t          j        | j        t          j                                        || j                  ||          x}|d<   t          j        |          } t          j	        | j
                  ||          }||z   x}|d<    t          j        | j                  |          } t          | j        | j
        | j                  ||          x}|d<   t          j        |          } t          j	        | j
                  ||          }||z   x}|d<   t          j        |          }||fS )	Nr   )rT   r8   rA   r   sar:   z+sa)r2   r4   r6   mlpz+mlp)shardingactivation_sharding_constraintr$   	LayerNormr6   MultiHeadDotProductAttentionrT   r%   r<   r@   r4   r1   r2   )r+   r   rA   outr   s        r   rE   zEncoder1DBlock.__call__S   s   3A66-BLt}---a00
7n6688'-	
 
 

 Q  	CI 3A66)BJDL)))!];;QCJ-BLt}---a00
LL]
 
 
 ]	  	CJ
 3A66)BJDL)))!];;a%CK3A66#vr   rF   )rG   rH   rI   rJ   r2   rK   rL   rT   r4   rM   r6   rN   r$   rO   rE   rP   r   r   rR   rR   K   s         88GS4ZIsGUHcZ   Z  r   rR   c                       e Zd ZU dZeed<   dZedz  ed<   dZeed<   dZe	ed<   d	Z
eed
<   dZeed<   dZeed<   ej        dd            ZdS )Encoderz?Transformer Model Encoder for sequence to sequence translation.depthNr2   rS   rT   r3   r4   Fscannothing_saveableremat_policyr5   r6   Tc           
         i }| j         rt          j        t          ddt	          t
          j        | j        d                     }  t          j         |ddidddt          j        | j	                  d	| j
        | j        | j        | j        
          ||          \  }}t          | j	                  D ],}t
          j                            |fd|          |d|d<   -n]t          | j	                  D ]C}t          d| | j
        | j        | j        | j        
          } |||          \  }|d|d<   D||d<    t          j        d| j
                  |          |fS )NF)   )prevent_csestatic_argnumspolicyparamsr   T)ri   r4   )variable_axes
split_rngsin_axeslengthencoderblock)r.   r6   r2   rT   r4   c                     | |         S NrP   )olyrs     r   <lambda>z"Encoder.__call__.<locals>.<lambda>   s
    3 r   block02dencoderblock_pre_lnencoder_norm)r.   r   )ra   r$   rematrR   getattrjaxcheckpoint_policiesrc   	broadcastr`   r6   r2   rT   r4   rangetreemapr[   )r+   r   rA   r]   rt   scan_outrr   	block_curs           r   rE   zEncoder.__call__z   s   9 !	H!#s68I4PP	  E "''m&*t<<z   $.      KAx TZ(( [ [),C6O6O6OQY)Z)Z%C%%%&&[ TZ(( H H*...!] L"n L  	 -6Ia,G,G)3(s((())CMEr|t}EEEaHH#MMr   rF   )rG   rH   rI   rJ   rK   rL   r2   rT   r4   rM   ra   boolrc   rN   r6   r$   rO   rE   rP   r   r   r_   r_   o   s         IIJJJGS4ZIsGUD$*L#***HcZ&N &N &N Z&N &N &Nr   r_   c                   d    e Zd ZU dZdZedz  ed<   dZeed<   dZe	ed<   e
j        d             ZdS )	MAPHeadzMultihead Attention Pooling.Nr2   rS   rT   r5   r6   c                    |j         \  }}}|                     dt          j                                        dd|f|j                  }t          j        ||ddg          } t          j        | j	        | j
        t          j                                                  ||          } t          j        | j
                  |          }| t          | j        | j
                  |          z   }|d d df         S )Nprober   )rT   r   r8   rV   )r2   r   r   )r=   r#   r$   r%   r<   r   r	   tiler\   rT   r6   r[   r1   r2   )r+   r   nrC   rD   r   r   s          r   rE   zMAPHead.__call__   s    '1a

7BO$B$B$D$Dq!QiQRQXYYAq	**
B+n-6688
 
 
 	  .BLt}---a00CT]CCCAFFFAwr   )rG   rH   rI   rJ   r2   rK   rL   rT   r6   rN   r$   rO   rE   rP   r   r   r   r      sk         &&GS4ZIsHcZ  Z  r   r   c                      e Zd ZU dZdZedz  ed<   dZee         ed<   dZ	eed<   dZ
eed	<   dZedz  ed
<   dZeed<   dZeed<   dZeez  ed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   ej        ddd            ZdS )_Modulez
ViT model.Nnum_classes)   r   
patch_size   r   rS   r`   r2   rT   r   posembFrep_sizer3   r4   gap	pool_typeThead_zeroinitra   rb   rc   r5   r6   )trainc                   i }t          j        |t           j                  } t          j        | j        | j        | j        ddt           j                  |          x}|d<   |j        \  }}}}t          j        ||||z  |g          }|t          | | j
        ||f|dt           j                  z   x}|d<   | j        dk    r]|                     dt          j        j        d	d	|f|j                  }	t          j        t          j        |	|d	d	g          |gd	
          }|j        \  }}
} t          j        | j                  ||           }|                    | j                  } t-          | j        | j        | j        | j        | j        | j        | j        d          ||           \  }|d<   |x}|d<   | j        dk    r0 t9          | j        | j        | j                  |          x}|d<   n| j        dk    rt          j        |d	
          x}|d<   nl| j        dk    r|d d df         x}|d<   nO| j        dk    r |d d df         x}|d<   |d d d	d f         }n$| j        dk    rnt=          d| j         d          t          j        ||||dg          }| j        rm| j        du r| j        n| j        }t          j         || j        d          }t          j!         ||                    }t          j!         ||                    }||d<   ||d<   | j"        r[| j#        rdt          j        j        ini }t          j         | j"        f| j        d d|} ||          x}|d!<    ||          x}|d"<   ||fS )#NVALID	embedding)stridespaddingr.   r   stempos_embeddingwith_posembtokclsr   r   r:   Transformer)r`   r2   rT   r4   ra   rc   r6   r.   )rA   encoderencodedr   )rT   r2   r   
head_inputr   0r   nonezUnknown pool type: ''T
pre_logits)r   r.   pre_logits_2dr8   head	logits_2dlogits)$r	   r   r5   r$   Convr   r   r=   reshaper/   r   r   r#   r%   zerosr   r   r   r@   r4   astyper6   r_   r`   r2   rT   ra   rc   r   meanr*   r   r>   tanhr   r   )r+   imager   r]   r   r   r   r   cr   rC   r   x_2dr   hidkwr   s                    r   rE   z_Module.__call__   s    E3;//
"'JOO+
 
 
   	CK W
1aKAq1ua=)) "#ZdkAq61o_b_j%k%k!kkC>U""**UBO$9Aq!9agNNC#1ay!9!91 =AFFFA'1a)BJDL)))!Y77 HHT]##	
G*LnL*]	
 	
 	
 u9	& 	& 	&3y> $%$#i.>U""%G.m% % % 	% % AL!!
 ^u$$$'HQQ$7$7$77AL!!^s""$%aaadG+AL!!^u$$$%aaadG+AL!aaaenGG^v%%EDNEEEFFF{7Q1bM22= 	 %)]d%:%:tzzH(84=|LLLC 733t99%%DAA#OL 	(;?;MU-!677SUB8D,UDMUURTUUD&*d4jj0D3{# $Q'AH#vr   )rG   rH   rI   rJ   r   rK   rL   r   r   r   r`   r2   rT   r   rN   r   r   r4   rM   r   r   ra   rc   r6   r$   rO   rE   rP   r   r   r   r      s8        "Kt""" (J(((E3E3OOOGS4ZIsFC HcDj   GUIsM4D$*L#***HcZ', R R R R ZR R Rr   r   )variantc                >    t          | fi i t          |          |S )zAFactory function, because linen really don't like what I'm doing!)r   decode_variant)r   r   r   s      r   Moduler   %  s+    ;DD"C^G%<%<"C"CDDDr   c                 <   | i S | i }}d| v r8|                      d          \  }}dt          |          t          |          fi}dddddd	d
ddddddd|         dddddddddddddd|         ddddddddddddd d|         d!d"d#d$dd%d%d%d%d%d%d%d%d|         d&|S )'z8Converts a string like "B" or "B/32" into a params dict.N/r          i  i   r   i   i  i   i  i   i  i   )muTiSMBLSo400mHgzg-optGzG-opter   rS         (   0   8      i   i   i   i  i   i   i    i <  re            r   )r   r`   r2   rT   )splitrK   )r   vpatchs      r   r   r   *  sb   	uA
g~~==%%5E

CJJ78 
 
   
 
   
 
   
 
 aA A@ AA Ar   rp   )rJ   collections.abcr   
flax.linenlinenr$   r{   	jax.numpynumpyr	   r'   openpi.training.shardingtrainingrY   r5   r   r/   r   r1   rR   r_   r   r   r   rP   r   r   <module>r      s   P O $ $ $ $ $ $       



           + + + + + + + + + /7ck 
. 
. 
. 
. 8;{ 
4 
4 
4 
4< < < < <ry < < <,! ! ! ! !RY ! ! !H2N 2N 2N 2N 2Nbi 2N 2N 2Nj    bi   0f f f f fbi f f fRE E E E E E
K K K K Kr   