
    qimD                         d dl Z d dlZd dlZd dlZd dlmZ d dlZd dl	m
Z
 d dlmc mc mZ d dlmc mZ  G d d          Z G d d          Z G d d          Z G d	 d
          ZdS )    N)AutoProcessorc            	       f    e Zd Zd	defdZd
dedej        dz  deej        ej        f         fdZ	dS )PaligemmaTokenizer0   max_lenc                     || _         t          j        dddi          }|                    d          5 }t	          j        |                                          | _        d d d            d S # 1 swxY w Y   d S )N)gs://big_vision/paligemma_tokenizer.modeltokenanongsrbmodel_proto)_max_lendownloadmaybe_downloadopensentencepieceSentencePieceProcessorread
_tokenizer)selfr   pathfs       8/home/robot-lab/Pi0.5_yam/src/openpi/models/tokenizer.py__init__zPaligemmaTokenizer.__init__   s    &'RX_agWhiiiYYt__ 	Y+BqvvxxXXXDO	Y 	Y 	Y 	Y 	Y 	Y 	Y 	Y 	Y 	Y 	Y 	Y 	Y 	Y 	Y 	Y 	Y 	Ys   -A//A36A3Npromptstatereturnc                 n   |                                                     dd                              dd          }|t          j        |t          j        ddd          d d                   dz
  }d                    t          t          |                    }d| d	| d
}| j        	                    |d          }n7| j        	                    |d          | j        	                    d          z   }t          |          }|| j        k     rdg| j        |z
  z  }	dg|z  |	z   }
||	z   }n_t          |          | j        k    r-t          j        dt          |           d| j         d           |d | j                 }dg| j        z  }
t          j        |          t          j        |
          fS )N_ 
     binsTask: 	, State: z
;
Action: Tadd_bosFToken length () exceeds max length (g), truncating. Consider increasing the `max_token_len` in your model config if this happens frequently.)stripreplacenpdigitizelinspacejoinmapstrr   encodelenr   loggingwarningasarray)r   r   r   cleaned_textdiscretized_state	state_strfull_prompttokens
tokens_lenpaddingmasks              r   tokenizezPaligemmaTokenizer.tokenize   s   ||~~--c377??cJJ "EB78S8STWUWTW8X Y Y Y\] ]S*;!<!<==IP<PP)PPPK_++K+FFFF _++L$+GG$/J`J`aeJfJffF[[
%%g!;<G6J&0Dg%FF6{{T]**oS[[ o o o o o   OdmO,F6DM)Dz&!!2:d#3#333    )r   )N)
__name__
__module____qualname__intr   r8   r3   ndarraytuplerF    rG   r   r   r      s        Y Y Y Y Y Y4 4s 42:+< 4bjZ\ZdNdHe 4 4 4 4 4 4rG   r   c                       e Zd ZddedefdZdedej        dej        d	z  d
eej        ej        ej        ej        f         fdZ	dej        deded
ej        fdZ
dej        ee         z  d
ej        fdZd	S )FASTTokenizer   physical-intelligence/fastr   fast_tokenizer_pathc                 :   || _         t          j        dddi          }|                    d          5 }t	          j        |                                          | _        d d d            n# 1 swxY w Y   t          j	        |d          | _
        d	| _        d S )
Nr	   r
   r   r   r   r   T)trust_remote_code   )r   r   r   r   r   r   r   _paligemma_tokenizerr   from_pretrained_fast_tokenizer_fast_skip_tokens)r   r   rS   r   r   s        r   r   zFASTTokenizer.__init__4   s     &'RX_agWhiiiYYt__ 	c(5(LYZY_Y_YaYa(b(b(bD%	c 	c 	c 	c 	c 	c 	c 	c 	c 	c 	c 	c 	c 	c 	c  -<=Pdhiii!$s   -A..A25A2r   r   actionsNr    c                 l   |                                                                                     dd          }t          j        |t          j        ddd          d d                   dz
  }d                    t          t          |                    }d| d| d	}| j	        
                    |d
          }||                     |d                    d         }	|                     |	          }
| j	        
                    d          |
                                z   | j	        
                    dd
          z   }ng }||z   }d
gt          |          z  }dgt          |          z  dgt          |          z  z   }dgt          |          z  d
gt          |          z  z   }t          |          }|| j        k     r#dg| j        |z
  z  }||z   }||z   }||z   }||z   }nt          |          | j        k    r-t!          j        dt          |           d| j         d           |d | j                 }|d | j                 }|d | j                 }|d | j                 }t          j        |          t          j        |          t          j        |          t          j        |          fS )Nr"   r#   r%   r&   r'   r(   r*   r+   ;
Tr,   r   Action: |)add_eosFr.   r/   r0   )lowerr1   r2   r3   r4   r5   r6   r7   r8   rW   r9   rY   _act_tokens_to_paligemma_tokenstolistr:   r   r;   r<   r=   )r   r   r   r[   r>   r?   r@   prefixprefix_tokensaction_tokensaction_tokens_in_pgpostfix_tokensrB   
token_maskar_mask	loss_maskrC   rD   s                     r   rF   zFASTTokenizer.tokenize@   s    ||~~++--55c3?? KBKAw4O4OPSQSPS4TUUUXYY HHS&78899	?,?????1888NN 00??BM"&"F"F}"U"U )00<<%,,../+2232EEF N  N /Vc&kk)
#M***aS3~3F3F-FFGc-000D6C<O<O3OO	 [[
%%g!;<Gg%F#g-J'G!G+II6{{T]**oS[[ o o o o o   OdmO,F#OdmO4Joo.G!/DM/2Iz&!!2:j#9#92:g;N;NPRPZ[dPePeeerG   rB   action_horizon
action_dimc                    | j                             |                                          }d|vr"t          j        ||ft          j                  S t          j        | j                             |                    d          d                             d          d         	                                                    }| 
                    |          }| j                            |                                g||          d         S )Nr^   dtyper&   r_   r   )time_horizonrm   )rW   decoderc   r3   zerosfloat32arrayr9   splitr1   rb   rY   r   rB   rl   rm   decoded_tokensraw_action_tokensrf   s          r   extract_actionszFASTTokenizer.extract_actionsw   s    299&--//JJ ^++8^Z8
KKKK H%,,^-A-A*-M-Ma-P-V-VWZ-[-[\]-^-d-d-f-fgg
 
 <<=NOO#**!!##$>j + 
 

 	rG   c                     t          |t                    rt          j        |          }| j                                        dz
  | j        z
  |z
  S Nr&   
isinstancelistr3   ru   rW   
vocab_sizerZ   r   rB   s     r   rb   z-FASTTokenizer._act_tokens_to_paligemma_tokens   K    fd## 	&Xf%%F(33559D<RRU[[[rG   )rQ   rR   )rH   rI   rJ   rK   r8   r   r3   rL   rM   rF   rz   r   rb   rN   rG   r   rP   rP   3   s       
% 
% 
% 
% 
% 
% 
%5f5f"$*5f79zD7H5f	rz2:rz2:=	>5f 5f 5f 5fnbj # SV [][e    "\bj496L \QSQ[ \ \ \ \ \ \rG   rP   c                       e Zd ZdZddedefdZdedej        dej        d	z  d
e	ej        ej        ej        ej        f         fdZ
dej        deded
ej        fdZdej        ee         z  d
ej        fdZd	S )BinningTokenizerz:
    Standard RT-2 / OpenVLA style binning tokenizer.
    rQ   r   n_binsc                    || _         || _        t          j        dddi          }|                    d          5 }t          j        |                                          | _        d d d            n# 1 swxY w Y   d| _	        d S )Nr	   r
   r   r   r   r   rV   )
r   _n_binsr   r   r   r   r   r   rW   rZ   )r   r   r   r   r   s        r   r   zBinningTokenizer.__init__   s     &'RX_agWhiiiYYt__ 	c(5(LYZY_Y_YaYa(b(b(bD%	c 	c 	c 	c 	c 	c 	c 	c 	c 	c 	c 	c 	c 	c 	c "%s   -A55A9<A9r   r   r[   Nr    c                    |                                                                                     dd          }t          j        |t          j        ddd          dd                   dz
  }d                    t          t          |                    }d| d	| d
}| j	        
                    |d          }|t          d          g }	||	z   }
dgt          |
          z  }dgt          |          z  dgt          |	          z  z   }dgt          |          z  dgt          |	          z  z   }t          |
          }|| j        k     r#dg| j        |z
  z  }|
|z   }
||z   }||z   }||z   }nt          |
          | j        k    r-t          j        dt          |
           d| j         d           |
d| j                 }
|d| j                 }|d| j                 }|d| j                 }t          j        |
          t          j        |          t          j        |          t          j        |          fS )a  Tokenize a prompt and state into a sequence of tokens.

        Args:
            prompt: The text prompt to tokenize.
            state: The state array to discretize and tokenize.
            actions: Must be None. Action encoding is not currently supported.

        Returns:
            A tuple of (tokens, token_mask, ar_mask, targets).

        Raises:
            NotImplementedError: If actions is not None.
        r"   r#   r%   r&   r'   Nr(   r*   r+   r]   Tr,   zOBinningTokenizer does not support encoding actions atm (only for inference use)r   Fr.   r/   r0   ra   r1   r2   r3   r4   r5   r6   r7   r8   rW   r9   NotImplementedErrorr:   r   r;   r<   r=   r   r   r   r[   r>   r?   r@   rd   re   rh   rB   ri   rj   rk   rC   rD   s                   r   rF   zBinningTokenizer.tokenize   sg     ||~~++--55c3?? KBKAw4O4OPSQSPS4TUUUXYY HHS&78899	?,?????1888NN%&wxxx /Vc&kk)
#M***aS3~3F3F-FFGc-000D6C<O<O3OO	 [[
%%g!;<Gg%F#g-J'G!G+II6{{T]**oS[[ o o o o o   OdmO,F#OdmO4Joo.G!/DM/2Iz&!!2:j#9#92:g;N;NPRPZ[dPePeeerG   rB   rl   rm   c                    | j                             |                                          }d|vr"t          j        ||ft          j                  S t          j        | j                             |                    d          d                             d          d         	                                                    }| 
                    |          }t          |          ||z  k     r"t          j        ||gt          j                  S |d ||z                               ||g          }|| j        z  dz  dz
  S )Nr^   ro   r&   r_   r      )rW   rr   rc   r3   rs   rt   ru   r9   rv   r1   rb   r:   reshaper   rw   s          r   rz   z BinningTokenizer.extract_actions   s0   299&--//JJ ^++8^Z8
KKKK H%,,^-A-A*-M-Ma-P-V-VWZ-[-[\]-^-d-d-f-fgg
 
 <<=NOO} ;;;8^Z8
KKKK%&E*)D&EFNNP^`jOkllt|+a/!33rG   c                     t          |t                    rt          j        |          }| j                                        dz
  | j        z
  |z
  S r|   r}   r   s     r   rb   z0BinningTokenizer._act_tokens_to_paligemma_tokens   r   rG   )rQ   rQ   )rH   rI   rJ   __doc__rK   r   r8   r3   rL   rM   rF   rz   r   rb   rN   rG   r   r   r      s        	% 	% 	%3 	% 	% 	% 	%8f8f"$*8f79zD7H8f	rz2:rz2:=	>8f 8f 8f 8ft4bj 4# 4SV 4[][e 4 4 4 4$\bj496L \QSQ[ \ \ \ \ \ \rG   r   c                       e Zd ZdZddededz  fdZdedej        d	ej        dz  d
e	ej        ej        ej        ej        f         fdZ
dej        deded
ej        fdZdej        ee         z  d
ej        fdZdS )FSQTokenizerz6
    FSQ tokenizer from the FAST paper baselines.
    rQ   Nr   fsq_tokenizer_pathc                     | _         |
J d            t          j        |          }t          j                            |t          j        |          d                   }t          |                    d          d                   }|	                    dd          d         }t          j        |t          j                    t          j                    t          j                    dt          j        d                    }	 |                    |t          j                            t          j                                        t          j                                        	          
          }|d         }	|d          _        t+          j        di |	 _        n(# t0          $ r}
t3          d| d|
          |
d }
~
ww xY wt5          j         fd           _        t5          j         fd           _        t          j        dddi          }|                    d          5 }t?          j         |!                                           _"        d d d            n# 1 swxY w Y   d _#        d S )Nz#fsq_tokenizer_path must be providedr   /r%   r&   )params	opt_stateconfig)max_to_keep)item_handlersoptions)r   r   )argsr   r   z-Failed to load FSQ tokenizer checkpoint from z	. Error: c                 V    j                             d| i|j         j                  S Nr   )method)_fsq_tokenizerapplyrF   r   xr   s     r   <lambda>z'FSQTokenizer.__init__.<locals>.<lambda>  s+    d17768JAVZViVr7ss rG   c                 V    j                             d| i|j         j                  S r   )r   r   
detokenizer   s     r   r   z'FSQTokenizer.__init__.<locals>.<lambda>"  s+    d17768JAVZViVt7uu rG   r	   r
   r   r   r   r   rV   rN   )$r   r   r   osr   r6   listdirrK   rv   rsplitocpCheckpointManagerStandardCheckpointHandlerJsonCheckpointHandlerCheckpointManagerOptionsrestorer   	CompositeJsonRestoreStandardRestore_paramsfsq_tokenizerFsqAttentionTokenizerr   	ExceptionRuntimeErrorjaxjit_tokenize_fn_detokenize_fnr   r   r   r   rW   rZ   )r   r   r   r   tok_pathstep	base_pathmgrrestoredr   er   s   `           r   r   zFSQTokenizer.__init__   s   !--/T---&'9::7<<bj&6&6q&9:: 8>>#&&r*++OOC++A.	#799 :<<355 
 0Q???
 
 

	{{38--SX5I5I5K5KTWT\TlTlTnTn-oo #  H h'F#H-DL"/"E"O"O"O"OD 	 	 	b@Rbb]^bb 	  Gssss
 
 "guuuu
 

 &'RX_agWhiiiYYt__ 	c(5(LYZY_Y_YaYa(b(b(bD%	c 	c 	c 	c 	c 	c 	c 	c 	c 	c 	c 	c 	c 	c 	c "%s+   BF! !
G+GG.-I''I+.I+r   r   r[   r    c                    |                                                                                     dd          }t          j        |t          j        ddd          d d                   dz
  }d                    t          t          |                    }d| d| d	}| j	        
                    |d
          }|t          d          g }	||	z   }
d
gt          |
          z  }dgt          |          z  dgt          |	          z  z   }dgt          |          z  d
gt          |	          z  z   }t          |
          }|| j        k     r#dg| j        |z
  z  }|
|z   }
||z   }||z   }||z   }nt          |
          | j        k    r-t          j        dt          |
           d| j         d           |
d | j                 }
|d | j                 }|d | j                 }|d | j                 }t          j        |
          t          j        |          t          j        |          t          j        |          fS )Nr"   r#   r%   r&   r'   r(   r*   r+   r]   Tr,   zKFSQTokenizer does not support encoding actions atm (only for inference use)r   Fr.   r/   r0   r   r   s                   r   rF   zFSQTokenizer.tokenize,  sg    ||~~++--55c3?? KBKAw4O4OPSQSPS4TUUUXYY HHS&78899	?,?????1888NN%&sttt /Vc&kk)
#M***aS3~3F3F-FFGc-000D6C<O<O3OO	 [[
%%g!;<Gg%F#g-J'G!G+II6{{T]**oS[[ o o o o o   OdmO,F#OdmO4Joo.G!/DM/2Iz&!!2:j#9#92:g;N;NPRPZ[dPePeeerG   rB   rl   rm   c                 X   | j                             |                                          }d|vr"t          j        ||ft          j                  S t          j        | j                             |                    d          d                             d          d         	                                                    }| 
                    |          }	 t          j        d          d         }t          j        |          5  |                     | j        |d                   d         }d d d            n# 1 swxY w Y   |d ||z                               ||g          S # t"          $ r7}	t%          j        d|	            t          j        ||f          cY d }	~	S d }	~	ww xY w)	Nr^   ro   r&   r_   r   cpu)N.zError decoding FSQ: )rW   rr   rc   r3   rs   rt   ru   r9   rv   r1   rb   r   devicesdefault_devicer   r   r   r   r;   r<   )
r   rB   rl   rm   rx   ry   rf   device	detok_actr   s
             r   rz   zFSQTokenizer.extract_actionsY  s   299&--//JJ ^++8^Z8
KKKK H%,,^-A-A*-M-Ma-P-V-VWZ-[-[\]-^-d-d-f-fgg
 
 <<=NOO	:[''*F#F++ [ [ //mI>VWWXYZ	[ [ [ [ [ [ [ [ [ [ [ [ [ [ [:~
::;CC^U_D`aaa 	: 	: 	:O61667778^Z899999999	:sB   .E( (D;/E( ;D??E( D?$E( (
F)2,F$F)$F)c                     t          |t                    rt          j        |          }| j                                        dz
  | j        z
  |z
  S r|   r}   r   s     r   rb   z,FSQTokenizer._act_tokens_to_paligemma_tokensp  r   rG   )rQ   N)rH   rI   rJ   r   rK   r8   r   r3   rL   rM   rF   rz   r   rb   rN   rG   r   r   r      s        /% /% /%sTz /% /% /% /%b+f+f"$*+f79zD7H+f	rz2:rz2:=	>+f +f +f +fZ:bj :# :SV :[][e : : : :.\bj496L \QSQ[ \ \ \ \ \ \rG   r   )r;   r   r   numpyr3   orbax.checkpoint
checkpointr   r   transformersr   !openpi.models.utils.fsq_tokenizermodelsutilsr   openpi.shared.downloadsharedr   r   rP   r   r   rN   rG   r   <module>r      s|    				 



               & & & & & & 9 9 9 9 9 9 9 9 9 9 9 9 ) ) ) ) ) ) ) ) )"4 "4 "4 "4 "4 "4 "4 "4JX\ X\ X\ X\ X\ X\ X\ X\B_\ _\ _\ _\ _\ _\ _\ _\D}\ }\ }\ }\ }\ }\ }\ }\ }\ }\rG   