
    qi                         d dl Z d dlZd dlZd dlZdZdZeefZ G d d          Zde	dej
        j        fdZe j        d	ej
        j        fd
            Zd Zdddd	ej
        j        de	defdZdS )    Nbatchfsdpc                   8    e Zd ZU dZej        j        dz  ed<   dS )
_MeshStateNactive_mesh)__name__
__module____qualname__r   jaxshardingMesh__annotations__     9/home/robot-lab/Pi0.5_yam/src/openpi/training/sharding.pyr   r      s,         ,0K"T)00000r   r   num_fsdp_devicesreturnc                     t          j                    | z  dk    r't          dt          j                     d|  d          t          j                    | z  | f}t          j        |t          t
          f          S )Nr   zNumber of devices z1 must be divisible by the number of FSDP devices .)r   device_count
ValueError	make_mesh
BATCH_AXIS	FSDP_AXIS)r   
mesh_shapes     r   r   r      s    
,,11y!1!3!3yyfvyyy
 
 	
 "$$(88:JKJ=j)%<===r   meshc              #      K   t           j        t          d          | t           _        	 dV  dt           _        dS # dt           _        w xY w)a  Plumbing the mesh deep into the module tree is extremely cumbersome; until the JAX team lands a better API, a
    custom context manager like this one is the recommended way to maintain a reference to a global mesh. This is only used
    in `activation_sharding_constraint` below.Nz&Cannot nest set_mesh context managers.)r   r   r   )r   s    r   set_meshr      sX      
 )ABBB!J&!%

%%%%s	   = Ac           	          t           j        | S t          j                            | t          j                            t           j        t          j                            t                                        S )N)	r   r   r   laxwith_sharding_constraintr   NamedShardingPartitionSpec	DATA_AXIS)pytrees    r   activation_sharding_constraintr&   (   sU    %7++**:+A3<C]C]^gChChii  r      F)min_size_mbyteslogr(   r)   c                z    |dz  dt           j        ffd}t           j                            ||           S )ah  Apply FSDP sharding to a pytree of arrays based on the mesh shape.

    Args:
        pytree: A pytree to be apply sharding specified by the mesh, note that only array types (eg. contains .shape attr)
          will be considered for sharding.
        mesh: The mesh being used for applying sharding on to pytree.
        min_size_mbytes: The minimum size of the array in MiB to be considered for sharding, any array smaller than this
          will be replicated.
        log: If true, will log the sharding decisions for arrays that are being considered for sharding.

    Returns:
        The sharded pytree.
       arrayc                    j         t                   dk    r<t          j                            t          j                                                  S t          |d          s<t          j                            t          j                                                  S t          |j                   dk     r<t          j                            t          j                                                  S t          j	        |j                   t          j
        |j
                  j        z  x}k     r<t          j                            t          j                                                  S t          j        |j                   d d d         }d gt          |          z  }|D ]}|j         |         j         t                   z  dk    rrFt          j        dt          j                            |            d|j          d|d	z  d
d|            t          ||<   t          j                            t          j        j        |           c S rDt          j        dt          j                            |            d|j          dj                     t          j                            t          j                                                  S )N   shape   r   z	Sharding z
 of shape z (r+   z.2fz MiB) along axis z$Could not find a valid sharding for z with mesh of shape )r/   r   r   r   r"   r#   hasattrlennpproddtypeitemsizeargsortlogginginfo	tree_utilkeystrwarning)	kpr,   arr_sizeaxesspecir)   r   min_size_bytess	         r   
_shard_arrz!fsdp_sharding.<locals>._shard_arrF   s   :i A%%<--dCL4N4N4P4PQQQug&& 	R<--dCL4N4N4P4PQQQu{a<--dCL4N4N4P4PQQQ,,rx/D/D/MMMHQ___<--dCL4N4N4P4PQQQ z%+&&ttt,vD		! 	[ 	[A{1~
9 55:: L BCM$8$8$<$<  B  B  B  BW_bgWg  B  B  B~  B  B   $Q|11$8RTX8YZZZZZ ;  	O Is}7K7KB7O7O  I  I[`[f  I  I  }A  }G  I  I   |))$0J0J0L0LMMMr   )r   ShapeDtypeStructr;   tree_map_with_path)r%   r   r(   r)   rD   rC   s    ` ` @r   fsdp_shardingrG   0   sg    ( %u,NNc2 N N N N N N N N@ =++J???r   )
contextlibr9   r   numpyr4   r   r   r$   r   intr   r   r   contextmanagerr   r&   boolrG   r   r   r   <module>rM      s-        



    
	#	1 1 1 1 1 1 1 1> >(9 > > > > 
&3<$ 
& 
& 
& 
&   6@ 6@ 6@
,
6@ 	6@
 
6@ 6@ 6@ 6@ 6@ 6@r   