o
    vžÄi>  ã                   @   sF   d dl mZ d dlmZ d dlmZ d dlmZ eG dd„ dƒƒZdS )é    )Ú	dataclass)Úcached_property)Úinit_device_mesh)Úlogc                   @   s¾   e Zd ZU eed< eed< eed< eed< eed< eed< eed< dd	„ Zd
d„ Zdd„ Ze	dd„ ƒZ
e	dd„ ƒZe	dd„ ƒZe	dd„ ƒZe	dd„ ƒZe	dd„ ƒZe	dd„ ƒZedd„ ƒZdS )ÚParallelDimsÚdp_replicateÚdp_shardÚcpÚtpÚppÚ
world_sizeÚenable_loss_parallelc                 C   s   |   ¡  d S ©N)Ú	_validate©Úself© r   ú[/data/cameron/vidgen/cosmos-policy/cosmos_policy/_src/reason1/parallelisms/parallel_dims.pyÚ__post_init__"   s   zParallelDims.__post_init__c                 C   s0  | j | j| j| j| jf\}}}}}||||fD ]
}|dks!J dƒ‚q|dks.|dks.J dƒ‚|dk r\t d| j› d|| | | › d¡ | j|| | |   | _}t d	|› d¡ |dksbJ ‚|| | | | | jks–| j|| | |  | _ t d
|› d|› d|› d|› d|› d| j› d¡ d S d S )Né   z6Parallelism degree should be >= 1, except for dp_shardéÿÿÿÿz dp_shard must -1 or >=1.r   zLdp_shard is set to -1, will be automatically determined based on world_size z // Ú.zdp_shard is set to z$Invalid parallel dims: dp_replicate(z) * dp_shard(z) * cp(z) * tp(z) * pp(z) != WORLD_SIZE(ú))	r   r   r	   r
   r   r   Úinfor   Úwarning)r   r   r   r	   r
   r   Údr   r   r   r   %   s<   ûÿÿÿÿÿÿþzParallelDims._validatec           
      C   sR  g }g }t | j| j| j| j| jgg d¢ƒD ]\}}|dkr'| |¡ | |¡ qt dt	|ƒ› d|› d|› ¡ t
|ƒ}t|||d}g }g }g }	| jrV| d¡ |	 d¡ | jrh| d¡ | d¡ |	 d¡ | jru| d	¡ |	 d	¡ |g krƒ|t
|ƒ jd
d |g kr‘|t
|ƒ jdd |	g krŸ|t
|	ƒ jdd t d|› ¡ |S )N)r   r   r   r	   r
   r   z	Building z-D device mesh with z, )Úmesh_dim_namesr   r   r	   Údp)Úmesh_dim_nameÚdp_shard_cpÚdp_cpzmesh: )Úzipr   r   r   r	   r
   Úappendr   r   ÚlenÚtupler   Údp_replicate_enabledÚdp_shard_enabledÚ
cp_enabledÚ_flatten)
r   Údevice_typeÚdimsÚnamesr   ÚnameÚmeshZdp_mesh_dim_namesZdp_shard_cp_mesh_dim_namesZdp_cp_mesh_dim_namesr   r   r   Ú
build_mesh@   sD   þ

€ 






zParallelDims.build_meshc                 C   s   | j dkp	| jdkS ©Nr   )r   r   r   r   r   r   Ú
dp_enabledl   s   zParallelDims.dp_enabledc                 C   ó
   | j dkS r/   )r   r   r   r   r   r%   p   ó   
z!ParallelDims.dp_replicate_enabledc                 C   r1   r/   )r   r   r   r   r   r&   t   r2   zParallelDims.dp_shard_enabledc                 C   r1   r/   )r	   r   r   r   r   r'   x   r2   zParallelDims.cp_enabledc                 C   r1   r/   )r
   r   r   r   r   Ú
tp_enabled|   r2   zParallelDims.tp_enabledc                 C   r1   r/   )r   r   r   r   r   Ú
pp_enabled€   r2   zParallelDims.pp_enabledc                 C   s   | j dko| jS r/   )r
   r   r   r   r   r   Úloss_parallel_enabled„   s   z"ParallelDims.loss_parallel_enabledc                 C   s   | j | j | j S r   )r	   r
   r   r   r   r   r   Únon_data_parallel_sizeˆ   s   z#ParallelDims.non_data_parallel_sizeN)Ú__name__Ú
__module__Ú__qualname__ÚintÚ__annotations__Úboolr   r   r.   Úpropertyr0   r%   r&   r'   r3   r4   r5   r   r6   r   r   r   r   r      s6   
 ,






r   N)	Údataclassesr   Ú	functoolsr   Útorch.distributed.device_meshr   Ú#cosmos_policy._src.imaginaire.utilsr   r   r   r   r   r   Ú<module>   s   