o
    ?߱i@                     @   sF   d dl mZ d dlmZ d dlmZ d dlmZ eG dd dZdS )    )	dataclass)cached_property)init_device_mesh)logc                   @   s   e Zd ZU eed< eed< eed< eed< eed< eed< eed< dd	 Zd
d Zdd Ze	dd Z
e	dd Ze	dd Ze	dd Ze	dd Ze	dd Ze	dd Zedd ZdS )ParallelDimsdp_replicatedp_shardcptppp
world_sizeenable_loss_parallelc                 C   s   |    d S N)	_validateself r   a/data/cameron/vidgen/cosmos-predict2.5/cosmos_predict2/_src/reason1/parallelisms/parallel_dims.py__post_init__"   s   zParallelDims.__post_init__c                 C   s0  | j | j| j| j| jf\}}}}}||||fD ]
}|dks!J dq|dks.|dks.J d|dk r\td| j d|| | |  d | j|| | |   | _}td	| d |dksbJ || | | | | jks| j|| | |  | _ td
| d| d| d| d| d| j d d S d S )N   z6Parallelism degree should be >= 1, except for dp_shardz dp_shard must -1 or >=1.r   zLdp_shard is set to -1, will be automatically determined based on world_size z // .zdp_shard is set to z$Invalid parallel dims: dp_replicate(z) * dp_shard(z) * cp(z) * tp(z) * pp(z) != WORLD_SIZE())	r   r   r	   r
   r   r   infor   warning)r   r   r   r	   r
   r   dr   r   r   r   %   s<   zParallelDims._validatec           
      C   sR  g }g }t | j| j| j| j| jgg dD ]\}}|dkr'|| || qtdt	| d| d|  t
|}t|||d}g }g }g }	| jrV|d |	d | jrh|d |d |	d | jru|d	 |	d	 |g kr|t
| jd
d |g kr|t
| jdd |	g kr|t
|	 jdd td|  |S )N)r   r   r   r	   r
   r   z	Building z-D device mesh with z, )mesh_dim_namesr   r   r	   dp)mesh_dim_namedp_shard_cpdp_cpzmesh: )zipr   r   r   r	   r
   appendr   r   lentupler   dp_replicate_enableddp_shard_enabled
cp_enabled_flatten)
r   device_typedimsnamesr   namemeshZdp_mesh_dim_namesZdp_shard_cp_mesh_dim_namesZdp_cp_mesh_dim_namesr   r   r   
build_mesh@   sD   

 






zParallelDims.build_meshc                 C   s   | j dkp	| jdkS Nr   )r   r   r   r   r   r   
dp_enabledl   s   zParallelDims.dp_enabledc                 C   
   | j dkS r/   )r   r   r   r   r   r%   p      
z!ParallelDims.dp_replicate_enabledc                 C   r1   r/   )r   r   r   r   r   r&   t   r2   zParallelDims.dp_shard_enabledc                 C   r1   r/   )r	   r   r   r   r   r'   x   r2   zParallelDims.cp_enabledc                 C   r1   r/   )r
   r   r   r   r   
tp_enabled|   r2   zParallelDims.tp_enabledc                 C   r1   r/   )r   r   r   r   r   
pp_enabled   r2   zParallelDims.pp_enabledc                 C   s   | j dko| jS r/   )r
   r   r   r   r   r   loss_parallel_enabled   s   z"ParallelDims.loss_parallel_enabledc                 C   s   | j | j | j S r   )r	   r
   r   r   r   r   r   non_data_parallel_size   s   z#ParallelDims.non_data_parallel_sizeN)__name__
__module____qualname__int__annotations__boolr   r   r.   propertyr0   r%   r&   r'   r3   r4   r5   r   r6   r   r   r   r   r      s6   
 ,






r   N)	dataclassesr   	functoolsr   torch.distributed.device_meshr   %cosmos_predict2._src.imaginaire.utilsr   r   r   r   r   r   <module>   s   