o
    vi                     @   s   d dl mZ d dlmZmZ d dlZd dlZd dlmZ d dl	m
Z
 d dlmZ ejjdeej fdd	ZeG d
d dZG dd de
ZdS )    )	dataclass)ListTupleN)distributed)Callback)DiffusionModelparamsc                 C   s"   | D ]}t j|ddd|d qd S )Ng        )nanposinfneginfout)torch
nan_to_num)r   param r   U/data/cameron/vidgen/cosmos-policy/cosmos_policy/_src/predict2/callbacks/grad_clip.py_fused_nan_to_num   s   r   c                   @   sZ   e Zd ZU dZeed< dZeed< dddZde	j
ddfd	d
Zdeeef fddZdS )_MagnitudeRecordr   state
iter_countreturnNc                 C   s   d| _ d| _d S Nr   r   r   )selfr   r   r   reset&   s   
z_MagnitudeRecord.reset	cur_statec                 C   s    |  j |7  _ |  jd7  _d S )N   r   )r   r   r   r   r   update*   s   z_MagnitudeRecord.updatec                 C   s0   | j dkr| j| j  }| }nd}|   |S r   )r   r   itemr   )r   Z	avg_stater   r   r   get_stat.   s   

z_MagnitudeRecord.get_stat)r   N)__name__
__module____qualname__r   float__annotations__r   intr   r   Tensorr   r   r   r   r   r   r   r   !   s   
 
r   c                   @   s~   e Zd ZdZddefddZ	dded	eee	j
f d
eddfddZ	ddejde	jjde	jjjde	jjd
eddfddZdS )GradClipz
    This callback is used to clip the gradient norm of the model.
    It also logs the average gradient norm of the model to wandb.
          ?Tforce_finitec                 C   s&   || _ || _t | _t | _d | _d S N)	clip_normr)   r   img_mag_logvideo_mag_log
_cur_state)r   r+   r)   r   r   r   __init__>   s
   
zGradClip.__init__r   model
data_batch	iterationr   Nc                 C   s"   | |r| j| _d S | j| _d S r*   )is_image_batchr,   r.   r-   )r   r0   r1   r2   r   r   r   on_training_step_startF   s   
zGradClip.on_training_step_start	model_ddp	optimizer	schedulergrad_scalerc                 C   s   ~~t |tjr|j}n|}g }| jr)| D ]}|jd ur$||j qt| |	| j
}	| j|	 || jjj dkrY| j | j }
}tjr[tj|
||d|d d S d S d S )Nr   )zclip_grad_norm/imagezclip_grad_norm/videor2   )step)
isinstancer   DistributedDataParallelmoduler)   
parametersgradappendr   clip_grad_norm_r+   r.   r   configtrainerlogging_iterr,   r   r-   wandbrunlog)r   r5   r6   r7   r8   r2   r0   r   r   
total_normZavg_img_magZavg_video_magr   r   r   on_before_optimizer_stepN   s2   

z!GradClip.on_before_optimizer_step)r(   T)r   )r    r!   r"   __doc__boolr/   r   dictstrr   r&   r%   r4   r   r;   optim	Optimizerlr_schedulerLRScheduleramp
GradScalerrH   r   r   r   r   r'   8   s6    	
r'   )dataclassesr   typingr   r   r   rD   #cosmos_policy._src.imaginaire.utilsr   ,cosmos_policy._src.imaginaire.utils.callbackr   3cosmos_policy._src.predict2.models.text2world_modelr   jitscriptr&   r   r   r'   r   r   r   r   <module>   s   