o
    ?߱i
                     @   s   d dl Z d dlZd dlmZ d dlmZ d dlmZ dejfddZ			dd
ejde	de	de
dedejjfddZdejjd
ejdefddZdS )    N)nn)log)	FusedAdamnetc                 C   sP   dd |   D }dd | D }dd | D }dd | D }||fS )zu
    seperate the parameters of the network into two groups: decay and no_decay.
    based on nano_gpt codebase.
    c                 S   s   i | ]\}}||qS  r   .0pnpr   r   a/data/cameron/vidgen/cosmos-predict2.5/cosmos_predict2/_src/imaginaire/utils/optim_instantiate.py
<dictcomp>   s    z+get_regular_param_group.<locals>.<dictcomp>c                 S   s   i | ]
\}}|j r||qS r   )requires_gradr   r   r   r   r      s    c                 S   s    g | ]\}}|  d kr|qS    dimr   nr
   r   r   r   
<listcomp>         z+get_regular_param_group.<locals>.<listcomp>c                 S   s    g | ]\}}|  d k r|qS r   r   r   r   r   r   r   !   r   )named_parametersitems)r   Z
param_dictZdecay_paramsZnodecay_paramsr   r   r   get_regular_param_group   s
   r   adamwFmodellrweight_decay
optim_typeshardingreturnc                 K   s   t | \}}tdd |D }tdd |D }	||	 }
td|
d || ||dg}|dkr7tjj}n|dkr>t}ntd	| ||fi |S )
Nc                 s       | ]}|  V  qd S Nnumelr   r
   r   r   r   	<genexpr>/       z%get_base_optimizer.<locals>.<genexpr>c                 s   r    r!   r"   r$   r   r   r   r%   0   r&   ztotal num parameters : ,)paramsr   r   r   Z	fusedadamzUnknown optimizer type: )	r   sumr   criticaltorchoptimAdamWr   
ValueError)r   r   r   r   r   kwargsZnet_decay_paramZnet_nodecay_paramZnum_decay_paramsZnum_nodecay_paramsZnet_param_totalparam_groupZopt_clsr   r   r   get_base_optimizer%   s    
r1   	optimizerscheduler_configc                 C   s6   t j|}||_t| j}tjjj	| |j
g| dS )N)	lr_lambda)hydrautilsinstantiater   lenparam_groupsr+   r,   lr_schedulerLambdaLRschedule)r2   r   r3   Znet_schedulerZnum_param_groupsr   r   r   get_base_schedulerF   s   
r=   )r   F)r5   r+   r   %cosmos_predict2._src.imaginaire.utilsr   Z0cosmos_predict2._src.imaginaire.utils.fused_adamr   Moduler   floatstrboolr,   	Optimizerr1   dictr=   r   r   r   r   <module>   s8   
!