o
    ?߱i
                     @   s   d dl Z d dlZd dlmZ d dlmZ d dlmZ d dlmZ dej	fddZ
		dd
ej	dedededejjf
ddZdejjd
ej	defddZdS )    N)
ListConfig)nn)log)	FusedAdamnetc                 C   sP   dd |   D }dd | D }dd | D }dd | D }||fS )zu
    seperate the parameters of the network into two groups: decay and no_decay.
    based on nano_gpt codebase.
    c                 S   s   i | ]\}}||qS  r   .0pnpr   r   _/data/cameron/vidgen/cosmos-predict2.5/cosmos_predict2/_src/predict2/utils/optim_instantiate.py
<dictcomp>   s    z+get_regular_param_group.<locals>.<dictcomp>c                 S   s   i | ]
\}}|j r||qS r   )requires_gradr   r   r   r   r       s    c                 S   s    g | ]\}}|  d kr|qS    dimr	   nr   r   r   r   
<listcomp>"        z+get_regular_param_group.<locals>.<listcomp>c                 S   s    g | ]\}}|  d k r|qS r   r   r   r   r   r   r   #   r   )named_parametersitems)r   
param_dictdecay_paramsnodecay_paramsr   r   r   get_regular_param_group   s
   r   adamwmodellrweight_decay
optim_typereturnc                 K   s   t | \}}tdd |D }tdd |D }|| }	td|	d || ||dg}
|dkr7tjj}n|dkr>t}ntd	| |	 D ]\}}t
|trXt|||< qI||
fi |S )
Nc                 s       | ]}|  V  qd S Nnumelr	   r   r   r   r   	<genexpr>0       z%get_base_optimizer.<locals>.<genexpr>c                 s   r#   r$   r%   r'   r   r   r   r(   1   r)   ztotal num parameters : ,)paramsr   r    r   	fusedadamzUnknown optimizer type: )r   sumr   criticaltorchoptimAdamWr   
ValueErrorr   
isinstancer   list)r   r   r    r!   kwargsnet_decay_paramnet_nodecay_paramnum_decay_paramsnum_nodecay_paramsnet_param_totalparam_groupopt_clskvr   r   r   get_base_optimizer'   s(   

r?   	optimizerscheduler_configc                 C   s(   t j|}||_tjjj| |jgdS )N)	lr_lambda)	hydrautilsinstantiater   r/   r0   lr_schedulerLambdaLRschedule)r@   r   rA   net_schedulerr   r   r   get_base_schedulerK   s   rJ   )r   )rC   r/   	omegaconfr   r   %cosmos_predict2._src.imaginaire.utilsr   Z6cosmos_predict2._src.predict2.utils.fused_adam_dtensorr   Moduler   floatstrr0   	Optimizerr?   dictrJ   r   r   r   r   <module>   s4   
$