o
    ?߱i M                     @  s  d Z ddlmZ ddlZddlZddlZddlmZmZm	Z	m
Z
mZmZ ddlZddlZddlmZ ddlmZ z
ddlmZ dZW n eyM   d	ZY nw dd
lmZ ddlmZ ddlmZ ddlmZ edZ dHddZ!dIddZ"dJdKddZ#dLdMd d!Z$e"ej%d	d"G d#d$ d$Z&e"ej%d	d"G d%d& d&Z'e"ej%d	d"G d'd( d(Z(e"ej%d	d"G d)d* d*Z)e"ej%d	d"G d+d, d,Z*e"ej%d	d"G d-d. d.Z+e"ej%d	d"G d/d0 d0Z,e"ej%d	d"G d1d2 d2Z-e"ej%d	d"G d3d4 d4Z.e"ej%d	d"G d5d6 d6Z/e"ej%d	d"G d7d8 d8Z0e"ej%d	d"G d9d: d:Z1e"ej%d	d"G d;d< d<Z2e"ej%d	d"G d=d> d>Z3dNdOdCdDZ4dPdQdFdGZ5dS )Rz%Training config system for Imaginare4    )annotationsN)AnyDictOptionalTypeTypeVarUnion)logger)TRAINING)ModelParallelConfigTF)LazyCall)LazyDict)distributed)ColorTobjobjectreturnboolc                 C  s
   t | dS )z
    Helper function to check if an object is an instance of an attrs-defined class.

    Args:
        obj: The object to check.

    Returns:
        bool: True if the object is an instance of an attrs-defined class, False otherwise.
    __attrs_attrs__)hasattr)r    r   P/data/cameron/vidgen/cosmos-predict2.5/cosmos_predict2/_src/imaginaire/config.py_is_attrs_instance.   s   

r   clsc                   s@   t | ds	td| j d fdd}|| _dd	d
}|| _| S )a~  
    A decorator that adds the capability to freeze instances of an attrs-defined class.

    NOTE: This requires the wrapped attrs to be defined with attrs.define(slots=False) because we need
    to hack on a "_is_frozen" attribute.

    This decorator enhances an attrs-defined class with the ability to be "frozen" at runtime.
    Once an instance is frozen, its attributes cannot be changed. It also recursively freezes
    any attrs-defined objects that are attributes of the class.

    Usage:
        @make_freezable
        @attrs.define(slots=False)
        class MyClass:
            attribute1: int
            attribute2: str

        obj = MyClass(1, 'a')
        obj.freeze()  # Freeze the instance
        obj.attribute1 = 2  # Raises AttributeError

    Args:
        cls: The class to be decorated.

    Returns:
        The decorated class with added freezing capability.
    __dict__zmake_freezable cannot be used with classes that do not define __dict__. Make sure that the wrapped class was defined with `@attrs.define(slots=False)`r   Nonec                   s0   t | dr| jr|dkrtd | || dS )z
        Override __setattr__ to allow modifications during initialization
        and prevent modifications once the instance is frozen.
        
_is_frozenzCannot modify frozen instanceN)r   r   AttributeError)selfkeyvalueoriginal_setattrr   r   setattr_override`   s   z(make_freezable.<locals>.setattr_overrider   r   c                 S  s@   t j| dd D ]\}}t|rt|dr|  q	d| _dS )zK
        Freeze the instance and all its attrs-defined attributes.
        F)recursefreezeTN)attrsasdictitemsr   r   r&   r   )r   _r!   r   r   r   r&   k   s
   
zmake_freezable.<locals>.freezeNr   r   )r   r   r   r   )r   	TypeError__setattr__r&   )r   r$   r&   r   r"   r   make_freezable;   s   
	
	r.   indentint	use_colorstrc                 C  s  t | jsJ g }t | jD ]m}t| |j}t |jrO|r5|d| td t	|j d  n|d| d |j d  |t
||d | q|rk|d| td t	|j d t|  q|d| d |j d t|  qd|S )z=
    Recursively pretty prints attrs objects with color.
       * :   : 
)r'   has	__class__fieldsgetattrnameappendr   cyangreen_pretty_print_attrs_instanceyellowr2   join)r   r/   r1   lines	attributer!   r   r   r   rA   y   s   **&
rA   	overridesOptional[list[str]]c                 C  s   g }| tdtd d  | D ]B}|dkrq|dr(|dd }d}n|d\}}|rH| d	td t| d t|  q| d
| d t|  qd|S )z"
    Pretty prints overrides.
    r4   rF   r7   z--~r6   N=r3   z   * r8   )	r>   r   r?   r@   
startswithsplitrB   r2   rC   )rF   r1   rD   overrideattribute_nameZattribute_valuer   r   r   pretty_print_overrides   s   
.
rN   )slotsc                   @  s2   e Zd ZU dZded< dZded< dZded< dS )	ObjectStoreConfigFr   enabled r2   credentialsbucketN)__name__
__module____qualname__rQ   __annotations__rS   rT   r   r   r   r   rP         
 rP   c                   @  sf   e Zd ZU dZded< dZded< dZded< dZded< dZd	ed
< e	dddZ
e	dddZdS )	JobConfigrR   r2   projectgroupr=   online
wandb_modeNzOptional[Any]clusterr   c                 C  s   | j  d| j d| j S )N/)r[   r\   r=   r   r   r   r   path   s   zJobConfig.pathc                 C  s   t jdd}| d| j S )NZIMAGINAIRE_OUTPUT_ROOTz/tmp/imaginaire4-outputr`   )osenvirongetrb   )r   Z
local_rootr   r   r   
path_local   s   zJobConfig.path_local)r   r2   )rU   rV   rW   r[   rX   r\   r=   r^   r_   propertyrb   rf   r   r   r   r   rZ      s   
 rZ   c                   @  2   e Zd ZU dZded< dZded< dZded< dS )		EMAConfigFr   rQ   gH.?floatbetatorch_compile_buffer_renamingN)rU   rV   rW   rQ   rX   rk   rl   r   r   r   r   ri      rY   ri   c                   @  rh   )	PowerEMAConfigFr   rQ   g?rj   srl   N)rU   rV   rW   rQ   rX   rn   rl   r   r   r   r   rm      rY   rm   c                   @  s2   e Zd ZU dZded< dZded< dZded< dS )	DDPConfigFr   find_unused_parametersTstatic_graphbroadcast_buffersN)rU   rV   rW   rp   rX   rq   rr   r   r   r   r   ro      rY   ro   c                   @  s&   e Zd ZU dZded< dZded< dS )CuDNNConfigFr   deterministicT	benchmarkN)rU   rV   rW   rt   rX   ru   r   r   r   r   rs      s   
 rs   c                   @  sJ   e Zd ZU dZded< dZded< dZded	< d
Zded< dZded< dS )	JITConfigFr   rQ   NzUnion[list[int], None]input_shapecudar2   devicebfloat16dtypeTstrict)	rU   rV   rW   rQ   rX   rw   ry   r{   r|   r   r   r   r   rv      s   
 rv   c                   @  s   e Zd ZU dZded< dZded< ejedZ	ded	< d
Z
ded< dZded< ejedZded< dZded< dZded< dZded< g Zded< ejedZded< dZded< g Zded< dZded< dZded< dZded< dS )CheckpointConfigNzOptional[Dict]typeFr   dcp_async_mode_enabledfactoryrP   save_to_object_storeɚ;r0   	save_iterTstrict_resumeload_from_object_storerR   r2   	load_pathload_training_stateonly_load_scheduler_state	list[str]keys_to_skip_loadingrv   jitverbosekeys_not_to_resumebroadcast_via_filesystemload_ema_to_regenable_gcs_patch_in_boto3)rU   rV   rW   r~   rX   r   r'   fieldrP   r   r   r   r   r   r   r   r   rv   r   r   r   r   r   r   r   r   r   r   r}     s"   
 
r}   c                   @  s*   e Zd ZU dZdZded< dZded< dS )
NVTXConfigzConfig for NVTX ranges used in the main training loop.

    See tutorials/nanogpt for more details on how to integrate profiling into your model.Fr   rQ   cuda_synchronizeN)rU   rV   rW   __doc__rQ   rX   r   r   r   r   r   r   G  s   
 r   c                   @  s~   e Zd ZU dZdZded< dZded< dZded	< d
Zded< dZ	ded< dZ
ded< dZded< dZded< dZded< dS )StragglerDetectionConfigzConfig for Straggler detection tool: https://gitlab-master.nvidia.com/dl/gwe/fault_tolerance_related/straggler/-/tree/cupti?ref_type=headsFr   rQ   d   r0   report_freqr6   profile_freqg       @rj   max_diffTraise_erroranalyze_forwardanalyze_backwardanalyze_optimizeranalyze_dataloadingN)rU   rV   rW   r   rQ   rX   r   r   r   r   r   r   r   r   r   r   r   r   r   T  s   
 r   c                   @  s   e Zd ZU dZded< dZded< dZded< dZded< ee	d	Z
d
ed< dZded< dZded< dZded< dZded< dS )	ProfilingFr   enable_profilingenable_memory_snapshotsave_s3r6   r0   r      z	list[int]target_ranksrecord_shapeprofile_memoryT
with_stackwith_modulesN)rU   rV   rW   r   rX   r   r   r   listranger   r   r   r   r   r   r   r   r   r   m  s   
 r   c                   @  s*   e Zd ZU dZdZded< dZded< dS )	CompileConfigzT
    torch.compile config options passed to set_torch_compile_options function.
    r   r0   recompile_limitTr   use_duck_shapeN)rU   rV   rW   r   r   rX   r   r   r   r   r   r   }  s   
 r   c                   @  sj  e Zd ZU er2ddlmZ ddlmZ eZde	d< e
eeej eej eej dZde	d< d	Zd
e	d< ejedZde	d	< ejedZde	d< dZde	d< ejdd dZde	d< dZde	d< dZde	d< dZde	d< dZde	d< dZde	d < d!Zde	d"< dZ de	d#< e!j"Z#d$e	d%< d&Z$de	d'< eje%dZ&d(e	d)< eje'dZ(d*e	d+< eje)dZ*d,e	d-< dS ).TrainerConfigr   )ImaginaireTrainer)callbackzType[ImaginaireTrainer]r~   )Zemaprogress_barwandbr   	callbacksddpr2   distributed_parallelismr   ro   rs   cudnnr0   seedc                   C  s
   t ddS )NF)rQ   )dictr   r   r   r   <lambda>  s   
 zTrainerConfig.<lambda>r   grad_scaler_argsr   max_iterNz
int | Nonemax_val_iterr   logging_iterTr   run_validationvalidation_iterFrun_validation_on_starttimeout_periodztorch.memory_formatmemory_formatr6   grad_accum_iterr   straggler_detectionr   	profilingr   compile_config)+rU   rV   rW   r
   Z'cosmos_predict2._src.imaginaire.trainerr   %cosmos_predict2._src.imaginaire.utilsr   r~   rX   r   r   LZEMAModelCallbackZProgressBarCallbackZWandBCallbackr   r   r'   r   ro   r   rs   r   r   r   r   r   r   r   r   r   r   torchpreserve_formatr   r   r   r   r   r   r   r   r   r   r   r   r     s:   
 


	r   c                   @  s   e Zd ZU dZded< ded< ded< ded< ded< ejed	Zd
ed< eje	d	Z
ded< er<ejed	Zded< ndZded< ejed	Zded< dZded< d!d"ddZd#ddZd$dd ZdS )%Configz[Config for an imaginaire4 job.

    See /README.md/Configuration System for more info.
    r   model	optimizer	schedulerzLazyDict | NoneZdataloader_trainZdataloader_valr   rZ   jobr   trainerr   model_parallelNr   r}   
checkpointFr   upload_reproducible_setupr1   r   r2   c                 C  s   t | d|S )Nr   )rA   )r   r1   r   r   r   pretty_print  s   zConfig.pretty_printdict[str, Any]c                 C  s
   t | S )N)r'   r(   ra   r   r   r   to_dict  s   
zConfig.to_dictc                 C  sr   t t| jjd }t|d | 	 
 d| j_| jjdks'J | jjdks/J | jjdks7J dS )z1Validate that the config has all required fields.zutf-8r   rR   N)r   
ByteTensor	bytearrayr   r=   rx   r   	broadcastcpunumpytobytesdecoder[   r\   )r   Zjob_name_tensorr   r   r   validate  s   zConfig.validateF)r1   r   r   r2   )r   r   r+   )rU   rV   rW   r   rX   r'   r   rZ   r   r   r   USE_MEGATRONr   r   r}   r   r   r   r   r   r   r   r   r   r     s"   
 
r   config_pathoptsr   enable_one_loggerc                 C  s   ddl m}m} t }| dr*|| }||j }ddlm	} |||dd}nt
| |dd}|r_z"dd	lm}	 t }
|	|}t }td
||
 d dd W n	 ty^   Y nw t }td|| d dd |S )Nr   )	from_yamlload_callablez.yaml)rL   T)Zremove_defaultsF)r   )override_one_logger_callbackz#override_one_logger_callback: took     .A.2fmsztoal time to load config: )Z-cosmos_predict2._src.imaginaire.serializationr   r   timemonotonic_nsendswithrV   make_config3cosmos_predict2._src.imaginaire.utils.config_helperrL   _load_py_configZJcosmos_predict2._src.imaginaire.utils.one_logger.one_logger_override_utilsr   loggingdebugImportError)r   r   r   r   r   t1configr*   rL   r   Zol_t1Zol_t2t2r   r   r   load_config  s*   
 r   r   c           	      C  s   ddl m}m} t }|| }t }td|| d dd t }t|	 }t }td|| d dd t }|||}t }td|| d dd |rvt }|
  t }td	|| d dd |S )
Nr   )get_config_modulerL   zget_config_module: took r   r   r   zimportlib.import_module: took zoverride: took zconfig.validate: took )r   r   rL   r   r   r   r   	importlibimport_moduler   r   )	r   r   r   r   rL   r   Zconfig_moduler   r   r   r   r   r     s&   
r   )r   r   r   r   )r   r   r   r   )r   F)r   r   r/   r0   r1   r   r   r2   )NF)rF   rG   r1   r   r   r2   r   )r   r2   r   r   r   r   r   r   )T)r   r2   r   r   r   r   r   r   )6r   
__future__r   r   rc   r   typingr   r   r   r   r   r   r'   r   logurur	   r   %cosmos_predict2._src.imaginaire.flagsr
   megatron.corer   r   r   Z+cosmos_predict2._src.imaginaire.lazy_configr   r   r   r   r   Z*cosmos_predict2._src.imaginaire.utils.miscr   r   r   r.   rA   rN   definerP   rZ   ri   rm   ro   rs   rv   r}   r   r   r   r   r   r   r   r   r   r   r   r   <module>   s    

>
	

	
	
	


?



	
3
7"