o
    ?߱i1                  	   @   s   d dl Z d dlZd dlZd dlm  mZ d dlmZ d dl	m
Z
 d dlmZ d dlmZmZmZ d dlmZmZ d dlmZ d dlmZmZmZ d	d
d
dd dd
g df	dedee defddZdd Z	
		
ddefddZddededB fddZdS )    N)FileSystemReader)S3StorageReader)instantiate)distributedlogmisc)get_config_moduleoverride)easy_io)DefaultLoadPlannerDistributedCheckpointerModelWrapperz;cosmos_predict2/_src/predict2/configs/video2world/config.pyFToverride_cacheexperiment_optsskip_teacher_initc                 C   s  t |}t| }t|dd|  g|	 }|rd|jjj_|du r/|jjjjr/d|jjj_|	  |
  tj|dd |jjjtjj_|jjjtjj_d tjj_tjjj_td|  t|jdrt|jjdr|jjjd	urd
dlm} |jjjj}|rtj|dnd}||||jjjj d}||krtd|  ||jjj_|
rt|jdrt|jjdrtd d|jj_!|sd|jj_"t#d t$|j }|%  W d	   n1 sw   Y  t&d|  t'||||||}||fS )a  
    experiment_name: experiment name
    s3_checkpoint_dir: s3 path to iteration_model
    s3_credential_path: s3 credential path, if None, use credential from config
    config_file: config file path
    enable_fsdp: enable fsdp
    load_ema_to_reg: load ema as regular model
    seed: random seed
    local_cache_dir: local cache directory, if None, do not cache
    override_cache: override cache, if True, override cache if local cache exists
    skip_teacher_init: if True, skip loading teacher checkpoint during inference (faster)
    z--zexperiment=FT)seedby_rankzLoading model from configtext_encoder_configNr   )cache_text_encoder_checkpointtext_encoderz../cosmos3_interactive_cache_ckpts/text_encoder)Zs3_ckpt_path	cache_dirs3_credential_pathz&Using cached text encoder checkpoint: load_teacher_weightszTSetting load_teacher_weights=False for inference to skip teacher checkpoint download   zinstantiate modelzLoading checkpoint from )(r   	importlibimport_modulemake_configr	   modelr   emaenabledvalidatefreezer   set_random_seedtrainercudnndeterministictorchbackends	benchmark
allow_tf32cudamatmulr   infohasattrr   Z9cosmos_predict2._src.interactive.utils.text_encoder_cacher   	ckpt_pathospathjoinr   r   Zfsdp_shard_sizetimerr   on_train_startprint%load_model_state_dict_from_checkpoint)experiment_names3_checkpoint_dirconfig_fileZenable_fsdpload_ema_to_regZinstantiate_emar   local_cache_dirr   r   r   config_moduler   r   Zoriginal_ckpt_pathZtext_encoder_cache_dirZcached_ckpt_pathr    r=   ]/data/cameron/vidgen/cosmos-predict2.5/cosmos_predict2/_src/interactive/utils/model_loader.pyload_model_from_checkpoint#   sZ   




r?   c           	      C   s   |s| S t dd |  D }t dd |  D }|rHtd i }|  D ]\}}|dr<|ddd}|||< q'|dsE|||< q'|S |rQtd | S td	 | S )
a\  
    Process .pt checkpoint state dict to handle EMA and non-EMA variants.

    Consolidated .pt checkpoints can be in different formats:
    1. Non-EMA checkpoint: Keys like 'net.xxx' - load directly
    2. EMA-only checkpoint: Keys like 'net.xxx' (exported from EMA weights) - load directly
    3. Full checkpoint with EMA: Has both 'net.xxx' and 'net_ema.xxx' keys

    When load_ema_to_reg=True:
    - If checkpoint has 'net_ema.xxx' keys, map them to 'net.xxx' for loading into regular model
    - If checkpoint only has 'net.xxx' keys (common for exported EMA checkpoints), load directly

    Args:
        state_dict: The loaded state dict from .pt file
        model: The model instance (used to get expected keys)
        load_ema_to_reg: Whether to load EMA weights into regular model

    Returns:
        Processed state dict ready for loading
    c                 s       | ]}| d V  qdS )net_ema.N
startswith.0kr=   r=   r>   	<genexpr>       z1_process_pt_state_dict_for_ema.<locals>.<genexpr>c                 s   r@   )net.NrB   rD   r=   r=   r>   rG      rH   zSProcessing EMA checkpoint: mapping net_ema.* keys to net.* for load_ema_to_reg=TruerA   rI   r   z8Loading EMA-exported checkpoint with net.* keys directlyz/Loading checkpoint without net./net_ema. prefix)anykeysr   r-   itemsrC   replace)	
state_dictr   r:   Zhas_net_ema_keysZhas_net_keysZmapped_state_dictkeyvaluenew_keyr=   r=   r>   _process_pt_state_dict_for_ema   s(   





rR   c                 C   s<  |d urt |}|drdnd}|dr0|dkr|}n|ddr(|}n
tj|d}n|}dd	lm} d
}	||}
|	rit	
 ratd|
  tj|
ddd}t|| |}| j|dd t	j| dd n|dkrtd|  t	
 rd|v rtj|dddd}ntj|ddd}d|v r|d }nd|v r|d }n|}t|| |}| j|dd t	j| dd |d urt	
 rtd|
  t|  |
 nFtd|  t|j|jd d
d}||}t| |d}| }tj||td
dd || |d urtd|
  t|  |
 tj  | S )Nz.ptptdcpzs3:/z/modelr   r   )get_checkpoint_pathTz"Loading model cached locally from cpuF)map_locationweights_only)strict)srczLoading .pt checkpoint from s3://s3zcredentials/s3_training.secret)backendr   )backend_argsrN   zCaching model state dict to zLoading DCP checkpoint from s3 )	callbacksZdisable_async)r:   )allow_partial_load)storage_readerplanner)strendswithrC   rstripr0   r1   r2   3cosmos_predict2._src.imaginaire.utils.checkpoint_dbrV   r   is_rank0r   r-   r'   loadrR   load_state_dictsync_model_statesr
   dumprN   r   
checkpointjobget_storage_readerr   rT   r   r+   empty_cache)r   r   r8   r:   r;   r   checkpoint_formatZcur_key_ckpt_full_pathrV   Zload_from_localZlocal_s3_ckpt_fprN   Zpt_state_dictmodel_statecheckpointerrb   Z_model_wrapper_state_dictr=   r=   r>   r6      sv   






r6   checkpoint_pathcredential_pathc                 C   s(   d| v r|rt || d}|S t| }|S )Nr\   )rv   r1   )r   r   )ru   rv   rb   r=   r=   r>   ro   !  s   ro   )FNF)N) r   r0   r'   torch.distributed.checkpointr   rm   rT   'torch.distributed.checkpoint.filesystemr   Z:cosmos_predict2._src.imaginaire.checkpointer.s3_filesystemr   +cosmos_predict2._src.imaginaire.lazy_configr   %cosmos_predict2._src.imaginaire.utilsr   r   3cosmos_predict2._src.imaginaire.utils.config_helperr   r	   -cosmos_predict2._src.imaginaire.utils.easy_ior
   Z1cosmos_predict2._src.interactive.checkpointer.dcpr   r   r   boollistrd   r?   rR   r6   ro   r=   r=   r=   r>   <module>   sF   
	

]9
l