o
    viG                     @   s8  d dl Z d dlZd dlmZ d dlZd dlmZmZ d dlm	Z	 d dl
mZmZ d dlmZ d dlmZ d dlmZmZmZ d d	lmZmZ d d
lmZ d dlmZ d dlmZmZmZm Z  					 							dde!dee"e#  de!dee"e#  de!dee# fddZ$			dde!fddZ%de	defddZ&dS )     N)Optional)
LoraConfigset_peft_model_state_dict)Config)INTERNALSMOKE)instantiate)ImaginaireModel)distributedlogmisc)get_config_moduleoverride)easy_io)hsdp_device_mesh)DefaultLoadPlannerDistributedCheckpointerModelWrapperdcp_load_state_dict9cosmos_policy/_src/predict2/configs/video2world/config.pyFToverride_cacheexperiment_optsskip_load_modeladapter_checkpoint_pathscache_text_encoder	to_devicec           "   	   C   s  |	du rg }	t |}t| }t|dd|  g|	 }|r-td|  t||j_	|r5d|j
jj_|du rE|j
jjjrEd|j
jj_|  |  tj|dd |jjjtjj_|jjjtjj_d tjj_tjjj_td|  |s~d	|j
j_td
 t|j
}|dur|t| |   W d   n1 sw   Y  |
s|rt!||||||}tdt"| d dd t#t"|D }t$||D ]\}}td| d|  t%|jj&|jj'|jj(dd |jj)*dD |jj+d}|j,-|| |.drStj/|dd}t0|1 }|D ] }d|v r7|r$dnd}|2|d2dd}|3|||< qt4|j,||d}|j5D ]}t6d|  J dnOtd| d |  |7d!.d"rj|}nt8j9:|d#}t;|j|j<ddd$}t=||d%}|d& di}|j>|d'}|?|} t@dd(}!tA|| |! |B| td)| d* q|j,C|d+  td,|d+   ||fS t!||||||}||fS )-a)  
    Load model from checkpoint with optional multi-adapter support.

    Args:
        experiment_name: experiment name
        s3_checkpoint_dir: s3 path to iteration_model
        config_file: config file path
        enable_fsdp: enable fsdp
        load_ema_to_reg: load ema as regular model
        instantiate_ema: whether to instantiate EMA
        seed: random seed
        local_cache_dir: local cache directory, if None, do not cache
        override_cache: override cache, if True, override cache if local cache exists
        experiment_opts: experiment options
        skip_load_model: skip loading model weights
        adapter_checkpoint_paths: list of checkpoint paths for loading multiple adapters
            Supports both .pt and DCP checkpoint formats (auto-detected by file extension).
            Example:
                adapter_checkpoint_paths=[
                    "s3://bucket/exp1/checkpoints/model.pt",  # .pt format
                    "s3://bucket/exp2/checkpoints"  # DCP format
                ]
        cache_text_encoder: cache text encoder, if True, cache text encoder. This is default to False to avoid race condition if multiple nodes are running inference concurrently (e.g., running inference pipeline).

    Returns:
        model: loaded model
        config: config object
    Nz--zexperiment=z(Overriding config checkpoint path with: FT)seedZby_rankzLoading model from    zinstantiate modelzLoading z$ adapters from different checkpointsc                 S   s   g | ]}d | qS )Zadapter_ ).0ir   r   T/data/cameron/vidgen/cosmos-policy/cosmos_policy/_src/predict2/utils/model_loader.py
<listcomp>   s    z.load_model_from_checkpoint.<locals>.<listcomp>zLoading adapter 'z' from c                 S   s   g | ]}|  qS r   )strip)r   moduler   r   r!   r"      s    ,)r
lora_alphainit_lora_weightsZtarget_modulesuse_dora.ptcpu)map_locationlora_znet.net_ema.base_model.model.zdefault. )adapter_namezUnexpected key: zUnexpected key foundz
' from s3 //modelmodel	callbacksZdisable_asyncload_ema_to_reg.)mapping_keysZallow_partial_loadzLoaded adapter ''r   zActivated adapter: )Dr   	importlibimport_moduleZmake_configr   r   infostr
checkpoint	load_pathr4   configZemaenabledvalidatefreezer   Zset_random_seedtrainercudnndeterministictorchbackends	benchmark
allow_tf32cudamatmulfsdp_shard_sizetimerr   todeviceZon_train_start%load_model_state_dict_from_checkpointlenrangezipr   Z	lora_rankr'   r(   Zlora_target_modulessplitr)   netadd_adapterendswithloadlistkeysreplacepopr   unexpected_keyswarningrstripospathjoinr   jobr   
state_dictget_storage_readerr   r   load_state_dictset_adapter)"experiment_names3_checkpoint_dirconfig_fileZenable_fsdpr8   Zinstantiate_emar   local_cache_dirr   r   r   r   r   r   Zconfig_modulerC   r4   adapter_namesr1   checkpoint_pathZlora_configadapter_state_dictZold_keyskeyZ
net_prefixnew_keyload_resultcur_key_ckpt_full_pathcheckpointer_model_wrapperr:   _state_dictstorage_readerload_plannerr   r   r!   load_model_from_checkpoint'   s   ,







r|   c                  C   sP  |d urt |}|drdnd}|dr0|dkr|}n|ddr(|}n
tj|d}n|}dd	lm} d
}	||}
t	rB| S |	r=t
 rtd|
  tj|
td}t| drt| jdr| jjr|dkrtd i }g }g }|  }| D ]?}d|v sd|v r|dddd}||v r|| ||< || d|  q~|| q~||v r|| ||< q~|| q~|rtdt| d |d d D ]
}td|  q|rtd|d d  d |}| j|dd t }t| dr4| jd ur4| j D ]\}}|d|  q| j D ]\}}|d|  qtdt| d  t
j | d|d! ntd"|  t!|j"|j#d d
d#}t$| |dkrX|ndd$}| }|dkr{|%|}t&d
d%}t'||| || nt
 r d&|v rtj|d'd(d)d*}nt|}d|v r|d }nd+|v r|d+ }n|}g }g }| D ]}||v r|| ||< q|| q| D ]}||vr|| q|rtd|d d  d |rtd,|d d  d || t
j | dd- |d ur!t
 r!td.|
  t(|  |
 t)j*+  | S )/Nr*   ptZdcpzs3:r2   r3   r4   r   )get_checkpoint_pathTz"Loading model cached locally from )weights_onlyrC   use_lorazIModel uses LoRA, mapping checkpoint keys to model keys with base_layer...zbase_layer.r/   r0   z -> zMapped z6 LoRA keys from checkpoint to model (showing first 5):   z  zMissing keys in checkpoint: 
   z... (showing first 10)F)strictnet_emar.   zSkipping sync for z> EMA parameters and buffers to avoid OOM during initialization)srcparams_and_buffers_to_ignorezLoading model from s3 r5   r7   r;   zs3://s3zcredentials/s3_training.secret)backendZs3_credential_path)Zbackend_argsrh   zUnexpected keys in checkpoint: )r   zCaching model state dict to ),r@   r[   
startswithrc   rd   re   rf   Z1cosmos_policy._src.imaginaire.utils.checkpoint_dbr~   r   r
   Zis_rank0r   r?   r   r\   r   hasattrrC   r   rh   r^   r_   appendrU   rb   rj   setr   named_parametersaddnamed_buffersZsync_model_statesr   rA   rg   r   ri   r   r   dumprJ   rN   empty_cache) r4   rC   rm   r8   ro   r   checkpoint_formatrv   r~   Zload_from_localZlocal_s3_ckpt_fplocal_state_dictZmapped_state_dictZmapped_keysmissing_keysmodel_state_dictZ	model_keyZcheckpoint_keyZ
mapped_keyr   
param_name_buffer_namerw   rx   ry   rz   r{   Zpt_state_dictmodel_statera   rs   r   r   r!   rT      s   

















rT   rC   returnc                 C   s   | j jj}d| j j_t| j  }t|| | jj| jjd}|dkr@|| j j_t	|d}t
|dr<t|jr<|| |S td|S )a  
    Instantiate a model, load weights from a consolidated checkpoint, and initialize FSDP if required.

    Args:
        config: The configuration object for the experiment.

    Returns:
        model: The loaded and (optionally) FSDP-wrapped model.
    r   )r4   rC   rm   r8   )Zsharding_group_size
apply_fsdpzyModel does not implement 'apply_fsdp'. Please implement this method to enable FSDP after consolidated checkpoint loading.)r4   rC   rP   r   rN   rT   rA   rB   r8   r   r   callabler   AttributeError)rC   rP   r4   Zfsdp_device_meshr   r   r!   3create_model_from_consolidated_checkpoint_with_fsdpg  s*   



r   )r   FFTr   NFNFNFN)FNF)'r=   rd   typingr   rJ   peftr   r   Z$cosmos_policy._src.imaginaire.configr   Z#cosmos_policy._src.imaginaire.flagsr   r   Z)cosmos_policy._src.imaginaire.lazy_configr   Z#cosmos_policy._src.imaginaire.modelr	   Z#cosmos_policy._src.imaginaire.utilsr
   r   r   Z1cosmos_policy._src.imaginaire.utils.config_helperr   r   Z+cosmos_policy._src.imaginaire.utils.easy_ior   Z/cosmos_policy._src.imaginaire.utils.fsdp_helperr   Z,cosmos_policy._src.predict2.checkpointer.dcpr   r   r   r   boolr]   r@   r|   rT   r   r   r   r   r!   <module>   s`   	



 !
 %