o
    ?߱iI                     @   s8  d dl Z d dlZd dlmZ d dlZd dlmZmZ d dlm	Z	 d dl
mZmZ d dlmZ d dlmZ d dlmZmZmZ d d	lmZmZ d d
lmZ d dlmZ d dlmZmZmZm Z  					 							dde!dee"e#  de!dee"e#  de!dee# fddZ$			dde!fddZ%de	defddZ&dS )     N)Optional)
LoraConfigset_peft_model_state_dict)Config)INTERNALSMOKE)instantiate)ImaginaireModel)distributedlogmisc)get_config_moduleoverride)easy_io)hsdp_device_mesh)DefaultLoadPlannerDistributedCheckpointerModelWrapperdcp_load_state_dict;cosmos_predict2/_src/predict2/configs/video2world/config.pyFToverride_cacheexperiment_optsskip_load_modeladapter_checkpoint_pathscache_text_encoder	to_devicec           "   	   C   s  |	du rg }	t |}t| }t|dd|  g|	 }|r-td|  t||j_	|r5d|j
jj_|du rE|j
jjjrEd|j
jj_|  |  tj|dd |jjjtjj_|jjjtjj_d tjj_tjjj_td|  |s~d	|j
j_td
 t|j
}|dur|t| |   W d   n1 sw   Y  |
s|rt!||||||}tdt"| d dd t#t"|D }t$||D ]\}}td| d|  t%|jj&|jj'|jj(dd |jj)*dD |jj+d}|j,-|| |.drStj/|dd}t0|1 }|D ] }d|v r7|r$dnd}|2|d2dd}|3|||< qt4|j,||d}|j5D ]}t6d|  J dnOtd| d |  |7d!.d"rj|}nt8j9:|d#}t;|j|j<ddd$}t=||d%}|d& di}|j>|d'}|?|} t@dd(}!tA|| |! |B| td)| d* q|j,C|d+  td,|d+   ||fS t!||||||}||fS )-a)  
    Load model from checkpoint with optional multi-adapter support.

    Args:
        experiment_name: experiment name
        s3_checkpoint_dir: s3 path to iteration_model
        config_file: config file path
        enable_fsdp: enable fsdp
        load_ema_to_reg: load ema as regular model
        instantiate_ema: whether to instantiate EMA
        seed: random seed
        local_cache_dir: local cache directory, if None, do not cache
        override_cache: override cache, if True, override cache if local cache exists
        experiment_opts: experiment options
        skip_load_model: skip loading model weights
        adapter_checkpoint_paths: list of checkpoint paths for loading multiple adapters
            Supports both .pt and DCP checkpoint formats (auto-detected by file extension).
            Example:
                adapter_checkpoint_paths=[
                    "s3://bucket/exp1/checkpoints/model.pt",  # .pt format
                    "s3://bucket/exp2/checkpoints"  # DCP format
                ]
        cache_text_encoder: cache text encoder, if True, cache text encoder. This is default to False to avoid race condition if multiple nodes are running inference concurrently (e.g., running inference pipeline).

    Returns:
        model: loaded model
        config: config object
    Nz--zexperiment=z(Overriding config checkpoint path with: FT)seedby_rankzLoading model from    zinstantiate modelzLoading z$ adapters from different checkpointsc                 S   s   g | ]}d | qS )Zadapter_ ).0ir   r   Z/data/cameron/vidgen/cosmos-predict2.5/cosmos_predict2/_src/predict2/utils/model_loader.py
<listcomp>   s    z.load_model_from_checkpoint.<locals>.<listcomp>zLoading adapter 'z' from c                 S   s   g | ]}|  qS r   )strip)r    moduler   r   r"   r#      s    ,)r
lora_alphainit_lora_weightsZtarget_modulesuse_dora.ptcpu)map_locationlora_znet.net_ema.base_model.model.zdefault. )adapter_namezUnexpected key: zUnexpected key foundz
' from s3 //modelmodel	callbacksdisable_asyncload_ema_to_reg.)mapping_keysallow_partial_loadzLoaded adapter ''r   zActivated adapter: )Dr   	importlibimport_modulemake_configr   r   infostr
checkpoint	load_pathr5   configemaenabledvalidatefreezer   set_random_seedtrainercudnndeterministictorchbackends	benchmark
allow_tf32cudamatmulfsdp_shard_sizetimerr   todeviceon_train_start%load_model_state_dict_from_checkpointlenrangezipr   Z	lora_rankr(   r)   Zlora_target_modulessplitr*   netadd_adapterendswithloadlistkeysreplacepopr   unexpected_keyswarningrstripospathjoinr   jobr   
state_dictget_storage_readerr   r   load_state_dictset_adapter)"experiment_names3_checkpoint_dirconfig_fileenable_fsdpr:   instantiate_emar   local_cache_dirr   r   r   r   r   r   config_modulerG   r5   adapter_namesr2   checkpoint_pathZlora_configadapter_state_dictold_keyskeyZ
net_prefixnew_keyload_resultcur_key_ckpt_full_pathcheckpointer_model_wrapperr<   _state_dictstorage_readerload_plannerr   r   r"   load_model_from_checkpoint'   s   ,







r   c           !      C   s  |d urt |}|drdnd}|dr0|dkr|}n|ddr(|}n
tj|d}n|}dd	lm} d
}	||}
t	rB| S |	rgt
 rtd|
  tj|
td}t| drt| jdr| jjr|dkrtd i }g }g }|  }| D ]?}d|v sd|v r|dddd}||v r|| ||< || d|  q|| q||v r|| ||< q|| q|rtdt| d |d d D ]
}td|  q|rtd|d d  d |}| j|dd}td|
 d|  |d u rtd n|js|jstd  ntd! t }t| d"r^| jd ur^| j D ]\}}| d#|  q2| j! D ]\}}| d#|  qEtd$t| d% t
j"| d|d& ntd'|  t#|j$|j%d d
d(}t&| |dkr|ndd)}| }|dkr|'|}t(d
d*}t)||| || nt
 r*d+|v rtj|d,d-d.d/}nt|}d|v r|d }nd0|v r|d0 }n|}g }g }| D ]} | |v r||  || < q||  q| D ]} | |vr||  q|rtd|d d  d |r%td1|d d  d || t
j"| dd2 |d urKt
 rKtd3|
  t*|  |
 t+j,-  | S )4Nr+   ptdcpzs3:r3   r4   r5   r   )get_checkpoint_pathTz"Loading model cached locally from )weights_onlyrG   use_lorazIModel uses LoRA, mapping checkpoint keys to model keys with base_layer...zbase_layer.r0   r1   z -> zMapped z6 LoRA keys from checkpoint to model (showing first 5):   z  zMissing keys in checkpoint: 
   z... (showing first 10)F)strictzCheckpoint weights loaded from z: z5Checkpoint weights loaded successfully (strict=True).z:Checkpoint weights loaded successfully (all keys matched).zECheckpoint weights loaded; review missing_keys/unexpected_keys above.net_emar/   zSkipping sync for z> EMA parameters and buffers to avoid OOM during initialization)srcparams_and_buffers_to_ignorezLoading model from s3 r6   r9   r=   zs3://s3zcredentials/s3_training.secret)backends3_credential_path)backend_argsro   zUnexpected keys in checkpoint: )r   zCaching model state dict to ).rD   rb   
startswithrj   rk   rl   rm   3cosmos_predict2._src.imaginaire.utils.checkpoint_dbr   r   r
   is_rank0r   rC   r   rc   r   hasattrrG   r   ro   re   rf   appendr\   ri   rq   missing_keysrh   setr   named_parametersaddnamed_bufferssync_model_statesr   rE   rn   r   rp   r   r   dumprP   rT   empty_cache)!r5   rG   rt   r:   rx   r   checkpoint_formatr   r   load_from_locallocal_s3_ckpt_fplocal_state_dictmapped_state_dictZmapped_keysr   model_state_dict	model_keycheckpoint_keyZ
mapped_keyZ	load_infor   
param_name_buffer_namer   r   r   r   r   pt_state_dictmodel_staterh   r~   r   r   r"   r[      s   




















r[   rG   returnc                 C   s   | j jj}d| j j_t| j  }t|| | jj| jjd}|dkr@|| j j_t	|d}t
|dr<t|jr<|| |S td|S )a  
    Instantiate a model, load weights from a consolidated checkpoint, and initialize FSDP if required.

    Args:
        config: The configuration object for the experiment.

    Returns:
        model: The loaded and (optionally) FSDP-wrapped model.
    r   )r5   rG   rt   r:   )Zsharding_group_size
apply_fsdpzyModel does not implement 'apply_fsdp'. Please implement this method to enable FSDP after consolidated checkpoint loading.)r5   rG   rV   r   rT   r[   rE   rF   r:   r   r   callabler   AttributeError)rG   rV   r5   Zfsdp_device_meshr   r   r"   3create_model_from_consolidated_checkpoint_with_fsdpn  s*   



r   )r   FFTr   NFNFNFN)FNF)'r@   rk   typingr   rP   peftr   r   &cosmos_predict2._src.imaginaire.configr   %cosmos_predict2._src.imaginaire.flagsr   r   +cosmos_predict2._src.imaginaire.lazy_configr   %cosmos_predict2._src.imaginaire.modelr	   %cosmos_predict2._src.imaginaire.utilsr
   r   r   3cosmos_predict2._src.imaginaire.utils.config_helperr   r   -cosmos_predict2._src.imaginaire.utils.easy_ior   Z1cosmos_predict2._src.imaginaire.utils.fsdp_helperr   Z.cosmos_predict2._src.predict2.checkpointer.dcpr   r   r   r   boolrd   rD   r   r[   r   r   r   r   r"   <module>   s`   	



 !
 ,