o
    \i                     @   s   d Z ddlZddlZddlmZ ddlZddlm  mZ	 ddl
mZ ddlmZ ddlZee jd Zed Zejdee ejdee ddlmZ dd	lmZmZ d
ededejfddZdd Zedkrse  dS dS )zcTrain VideoLatentModel: VidTok latents as targets (no grad), MSE loss, wandb, vis every ~100 iters.    N)Path)
DataLoader)	make_grid   ZVidTok)VideoLatentModel)DroidVideoDatasetcollate_batchconfig_path	ckpt_pathdevicec                 C   s(   ddl m} || |}|| }|S )Nr   )load_model_from_config)Zscripts.inference_evaluater   toeval)r	   r
   r   r   model r   +/data/cameron/vidgen/our_vid_model/train.pyload_vidtok   s   
r   c                  C   s:  t  } | jdtddd | jdtd dd | jdtd d | jd	td d | jd
tdd | jdtdd | jdtdd | jdtdd | jdtdd | jdtdd | jdtdd | jdtdd |  }t|jpqtt	
 }t|j
 }t|j}|jpt|d }|jpt|d }t| st|| }t| st|| }tj|jt|d t|||}t||}tjj| |jd}	t|jdddd }
t|
|jd!|j t!d!d"}d#}||j"k r|D ]#}||j"kr n||}t# * tj$dtj%d$ |j&|d!d%\}}W d    n	1 sw   Y  W d    n	1 s/w   Y  | }||}t'(||}|	)  |*  tj+j,-| d& |	.  tj/d'|0 i|d( |d#kr||j1 d#krt# ; tj$dtj%d$ |2|d d) }|2|d d) }W d    n	1 sw   Y  | }| }W d    n	1 sw   Y  d*d+ }||d d) }||}||}t3|dd,}t3|dd,}t3|dd,}tj/t4|5d)d-d#6 7 t4|5d)d-d#6 7 t4|5d)d-d#6 7 d.|d( |d)7 }q||j"k st8  d S )/Nz	--keygripz
../keygripzkeygrip repo root for DINO)typedefaulthelpz--vidtokz%VidTok repo root (default: ../VidTok)z--vidtok-config)r   r   z--vidtok-ckptz--data-rootz /data/weiduoyuan/droid_raw/1.0.1z--batch-size   z	--workersz--lrg-C6?z--stepsi'  z--vis-everyd   z--devicecudaz--wandb-projectZour_vid_modelz7configs/vidtok_v1_1/vidtok_kl_causal_288_8chn_v1_1.yamlz/checkpoints/vidtok_kl_causal_288_8chn_v1_1.ckpt)Zprojectconfig)lr   g      @   )Z
num_framesZ
sample_fpssizeT)
batch_sizeshufflenum_workers
collate_fn
pin_memoryr   )device_typedtype)Zreturn_reg_log      ?z
train/loss)stepr   c                 S   s$   | d  ddddd d ddS )Nr   r         r%   g       @)permuteclamp)tr   r   r   <lambda>b   s   $ zmain.<locals>.<lambda>)Znrowr'   )zvis/input_frameszvis/gt_recon_frameszvis/pred_recon_frames)9argparseArgumentParseradd_argumentstrintfloat
parse_argsr   vidtokVidTok_rootresolveZkeygriptorchr   Zvidtok_configZvidtok_ckptis_absolutewandbinitZwandb_projectvarsr   r   r   optimAdamW
parametersr   r   Z	data_rootr   r   Zworkersr   stepsno_gradautocastfloat16encodeFmse_loss	zero_gradbackwardnnutilsclip_grad_norm_r&   logitemZ	vis_everydecoder   Imager)   cpunumpyfinish)pargsZvidtok_rootkeygrip_rootr   r	   r
   r4   r   optdatasetloaderZglobal_stepbatchxZz_gt_Zz_predlossZ
x_recon_gtZx_recon_predZto_01inpZgt_reconZ
pred_reconZgrid_inpZgrid_gtZ	grid_predr   r   r   main   s   	


 



&r]   __main__) __doc__r-   syspathlibr   r7   Ztorch.nn.functionalrH   
functionalrD   torch.utils.datar   Ztorchvision.utilsr   r9   __file__r6   parentsZvidgen_rootr5   pathinsertr0   Zour_vid_model.modelr   Zour_vid_model.datasetr   r   r   r   r]   __name__r   r   r   r   <module>   s(    U
