o
    vi!                     @   s   d dl Z d dlZd dlZd dlZd dlZd dlZd dlmZ d dlm	Z	 d dl
mZmZ d dlmZ d dlmZ d dlmZ dad	e	d
e jddfddZdS )    N)	OmegaConf)Config)distributedlog)get_cluster_env)easy_io)	CRED_ENVSFconfigargsreturnc              	   C   sT  t d }tt|d }t|d | 	 
 d}tj| jjjs/tjdv r]tjdd| jjj d| jj d| dd| jjj d| jj dd	| jjjd
d dantd td| jj  t }t dkr| jj}td|  tj| jjdd d t!j"|j#|j$d}t%||d< d| jjj d| jj d|d< tjdrt&dd}|' ( |d< td|d   W d   n1 sw   Y  tjdr	t&dd}|' ( |d< td|d   W d   n	1 sw   Y  t&| dd}t)*|| W d   n	1 s#w   Y  t&| dd}t)*|| W d   n	1 sBw   Y  t+j,r[t+j,j#j-dd  |. D dd! | j/rtrtd"| jjj d| jj d| d | jj d#}t0|d$|1dd%   |2d&d'}	t0|	d$|	1dd%   t0| dd( t0| dd) tjd*rt0d*d+ tjd,rt0d,d- tjd.rt0d.d/ td0rt3d0}
ni }
|d| jjj d| jj d| dd1||
t4|
< t5|
 t6|
d0 ntd2 | j/r&tr(t6|d3t d4d' dS dS dS )5a  
    Configures the environment for reproducibility of experiments by setting up
    S3 backends for storage, logging important job details, and saving configuration and
    environment details both locally and on S3.
    This function is crucial for ensuring that all aspects of the computational environment are captured and can be
    replicated for future runs or analysis.

    Parameters:
        config (Config): A configuration object containing all the settings necessary
                         for the job, including paths and credentials.
        args (argparse.Namespace): An argparse namespace containing the command line
                                   arguments passed to the script. This includes configurations
                                   and any overrides specified at runtime.

    Actions:
        - Sets up S3 backend for storing user data and other outputs.
        - Logs job paths and critical information regarding job execution.
        - Saves the job configuration locally only for the main node in a distributed setting.
        - Captures and logs command-line execution details.
        - Optionally reads git commit and branch information if available and logs them.
        - Saves both job environment information and launch details locally and syncs these to S3.
        - Supports conditional integration with Weights & Biases (wandb) for experiment tracking.

    Notes:
        - The function is designed to run within a distributed environment where certain actions
          (like saving configurations) are restricted to the main node (rank 0).
        - It uses the 'easy_io' module for interacting with S3, ensuring files are written and
          read correctly from the object store.
        - It leverages OmegaConf for saving YAML configurations
        - git information is read from 'git_commit.txt' and 'git_branch.txt' files if they exist.
        - snapshot codebase is saved as 'codebase.zip' if it exists in the current directory.

    Raises:
        FileNotFoundError: If specific files like 'git_commit.txt' or 'codebase.zip' are expected
                           but not found.
        IOError: If there are issues in file handling operations, particularly with file
                 reading/writing.
    z%Y-%m-%d_%H-%M-%Szutf-8r   )proddevstgs3zs3:///z
/job_runs/)s3://timestamps_rundir/zs3://rundir/)backendpath_mappings3_credential_path)backend_argsTz4S3 credentials not found. Skipping easy_io S3 setup.z
Job path: zJob local path: )exist_ok )cmdZargs_cfg_pathargs_overridejob_local_pathzgit_commit.txtr	commit_idzCommit id: Nzgit_branch.txtZ
git_branchzgit branch: z/job_env.yamlwz/launch_info.yamlc                 S   s   i | ]
\}}d | |qS )z	JOB_INFO/ ).0kvr   r   P/data/cameron/vidgen/cosmos-policy/cosmos_policy/_src/imaginaire/utils/launch.py
<dictcomp>   s    z*log_reproducible_setup.<locals>.<dictcomp>)allow_val_changez%Uploading reproducible setup to s3://z/config.pklr   z.pklz.yamlz#s3://timestamps_rundir/job_env.yamlz's3://timestamps_rundir/launch_info.yamlzcodebase.zipz#s3://timestamps_rundir/codebase.zipzcode.tar.gzz"s3://timestamps_rundir/code.tar.gzzgit_diff.txtz#s3://timestamps_rundir/git_diff.txtzs3://rundir/job_history.yaml)	timestampZreproduce_dirz@S3 credentials not found. Skipping upload of reproducible setup.z(s3://timestamps_rundir/cluster_env/RANK_06d)7timestrftimetorch
ByteTensor	bytearraycudar   	broadcastcpunumpytobytesdecodeospathexists
checkpointsave_to_object_storecredentialsr   APP_ENVr   set_s3_backendbucketjobS3_READYr   warningr   get_rank
path_localcriticalmakedirsjoinsysargvr	   optsstropenreadstripr   savewandbrunupdateitemsupload_reproducible_setupcopyfile_from_localsplitreplaceloadlenprintdump)r	   r
   Zrun_timestampZtime_tensorjob_infor   Zlaunch_infofZconfig_pkl_save_fpZconfig_yaml_save_fpZjob_historyr   r   r"   log_reproducible_setup#   s   ( 


    
rZ   )argparser3   rD   r(   r*   rL   	omegaconfr   $cosmos_policy._src.imaginaire.configr   #cosmos_policy._src.imaginaire.utilsr   r   Z/cosmos_policy._src.imaginaire.utils.cluster_envr   +cosmos_policy._src.imaginaire.utils.easy_ior   ?cosmos_policy._src.imaginaire.utils.env_parsers.cred_env_parserr   r=   	NamespacerZ   r   r   r   r"   <module>   s   