o
    ?߱i                     @   s   d dl Z d dlmZmZ d dlmZmZ d dlmZ 					dde	dee	 dee	 d	e
d
ee dee	 de	fddZ						dde	dee	 dee	 d	e
d
ee dee	 dee defddZdS )    N)AnyOptional)distributedlog)easy_ioTs3_pathcache_fp	cache_dir	rank_syncbackend_argsbackend_keyreturnc              	   C   s  |du r
t jdn|}|du rt jdt jdn|}t j|}|du r2t j|| dd}|ds>t j||}t	 dkr`t j
|r`t j|d	k r`t | td
| d |rt j
|std| d|  d| d td|  td|  tj| |d||d td|  d| d ntd| d|  d| d t  |S t j
|stj| |d||d td|  d| d |S )a^  download data from S3 with optional caching.

    This function first attempts to load the data from a local cache file. If
    the cache file doesn't exist, it downloads the data from S3 to the cache
    location. Caching is performed in a rank-aware manner
    using `distributed.barrier()` to ensure only one download occurs across
    distributed workers (if `rank_sync` is True).

    Args:
        s3_path (str): The S3 path of the data to load.
        cache_fp (str, optional): The path to the local cache file. If None,
            a filename will be generated based on `s3_path` within `cache_dir`.
        cache_dir (str, optional): The directory to store the cache file. If
            None, the environment variable `IMAGINAIRE_CACHE_DIR` (defaulting
            to "/tmp") will be used.
        rank_sync (bool, optional): Whether to synchronize download across
            distributed workers using `distributed.barrier()`. Defaults to True.
        backend_args (dict, optional): The backend arguments passed to easy_io to construct the backend.
        backend_key (str, optional): The backend key passed to easy_io to registry the backend or retrieve the backend if it is already registered.

    Returns:
        cache_fp (str): The path to the local cache file.

    Raises:
        FileNotFoundError: If the data cannot be found in S3 or the cache.
    N
TORCH_HOMEIMAGINAIRE_CACHE_DIRz~/.cache/imaginairezs3:// /r      zRemoved empty cache file .zLocal cache z Not exist! Downloading z to zbackend_args: zbackend_key: file)dst_typer   r   zDownloaded z already exist! z -> )osenvirongetpath
expanduserjoinreplace
startswithr   get_rankexistsgetsizeremover   warningcriticalinfor   copyfile_to_localbarrier)r   r   r	   r
   r   r    r'   X/data/cameron/vidgen/cosmos-predict2.5/cosmos_predict2/_src/imaginaire/utils/s3_utils.pydownload_from_s3_with_cache   sB   "



r)   easy_io_kwargsc                 C   s0   t | |||||}|du ri }tj|fi |S )ap  Loads data from S3 with optional caching.

    This function first attempts to load the data from a local cache file. If
    the cache file doesn't exist, it downloads the data from S3 to the cache
    location and then loads it. Caching is performed in a rank-aware manner
    using `distributed.barrier()` to ensure only one download occurs across
    distributed workers (if `rank_sync` is True).

    Args:
        s3_path (str): The S3 path of the data to load.
        cache_fp (str, optional): The path to the local cache file. If None,
            a filename will be generated based on `s3_path` within `cache_dir`.
        cache_dir (str, optional): The directory to store the cache file. If
            None, the environment variable `IMAGINAIRE_CACHE_DIR` (defaulting
            to "/tmp") will be used.
        rank_sync (bool, optional): Whether to synchronize download across
            distributed workers using `distributed.barrier()`. Defaults to True.
        backend_args (dict, optional): The backend arguments passed to easy_io to construct the backend.
        backend_key (str, optional): The backend key passed to easy_io to registry the backend or retrieve the backend if it is already registered.

    Returns:
        Any: The loaded data from the S3 path or cache file.

    Raises:
        FileNotFoundError: If the data cannot be found in S3 or the cache.
    N)r)   r   load)r   r   r	   r
   r   r   r*   r'   r'   r(   load_from_s3_with_cached   s   #r,   )NNTNN)NNTNNN)r   typingr   r   %cosmos_predict2._src.imaginaire.utilsr   r   -cosmos_predict2._src.imaginaire.utils.easy_ior   strbooldictr)   r,   r'   r'   r'   r(   <module>   s^   
O