o
    vi                     @   sp   d dl Z d dlZd dlZd dlZd dlmZ dd ZG dd deZ	dddZ
d	d
 Zdd ZG dd dZdS )    N)loggerc               
   C   s,  zzgt   t  } t| D ]X}t |}t |}t|tr$|d}t	d| d|  d|v s6d|v r? W W t 
  dS d|v rL W W t 
  dS d|v rY W W t 
  dS d|v rf W W t 
  dS qW n t jy } zt	d	|  W Y d
}~nd
}~ww W t 
  dS W t 
  dS t 
  w )z
    Retrieves the GPU architecture of the available GPUs.

    Returns:
        str: The GPU architecture, which can be "H100", "A100", or "Other".
    zutf-8zGPU z	: Model: H100ZH200ZA100ZL40SB200zFailed to get GPU info: NOther)pynvmlnvmlInitnvmlDeviceGetCountrangenvmlDeviceGetHandleByIndexnvmlDeviceGetName
isinstancebytesdecodeprintnvmlShutdown	NVMLError)device_countihandle
model_nameerror r   P/data/cameron/vidgen/cosmos-policy/cosmos_policy/_src/imaginaire/utils/device.pyget_gpu_architecture   sD   





r   c                   @   s   e Zd ZdZdS )GPUArchitectureNotSupportedzV
    Custom exception raised when the expected GPU architecture is not supported.
    N)__name__
__module____qualname____doc__r   r   r   r   r   ;   s    r   c              
   C   s   z/t   t t d}t|  d|jd d  d|jd d  d|jd d  d W d S  t j	yJ } zt
d|  W Y d }~d S d }~ww )Nr   z:    /z
MiB used (z	MiB free)Failed to get GPU memory info: )r   r   nvmlDeviceGetMemoryInfor
   logginginfousedtotalfreer   r   )strmeminfor   r   r   r   print_gpu_memC   s   8
r*   c                   C   s.   t   td t  t   td t   d S )Nzgc()zempty cuda cache)r*   r   gccollectr   r   r   r   force_gcN   s   
r-   c               
   C   sh   zt   t t d} | jd d d dkW S  t jy3 } ztd|  W Y d }~d S d }~ww )Nr   r   P   r!   )r   r   r"   r
   r&   r   r   )r)   r   r   r   r   gpu0_has_80gb_or_lessX   s   r/   c                       sT   e Zd Zee d Zdef fddZ	de
fddZdee fdd	Z  ZS )
Device@   
device_idxc                    s   t    t|| _d S N)super__init__r   r
   r   )selfr2   	__class__r   r   r5   d   s   
zDevice.__init__returnc                 C   s   t | jS r3   )r   r   r   )r6   r   r   r   get_nameh   s   zDevice.get_namec                 C   sP   d}t | jtjD ]	}d|| }q
dd |D }|  dd t|D S )N z{:064b}c                 S   s   g | ]}t |qS r   )int).0xr   r   r   
<listcomp>p   s    z+Device.get_cpu_affinity.<locals>.<listcomp>c                 S   s   g | ]
\}}|d kr|qS )r   r   )r=   r   er   r   r   r?   r   s    )r   nvmlDeviceGetCpuAffinityr   r0   _nvml_affinity_elementsformatreverse	enumerate)r6   affinity_stringjaffinity_listr   r   r   get_cpu_affinityk   s   zDevice.get_cpu_affinity)r   r   r   mathceilos	cpu_countrB   r<   r5   r(   r:   listrI   __classcell__r   r   r7   r   r0   a   s
    r0   r3   )r+   rJ   rL   r   logurur   r#   r   	Exceptionr   r*   r-   r/   r0   r   r   r   r   <module>   s   #

	