o
    vi+                     @   sp  d Z ddlmZ ddlmZ ddlmZ ddlmZ ddl	m
Z
 ddlmZmZ ddlmZ dd	lmZ eeeed
ZdeiZ	d%dededededede
dB dededefddZdedee fddZ		d&dedededede
dB dededB dededB fddZ	d%dedededededefdd Zdedee fd!d"Z	d'dededededB def
d#d$ZdS )(zl
Imaginaire4 Attention Subpackage:
Unified implementation for all Attention implementations.

Frontend APIs
    )Tensor)cudnn_attention_check)flash2_attention_check)flash3_attention_check)
CausalType)natten_attention_check natten_multi_dim_attention_check)get_arch_tag)safe_log)cudnnnattenflash2flash3r   Fbackendquerykeyvalue	is_causalcausal_typeN	is_varlenraise_errorreturnc              	   C   sB   | du rt d| tvrt d|  dt|  |||||||dS )a  
    Input validation function a specified backend.
    Runs the common and backend-specific checks. Returns False if any checks fail, otherwise True.

    Parameters:
        backend (str): selected backend.

        query (Tensor): 4-D query tensor, with the heads-last contiguous layout
            (`[batch, seqlen, heads, head_dim]`).

        key (Tensor): 4-D key tensor, with the heads-last contiguous layout
            (`[batch, seqlen_kv, heads_kv, head_dim]`).

        value (Tensor): 4-D value tensor, with heads-last contiguous layout
            (`[batch, seqlen_kv, heads_kv, head_dim_v]`).

        is_causal (bool): whether or not causal masking is enabled.

        causal_type (CausalType): causal masking mode. Choices: `CausalType.TopLeft`,
            `CausalType.BottomRight`. Required when `is_causal = True`.

        is_varlen (bool): whether or not a variable length (varlen) use case. Must be inferred
            beforehand based on arguments such as seqlens_{Q,KV} or cumulative_seqlen_{Q,KV} being
            passed.

        raise_error (bool): whether to raise an error if any checks fail or no backend is selected,
            instead of just returning False. Default is False.

    Returns:
        success (bool): whether use case is compatible with the backend.

    N2Cannot pass None backend to is_backend_compatible.Unrecognized backend name .)r   r   r   r   r   r   r   )
ValueErrorBACKEND_CHECK_MAPr   r   r   r   r   r   r   r    r   V/data/cameron/vidgen/cosmos-policy/cosmos_policy/_src/imaginaire/attention/backends.pyis_backend_compatible0   s   *r    arch_tagc                 C   sT   | dk rt d| d g S | dkrg dS | dv rg dS | dkr'g d	S d
gS )a  
    Returns list of supported backends according to arch tag (attention.utils.get_arch_tag).
    Backends are ordered based on their known performance levels, so that the best-performing
    compatible backend is selected.

    Parameters:
        arch_tag (int): Arch tag for the current CUDA device. Example: 80 for A100, 90 for H100.

    Returns:
        backend_list (list[str]): a list of backend names (string). Empty if device is not supported.

    K   zAMinimum architecture supported for Attention is 75, got arch_tag=r   Z   )r   r   r   r   )d   g   )r   r   r   P   )r   r   r   r   logdebugr!   r   r   r   get_backend_listk   s   r+   Tc           
      C   st   |durt || ||||||dr|S dS t| j}t|}	|	D ]}t || |||||ddr1|  S q|s6dS td)a  
    Selects a compatible backend, unless one is already selected, which runs its corresponding
    checks.

    Parameters:
        query (Tensor): 4-D query tensor, with the heads-last contiguous layout
            (`[batch, seqlen, heads, head_dim]`).

        key (Tensor): 4-D key tensor, with the heads-last contiguous layout
            (`[batch, seqlen_kv, heads_kv, head_dim]`).

        value (Tensor): 4-D value tensor, with heads-last contiguous layout
            (`[batch, seqlen_kv, heads_kv, head_dim_v]`).

        is_causal (bool): whether or not causal masking is enabled.

        causal_type (CausalType): causal masking mode. Choices: `CausalType.TopLeft`,
            `CausalType.BottomRight`. Required when `is_causal = True`.

        is_varlen (bool): whether or not a variable length (varlen) use case. Must be inferred
            beforehand based on arguments such as seqlens_{Q,KV} or cumulative_seqlen_{Q,KV} being
            passed.

        backend (str | None): selected backend, if any.

        raise_error (bool): whether to raise an error if any checks fail or no backend is selected,
            instead of just returning False. Default is **True**.

    Returns:
        backend (str | None): selected backend, or None if no backends are compatible.

    Nr   FzvCould not find a compatible Attention backend for this use case / device. Try running with debug logs to find out why.)r    r	   devicer+   r   )
r   r   r   r   r   r   r   r   r!   backend_listr   r   r   choose_backend   sB   *


r.   c                 C   s<   | du rt d| tvrt d|  dt|  ||||dS )a  
    Input validation function a specified multi-dimensional backend.
    Runs the common and backend-specific checks. Returns False if any checks fail, otherwise True.

    Parameters:
        backend (str): selected backend.

        query (Tensor): 4-D, 5-D, or 6-D query tensor, with the heads-last contiguous layout
            (`[batch, *token_layout_shape, heads, head_dim]`).

        key (Tensor): 4-D, 5-D, or 6-D key tensor, with the heads-last contiguous layout
            (`[batch, *token_layout_shape, heads_kv, head_dim]`).

        value (Tensor): 4-D, 5-D, or 6-D value tensor, with heads-last contiguous layout
            (`[batch, *token_layout_shape, heads_kv, head_dim_v]`).

        raise_error (bool): whether to raise an error if any checks fail or no backend is selected,
            instead of just returning False. Default is False.

    Returns:
        success (bool): whether use case is compatible with the backend.

    Nr   r   r   )r   r   r   r   )r   BACKEND_MULTI_DIM_CHECK_MAPr   r   r   r   r   r   r   r   is_multi_dim_backend_compatible   s   r1   c                 C   s$   | dk rt d| d g S dgS )a  
    Returns list of supported multi-dimensional backends according to arch tag (attention.utils.get_arch_tag).
    Backends are ordered based on their known performance levels, so that the best-performing
    compatible backend is selected.

    Parameters:
        arch_tag (int): Arch tag for the current CUDA device. Example: 80 for A100, 90 for H100.

    Returns:
        backend_list (list[str]): a list of backend names (string). Empty if device is not supported.

    r"   zSMinimum architecture supported for Multi-Dimensional Attention is 75, got arch_tag=r   r   r'   r*   r   r   r   get_multi_dim_backend_list  s   r2   c                 C   s`   |durt || ||ddsJ |S t| j}t|}|D ]}t || ||ddr+|  S qtd)a  
    Selects a compatible multi-dimensional backend, unless one is already selected, which runs its
    corresponding checks.

    Parameters:
        query (Tensor): 4-D, 5-D, or 6-D query tensor, with the heads-last contiguous layout
            (`[batch, *token_layout_shape, heads, head_dim]`).

        key (Tensor): 4-D, 5-D, or 6-D key tensor, with the heads-last contiguous layout
            (`[batch, *token_layout_shape, heads_kv, head_dim]`).

        value (Tensor): 4-D, 5-D, or 6-D value tensor, with heads-last contiguous layout
            (`[batch, *token_layout_shape, heads_kv, head_dim_v]`).

        backend (str | None): selected backend, if any.

    Returns:
        backend (str): selected backend.

    NTr0   FzCould not find a compatible Multi-Dimensional Attention backend for this use case / device. Try running with debug logs to find out why.)r1   r	   r,   r2   r   )r   r   r   r   r!   r-   r   r   r   choose_multi_dim_backend)  s0   

	r3   )F)NT)N)__doc__torchr   Z4cosmos_policy._src.imaginaire.attention.cudnn.checksr   Z5cosmos_policy._src.imaginaire.attention.flash2.checksr   Z5cosmos_policy._src.imaginaire.attention.flash3.checksr   -cosmos_policy._src.imaginaire.attention.masksr   Z5cosmos_policy._src.imaginaire.attention.natten.checksr   r   -cosmos_policy._src.imaginaire.attention.utilsr	   r
   r(   r   r/   strboolr    intlistr+   r.   r1   r2   r3   r   r   r   r   <module>   s   	
;3	
U
,