o
    ?߱i*                     @   sb  d Z ddlmZ ddlmZ ddlmZ ddlmZ ddl	m
Z
mZ ddlmZ ddlmZ e
eed	Zd
eiZ	d$dededededededB dededefddZdedee fddZ		d%dedededededB dededB dededB fddZ	d$dedededededefddZdedee fd d!Z	d&dededededB def
d"d#ZdS )'zl
Imaginaire4 Attention Subpackage:
Unified implementation for all Attention implementations.

Frontend APIs
    )Tensor)flash2_attention_check)flash3_attention_check)
CausalType)natten_attention_check natten_multi_dim_attention_check)get_arch_tag)safe_log)nattenflash2flash3r
   Fbackendquerykeyvalue	is_causalcausal_typeN	is_varlenraise_errorreturnc              	   C   sB   | du rt d| tvrt d|  dt|  |||||||dS )a  
    Input validation function a specified backend.
    Runs the common and backend-specific checks. Returns False if any checks fail, otherwise True.

    Parameters:
        backend (str): selected backend.

        query (Tensor): 4-D query tensor, with the heads-last contiguous layout
            (`[batch, seqlen, heads, head_dim]`).

        key (Tensor): 4-D key tensor, with the heads-last contiguous layout
            (`[batch, seqlen_kv, heads_kv, head_dim]`).

        value (Tensor): 4-D value tensor, with heads-last contiguous layout
            (`[batch, seqlen_kv, heads_kv, head_dim_v]`).

        is_causal (bool): whether or not causal masking is enabled.

        causal_type (CausalType): causal masking mode. Choices: `CausalType.TopLeft`,
            `CausalType.BottomRight`. Required when `is_causal = True`.

        is_varlen (bool): whether or not a variable length (varlen) use case. Must be inferred
            beforehand based on arguments such as seqlens_{Q,KV} or cumulative_seqlen_{Q,KV} being
            passed.

        raise_error (bool): whether to raise an error if any checks fail or no backend is selected,
            instead of just returning False. Default is False.

    Returns:
        success (bool): whether use case is compatible with the backend.

    N2Cannot pass None backend to is_backend_compatible.Unrecognized backend name .)r   r   r   r   r   r   r   )
ValueErrorBACKEND_CHECK_MAPr   r   r   r   r   r   r   r    r   \/data/cameron/vidgen/cosmos-predict2.5/cosmos_predict2/_src/imaginaire/attention/backends.pyis_backend_compatible.   s   *r   arch_tagc                 C   sT   | dk rt d| d g S | dkrg dS | dv rddgS | d	kr'ddgS dgS )
a  
    Returns list of supported backends according to arch tag (attention.utils.get_arch_tag).
    Backends are ordered based on their known performance levels, so that the best-performing
    compatible backend is selected.

    Parameters:
        arch_tag (int): Arch tag for the current CUDA device. Example: 80 for A100, 90 for H100.

    Returns:
        backend_list (list[str]): a list of backend names (string). Empty if device is not supported.

    K   zAMinimum architecture supported for Attention is 75, got arch_tag=r   Z   )r   r
   r   )d   g   r
   r   P   logdebugr   r   r   r   get_backend_listi   s   r)   Tc           
      C   st   |durt || ||||||dr|S dS t| j}t|}	|	D ]}t || |||||ddr1|  S q|s6dS td)a  
    Selects a compatible backend, unless one is already selected, which runs its corresponding
    checks.

    Parameters:
        query (Tensor): 4-D query tensor, with the heads-last contiguous layout
            (`[batch, seqlen, heads, head_dim]`).

        key (Tensor): 4-D key tensor, with the heads-last contiguous layout
            (`[batch, seqlen_kv, heads_kv, head_dim]`).

        value (Tensor): 4-D value tensor, with heads-last contiguous layout
            (`[batch, seqlen_kv, heads_kv, head_dim_v]`).

        is_causal (bool): whether or not causal masking is enabled.

        causal_type (CausalType): causal masking mode. Choices: `CausalType.TopLeft`,
            `CausalType.BottomRight`. Required when `is_causal = True`.

        is_varlen (bool): whether or not a variable length (varlen) use case. Must be inferred
            beforehand based on arguments such as seqlens_{Q,KV} or cumulative_seqlen_{Q,KV} being
            passed.

        backend (str | None): selected backend, if any.

        raise_error (bool): whether to raise an error if any checks fail or no backend is selected,
            instead of just returning False. Default is **True**.

    Returns:
        backend (str | None): selected backend, or None if no backends are compatible.

    Nr   FzvCould not find a compatible Attention backend for this use case / device. Try running with debug logs to find out why.)r   r   devicer)   r   )
r   r   r   r   r   r   r   r   r   backend_listr   r   r   choose_backend   sB   *


r,   c                 C   s<   | du rt d| tvrt d|  dt|  ||||dS )a  
    Input validation function a specified multi-dimensional backend.
    Runs the common and backend-specific checks. Returns False if any checks fail, otherwise True.

    Parameters:
        backend (str): selected backend.

        query (Tensor): 4-D, 5-D, or 6-D query tensor, with the heads-last contiguous layout
            (`[batch, *token_layout_shape, heads, head_dim]`).

        key (Tensor): 4-D, 5-D, or 6-D key tensor, with the heads-last contiguous layout
            (`[batch, *token_layout_shape, heads_kv, head_dim]`).

        value (Tensor): 4-D, 5-D, or 6-D value tensor, with heads-last contiguous layout
            (`[batch, *token_layout_shape, heads_kv, head_dim_v]`).

        raise_error (bool): whether to raise an error if any checks fail or no backend is selected,
            instead of just returning False. Default is False.

    Returns:
        success (bool): whether use case is compatible with the backend.

    Nr   r   r   )r   r   r   r   )r   BACKEND_MULTI_DIM_CHECK_MAPr   r   r   r   r   r   r   r   is_multi_dim_backend_compatible   s   r/   c                 C   s$   | dk rt d| d g S dgS )a  
    Returns list of supported multi-dimensional backends according to arch tag (attention.utils.get_arch_tag).
    Backends are ordered based on their known performance levels, so that the best-performing
    compatible backend is selected.

    Parameters:
        arch_tag (int): Arch tag for the current CUDA device. Example: 80 for A100, 90 for H100.

    Returns:
        backend_list (list[str]): a list of backend names (string). Empty if device is not supported.

    r    zSMinimum architecture supported for Multi-Dimensional Attention is 75, got arch_tag=r   r
   r%   r(   r   r   r   get_multi_dim_backend_list  s   r0   c                 C   s`   |durt || ||ddsJ |S t| j}t|}|D ]}t || ||ddr+|  S qtd)a  
    Selects a compatible multi-dimensional backend, unless one is already selected, which runs its
    corresponding checks.

    Parameters:
        query (Tensor): 4-D, 5-D, or 6-D query tensor, with the heads-last contiguous layout
            (`[batch, *token_layout_shape, heads, head_dim]`).

        key (Tensor): 4-D, 5-D, or 6-D key tensor, with the heads-last contiguous layout
            (`[batch, *token_layout_shape, heads_kv, head_dim]`).

        value (Tensor): 4-D, 5-D, or 6-D value tensor, with heads-last contiguous layout
            (`[batch, *token_layout_shape, heads_kv, head_dim_v]`).

        backend (str | None): selected backend, if any.

    Returns:
        backend (str): selected backend.

    NTr.   FzCould not find a compatible Multi-Dimensional Attention backend for this use case / device. Try running with debug logs to find out why.)r/   r   r*   r0   r   )r   r   r   r   r   r+   r   r   r   choose_multi_dim_backend#  s0   

	r1   )F)NT)N)__doc__torchr   Z7cosmos_predict2._src.imaginaire.attention.flash2.checksr   Z7cosmos_predict2._src.imaginaire.attention.flash3.checksr   /cosmos_predict2._src.imaginaire.attention.masksr   Z7cosmos_predict2._src.imaginaire.attention.natten.checksr   r   /cosmos_predict2._src.imaginaire.attention.utilsr   r	   r&   r   r-   strboolr   intlistr)   r,   r/   r0   r1   r   r   r   r   <module>   s   	
;/	
U
,