o
    ?߱i>                     @   sV  d Z ddlmZ ddlZddlmZ ddlmZmZmZ ddl	m
Z
 ddlmZ ddlmZmZ dd	lmZmZ dd
lmZ 	d"dejdedeej deej dB def
ddZ	d#dedededededededB fddZ	d#dedededede
dededefddZd#dedededededB f
ddZ	d#dededededef
d d!ZdS )$zt
Imaginaire4 Attention Subpackage:
Unified implementation for all Attention implementations.

NATTEN backend checks
    )partialN)Tensor)attention_param_checksattention_tensor_checks!multi_dim_attention_tensor_checks)
CausalType)NATTEN_SUPPORTED)get_bwd_dtypesget_fwd_dtypes)get_arch_taglog_or_raise_error)safe_logdtypeis_training
dtypes_fwd
dtypes_bwdreturnc                 C   s   |r
|dur
| |v S | |v S )an  
    Helper determining whether dtype is supported with different sets of supported dtypes for
    training and inference (forward+backward and forward).

    Parameters:
        dtype (torch.dtype): tensor element type.

        is_training (bool): whether use case can be used to backpropagate (tensor.requires_grad).

        dtypes_fwd (list[torch.dtype]): list of dtypes allowed for inference only (when not
            tensor.requires_grad).

        dtypes_bwd (list[torch.dtype] | None): Optional list of dtypes allowed for training only
            (when tensor.requires_grad), if different from dtypes_fwd.

    N r   r   r   r   r   r   a/data/cameron/vidgen/cosmos-predict2.5/cosmos_predict2/_src/imaginaire/attention/natten/checks.pydtype_supported(   s   r   Fquerykeyvalue	is_causal	is_varlenraise_errorc              
   C   s  t t|d}t| j}| j}| j}	| jd |jd k}
t| jd |jd }|dk r1t	d dS t
jt
jt
jt
jg}t
jt
jg}t||	||d}|dv }|dv rY|
sY|rY|rYd	S d
}|dvrg|d| d7 }|
rm|d7 }|s|	r}|d| d| d7 }n|d| d| d7 }|s|d|d7 }t	d|  t
jt
jg}t||	|d}|dv r|	 p|dv }|dkr|s|s|
s|r|rdS d
}|dkr|d| d7 }|r|d7 }|r|d7 }|
r|d7 }|s|d| d| d7 }|s|d|d|	d7 }t	d|  t
jt
jt
jg}t||	|d}|d d k}|r!|r!d!S d
}|s1|d| d| d7 }|s<|d|d7 }t	d"|  |d#|d$|d%|d&|
d'	td( dS ))a?  
    Chooses an FMHA backend in NATTEN (cutlass-fmha, hopper-fmha, blackwell-fmha) for the current
    use case based on features needed and current GPU architecture.

    Using tensor shapes, it infers whether MLA (head_dim_value != head_dim_qk) or
    GQA/MQA (heads_kv != heads_q) are required.
    Using tensor device, it infers GPU architecture and compatible backends.
    Using arguments is_causal and is_varlen, and other inferred features, it picks the best
    available backend.

    It is possible for no backend to be selected, if the combination of features is not available in
    any one of the NATTEN backends, in which case it will return None.

    Parameters:
        query (Tensor): 4-D query tensor, with the heads-last contiguous layout
            (`[batch, seqlen, heads, head_dim]`).

        key (Tensor): 4-D key tensor, with the heads-last contiguous layout
            (`[batch, seqlen_kv, heads_kv, head_dim]`).

        value (Tensor): 4-D value tensor, with heads-last contiguous layout
            (`[batch, seqlen_kv, heads_kv, head_dim_v]`).

        is_causal (bool): whether or not causal masking is enabled.

        is_varlen (bool): whether or not a variable length (varlen) use case. Must be inferred
            beforehand based on arguments such as seqlens_{Q,KV} or cumulative_seqlen_{Q,KV} being
            passed.

        raise_error (bool): whether to raise an error if no backend is selected, instead of just
            returning None. Default is False.

    Returns:
        backend (str | None): selected NATTEN backend, if any compatible.

    r   K   zNNATTEN is not supported because compute capability is below the minimum (7.5).Nr   )    @      )d   g   blackwell-fmha zIncompatible architecture (z, expected 100 or 103). z1Use case is MLA (head_dim_qk != head_dim_value). z
Data type z2 is not in list of supported dtypes for training: z. z3 is not in list of supported dtypes for inference: z	head_dim=z is not supported. z9NATTEN backend blackwell-fmha is not compatible. Reason: )r   r   r   )r    r!   r"      Z   hopper-fmhaz, expected 90). zUse case is causal. zUse case is varlen. z% is not in list of supported dtypes: z with is_training=z6NATTEN backend hopper-fmha is not compatible. Reason:    r   cutlass-fmhaz7NATTEN backend cutlass-fmha is not compatible. Reason: z=Could not find a compatible NATTEN FMHA backend for arch_tag=z, is_causal=z, is_varlen=z	, is_mla=.	exception)r   r   r   devicer   requires_gradshapemaxlogdebugtorchfloat16bfloat16float8_e5m2float8_e4m3fnr   float32RuntimeError)r   r   r   r   r   r   	target_fnarch_tagr   r   Zis_mlahead_dimZblackwell_fmha_fwd_dtypesZblackwell_fmha_bwd_dtypesZdtype_supported_blackwellZhead_dim_supported_blackwellreasonZhopper_fmha_dtypesZdtype_supported_hopperZhead_dim_supported_hopperZcutlass_fmha_dtypesZdtype_supported_cutlassZhead_dim_supported_cutlassr   r   r   choose_natten_backend@   s   '

r@   causal_typec                 C   s   t t|d}ts|dtd dS t| j}t|}	t|}
t| |||	|
dd|dd	s2|dtd dS t	| ||||d	 |rM|t
jt
jfvrM|d
td dS t| |||||d}|du r]dS dS )a>  
    Input validation function for the NATTEN backend.
    Runs the common checks in addition to trying to find a compatible NATTEN backend. If any checks
    fail, or no compatible backend is found in NATTEN, returns False.

    Parameters:
        query (Tensor): 4-D query tensor, with the heads-last contiguous layout
            (`[batch, seqlen, heads, head_dim]`).

        key (Tensor): 4-D key tensor, with the heads-last contiguous layout
            (`[batch, seqlen_kv, heads_kv, head_dim]`).

        value (Tensor): 4-D value tensor, with heads-last contiguous layout
            (`[batch, seqlen_kv, heads_kv, head_dim_v]`).

        is_causal (bool): whether or not causal masking is enabled.

        causal_type (CausalType): causal masking mode. Choices: `CausalType.TopLeft`,
            `CausalType.BottomRight`. Required when `is_causal = True`.

        is_varlen (bool): whether or not a variable length (varlen) use case. Must be inferred
            beforehand based on arguments such as seqlens_{Q,KV} or cumulative_seqlen_{Q,KV} being
            passed.

        raise_error (bool): whether to raise an error if any checks fail or no backend is selected,
            instead of just returning False. Default is False.

    Returns:
        success (bool): whether use case is compatible with NATTEN backend.

    r   lNATTEN is not supported in this environment. Run with debug logs to find out why, or choose another backend.r-   FTzNATTEN Attention	r   r   r   supported_dtypes_forwardsupported_dtypes_backwardsupports_mlasupports_gqa_mqar   backend_name)NATTEN does not support the given inputs.)r   r   r   r   rA   z?NATTEN Attention only supports top-left causal masking for now.)r   r   r   N)r   r   r   r;   r   r/   r
   r	   r   r   r   TopLeftDontCarer@   )r   r   r   r   rA   r   r   r<   r=   
fwd_dtypes
bwd_dtypesnatten_backendr   r   r   natten_attention_check   sN   (
rO   c                 C   s4   t | ||dd|d}dddd}||v sJ || S )ag  
    Chooses an FNA backend in NATTEN (cutlass-fna, hopper-fna, blackwell-fna) for the current
    use case based on features needed and current GPU architecture.

    Using tensor shapes, it infers whether MLA (head_dim_value != head_dim_qk) or
    GQA/MQA (heads_kv != heads_q) are required.
    Using tensor device, it infers GPU architecture and compatible backends.
    Using arguments is_causal and is_varlen, and other inferred features, it picks the best
    available backend.

    It is possible for no backend to be selected, if the combination of features is not available in
    any one of the NATTEN backends, in which case it will return None.

    Parameters:
        query (Tensor): 4-D, 5-D, or 6-D query tensor, with the heads-last contiguous layout
            (`[batch, *token_layout_shape, heads, head_dim]`).

        key (Tensor): 4-D, 5-D, or 6-D key tensor, with the heads-last contiguous layout
            (`[batch, *token_layout_shape, heads_kv, head_dim]`).

        value (Tensor): 4-D, 5-D, or 6-D value tensor, with heads-last contiguous layout
            (`[batch, *token_layout_shape, heads_kv, head_dim_v]`).

        raise_error (bool): whether to raise an error if no backend is selected, instead of just
            returning None. Default is False.

    Returns:
        backend (str | None): selected NATTEN backend, if any compatible.

    F)r   r   r   r   r   r   zcutlass-fnaz
hopper-fnazblackwell-fna)r+   r)   r%   )r@   )r   r   r   r   Zfmha_backendZ"natten_fmha_backend_to_fna_backendr   r   r   choose_natten_multi_dim_backend!  s   #
rP   c           	      C   s   t t|d}ts|dtd dS t| j}t|}t|}t| ||||dd|dd	s2|dtd dS t	| |||d}|d	u r@dS dS )
a  
    Input validation function for the NATTEN multi-dimensional backend.
    Runs the common checks in addition to trying to find a compatible NATTEN backend. If any checks
    fail, or no compatible backend is found in NATTEN, returns False.

    Parameters:
        query (Tensor): 4-D, 5-D, or 6-D query tensor, with the heads-last contiguous layout
            (`[batch, *token_layout_shape, heads, head_dim]`).

        key (Tensor): 4-D, 5-D, or 6-D key tensor, with the heads-last contiguous layout
            (`[batch, *token_layout_shape, heads_kv, head_dim]`).

        value (Tensor): 4-D, 5-D, or 6-D value tensor, with heads-last contiguous layout
            (`[batch, *token_layout_shape, heads_kv, head_dim_v]`).

        raise_error (bool): whether to raise an error if any checks fail or no backend is selected,
            instead of just returning False. Default is False.

    Returns:
        success (bool): whether use case is compatible with NATTEN backend.

    r   rB   r-   FTz"NATTEN Multi-Dimensional AttentionrC   rI   N)
r   r   r   r;   r   r/   r
   r	   r   rP   )	r   r   r   r   r<   r=   rL   rM   rN   r   r   r    natten_multi_dim_attention_checkW  s6   
rQ   )N)F)__doc__	functoolsr   r5   r   0cosmos_predict2._src.imaginaire.attention.checksr   r   r   /cosmos_predict2._src.imaginaire.attention.masksr   0cosmos_predict2._src.imaginaire.attention.nattenr   Z5cosmos_predict2._src.imaginaire.attention.natten.metar	   r
   /cosmos_predict2._src.imaginaire.attention.utilsr   r   r   r3   r   boollistr   strr@   rO   rP   rQ   r   r   r   r   <module>   s   

 
$Z: