o
    vžÄiõ<  ã                   @   sV  d Z ddlmZ ddlZddlmZ ddlmZmZmZ ddl	m
Z
 ddlmZ ddlmZmZ dd	lmZmZ dd
lmZ 	d"dejdedeej deej dB def
dd„Z	d#dedededededededB fdd„Z	d#dedededede
dededefdd„Zd#dedededededB f
dd„Z	d#dededededef
d d!„ZdS )$zt
Imaginaire4 Attention Subpackage:
Unified implementation for all Attention implementations.

NATTEN backend checks
é    )ÚpartialN)ÚTensor)Úattention_param_checksÚattention_tensor_checksÚ!multi_dim_attention_tensor_checks)Ú
CausalType)ÚNATTEN_SUPPORTED)Úget_bwd_dtypesÚget_fwd_dtypes)Úget_arch_tagÚlog_or_raise_error)Úsafe_logÚdtypeÚis_trainingÚ
dtypes_fwdÚ
dtypes_bwdÚreturnc                 C   s   |r
|dur
| |v S | |v S )an  
    Helper determining whether dtype is supported with different sets of supported dtypes for
    training and inference (forward+backward and forward).

    Parameters:
        dtype (torch.dtype): tensor element type.

        is_training (bool): whether use case can be used to backpropagate (tensor.requires_grad).

        dtypes_fwd (list[torch.dtype]): list of dtypes allowed for inference only (when not
            tensor.requires_grad).

        dtypes_bwd (list[torch.dtype] | None): Optional list of dtypes allowed for training only
            (when tensor.requires_grad), if different from dtypes_fwd.

    N© ©r   r   r   r   r   r   ú[/data/cameron/vidgen/cosmos-policy/cosmos_policy/_src/imaginaire/attention/natten/checks.pyÚdtype_supported(   s   r   FÚqueryÚkeyÚvalueÚ	is_causalÚ	is_varlenÚraise_errorc                 C   sX  t t|d}t| jƒ}| j}| j}	| jd |jd k}
| jd |jd k}|dk r0t d¡ dS t	j
t	jt	jt	jg}t	j
t	jg}t||	||d}|dv rR|
sR|rRd	S d
}|dvr`|d|› d7 }|
rf|d7 }|s|	rv|d|› d|› d7 }n|d|› d|› d7 }t d|› ¡ t	j
t	jg}t||	|d}|dkr¦|s¦|s¦|s¦|
s¦|r¦dS d
}|dkr´|d|› d7 }|rº|d7 }|rÀ|d7 }|rÆ|d7 }|
rÌ|d7 }|sÙ|d|› d|› d7 }t d|› ¡ t	jt	j
t	jg}t||	|d}|sö|rödS d
}|rþ|d7 }|s|d|› d|› d7 }t d|› ¡ |d|›d|›d |›d!|
›d"|›d#td$ dS )%a?  
    Chooses an FMHA backend in NATTEN (cutlass-fmha, hopper-fmha, blackwell-fmha) for the current
    use case based on features needed and current GPU architecture.

    Using tensor shapes, it infers whether MLA (head_dim_value != head_dim_qk) or
    GQA/MQA (heads_kv != heads_q) are required.
    Using tensor device, it infers GPU architecture and compatible backends.
    Using arguments is_causal and is_varlen, and other inferred features, it picks the best
    available backend.

    It is possible for no backend to be selected, if the combination of features is not available in
    any one of the NATTEN backends, in which case it will return None.

    Parameters:
        query (Tensor): 4-D query tensor, with the heads-last contiguous layout
            (`[batch, seqlen, heads, head_dim]`).

        key (Tensor): 4-D key tensor, with the heads-last contiguous layout
            (`[batch, seqlen_kv, heads_kv, head_dim]`).

        value (Tensor): 4-D value tensor, with heads-last contiguous layout
            (`[batch, seqlen_kv, heads_kv, head_dim_v]`).

        is_causal (bool): whether or not causal masking is enabled.

        is_varlen (bool): whether or not a variable length (varlen) use case. Must be inferred
            beforehand based on arguments such as seqlens_{Q,KV} or cumulative_seqlen_{Q,KV} being
            passed.

        raise_error (bool): whether to raise an error if no backend is selected, instead of just
            returning None. Default is False.

    Returns:
        backend (str | None): selected NATTEN backend, if any compatible.

    ©r   éÿÿÿÿéþÿÿÿéK   zNNATTEN is not supported because compute capability is below the minimum (7.5).Nr   )éd   ég   úblackwell-fmhaÚ zIncompatible architecture (z, expected 100 or 103). z1Use case is MLA (head_dim_qk != head_dim_value). z
Data type z2 is not in list of supported dtypes for training: z. z3 is not in list of supported dtypes for inference: z9NATTEN backend blackwell-fmha is not compatible. Reason: )r   r   r   éZ   úhopper-fmhaz, expected 90). zUse case is causal. zUse case is varlen. zUse case is GQA/MQA. z% is not in list of supported dtypes: z6NATTEN backend hopper-fmha is not compatible. Reason: úcutlass-fmhaz7NATTEN backend cutlass-fmha is not compatible. Reason: z=Could not find a compatible NATTEN FMHA backend for arch_tag=z, is_causal=z, is_varlen=z	, is_mla=z, is_gqa_mqa=Ú.©Ú	exception)r   r   r   Údevicer   Úrequires_gradÚshapeÚlogÚdebugÚtorchÚfloat16Úbfloat16Úfloat8_e5m2Úfloat8_e4m3fnr   Úfloat32ÚRuntimeError)r   r   r   r   r   r   Ú	target_fnÚarch_tagr   r   Zis_mlaÚ
is_gqa_mqaZblackwell_fmha_fwd_dtypesZblackwell_fmha_bwd_dtypesZdtype_supported_blackwellÚreasonZhopper_fmha_dtypesZdtype_supported_hopperZcutlass_fmha_dtypesZdtype_supported_cutlassr   r   r   Úchoose_natten_backend@   sŠ   '

ÿÿÿÿÿÿýr;   Úcausal_typec                 C   s¾   t t|d}ts|dtd dS t| jƒ}t|ƒ}	t|ƒ}
t| |||	|
dd|dd	s2|dtd dS t	| ||||d	 |rM|t
jt
jfvrM|d
td dS t| |||||d}|du r]dS dS )a>  
    Input validation function for the NATTEN backend.
    Runs the common checks in addition to trying to find a compatible NATTEN backend. If any checks
    fail, or no compatible backend is found in NATTEN, returns False.

    Parameters:
        query (Tensor): 4-D query tensor, with the heads-last contiguous layout
            (`[batch, seqlen, heads, head_dim]`).

        key (Tensor): 4-D key tensor, with the heads-last contiguous layout
            (`[batch, seqlen_kv, heads_kv, head_dim]`).

        value (Tensor): 4-D value tensor, with heads-last contiguous layout
            (`[batch, seqlen_kv, heads_kv, head_dim_v]`).

        is_causal (bool): whether or not causal masking is enabled.

        causal_type (CausalType): causal masking mode. Choices: `CausalType.TopLeft`,
            `CausalType.BottomRight`. Required when `is_causal = True`.

        is_varlen (bool): whether or not a variable length (varlen) use case. Must be inferred
            beforehand based on arguments such as seqlens_{Q,KV} or cumulative_seqlen_{Q,KV} being
            passed.

        raise_error (bool): whether to raise an error if any checks fail or no backend is selected,
            instead of just returning False. Default is False.

    Returns:
        success (bool): whether use case is compatible with NATTEN backend.

    r   úlNATTEN is not supported in this environment. Run with debug logs to find out why, or choose another backend.r)   FTzNATTEN Attention©	r   r   r   Úsupported_dtypes_forwardÚsupported_dtypes_backwardÚsupports_mlaÚsupports_gqa_mqar   Úbackend_nameú)NATTEN does not support the given inputs.)r   r   r   r   r<   z?NATTEN Attention only supports top-left causal masking for now.)r   r   r   N)r   r   r   r6   r   r+   r
   r	   r   r   r   ÚTopLeftÚDontCarer;   )r   r   r   r   r<   r   r   r7   r8   Ú
fwd_dtypesÚ
bwd_dtypesÚnatten_backendr   r   r   Únatten_attention_check¼   sN   (þ
÷ûÿrJ   c                 C   s4   t | ||dd|d}ddddœ}||v sJ ‚|| S )ag  
    Chooses an FNA backend in NATTEN (cutlass-fna, hopper-fna, blackwell-fna) for the current
    use case based on features needed and current GPU architecture.

    Using tensor shapes, it infers whether MLA (head_dim_value != head_dim_qk) or
    GQA/MQA (heads_kv != heads_q) are required.
    Using tensor device, it infers GPU architecture and compatible backends.
    Using arguments is_causal and is_varlen, and other inferred features, it picks the best
    available backend.

    It is possible for no backend to be selected, if the combination of features is not available in
    any one of the NATTEN backends, in which case it will return None.

    Parameters:
        query (Tensor): 4-D, 5-D, or 6-D query tensor, with the heads-last contiguous layout
            (`[batch, *token_layout_shape, heads, head_dim]`).

        key (Tensor): 4-D, 5-D, or 6-D key tensor, with the heads-last contiguous layout
            (`[batch, *token_layout_shape, heads_kv, head_dim]`).

        value (Tensor): 4-D, 5-D, or 6-D value tensor, with heads-last contiguous layout
            (`[batch, *token_layout_shape, heads_kv, head_dim_v]`).

        raise_error (bool): whether to raise an error if no backend is selected, instead of just
            returning None. Default is False.

    Returns:
        backend (str | None): selected NATTEN backend, if any compatible.

    F)r   r   r   r   r   r   zcutlass-fnaz
hopper-fnazblackwell-fna)r'   r&   r#   )r;   )r   r   r   r   Zfmha_backendZ"natten_fmha_backend_to_fna_backendr   r   r   Úchoose_natten_multi_dim_backend  s   #ú
ýrK   c           	      C   s„   t t|d}ts|dtd dS t| jƒ}t|ƒ}t|ƒ}t| ||||dd|dd	s2|dtd dS t	| |||d}|d	u r@dS dS )
aÝ  
    Input validation function for the NATTEN multi-dimensional backend.
    Runs the common checks in addition to trying to find a compatible NATTEN backend. If any checks
    fail, or no compatible backend is found in NATTEN, returns False.

    Parameters:
        query (Tensor): 4-D, 5-D, or 6-D query tensor, with the heads-last contiguous layout
            (`[batch, *token_layout_shape, heads, head_dim]`).

        key (Tensor): 4-D, 5-D, or 6-D key tensor, with the heads-last contiguous layout
            (`[batch, *token_layout_shape, heads_kv, head_dim]`).

        value (Tensor): 4-D, 5-D, or 6-D value tensor, with heads-last contiguous layout
            (`[batch, *token_layout_shape, heads_kv, head_dim_v]`).

        raise_error (bool): whether to raise an error if any checks fail or no backend is selected,
            instead of just returning False. Default is False.

    Returns:
        success (bool): whether use case is compatible with NATTEN backend.

    r   r=   r)   FTz"NATTEN Multi-Dimensional Attentionr>   rD   N)
r   r   r   r6   r   r+   r
   r	   r   rK   )	r   r   r   r   r7   r8   rG   rH   rI   r   r   r   Ú natten_multi_dim_attention_checkL  s6   þ
÷rL   )N)F)Ú__doc__Ú	functoolsr   r0   r   Ú.cosmos_policy._src.imaginaire.attention.checksr   r   r   Ú-cosmos_policy._src.imaginaire.attention.masksr   Ú.cosmos_policy._src.imaginaire.attention.nattenr   Z3cosmos_policy._src.imaginaire.attention.natten.metar	   r
   Ú-cosmos_policy._src.imaginaire.attention.utilsr   r   r   r.   r   ÚboolÚlistr   Ústrr;   rJ   rK   rL   r   r   r   r   Ú<module>   sŒ   ÿÿÿÿÿ
þÿÿÿÿÿÿÿ
þ ùÿþýüûúù
ø$Z:üÿþýüû