
     h                         d dl Zd dlZd
dZd Zd Zd Z	 d dlmZ eZ	y# e
$ r*  ed        G d d	ej                  j                        Z	Y yw xY w)    Nc                    t        j                  |t         j                        }t        j                  |t         j                        }t        j                  ||      }t        j                  |d      }|j                  dd||g      }t        | |      }|dkD  r.t        j                  t        j                  || g      |gd      }|S )z
    grid_size: int of the grid height and width
    return:
    pos_embed: [grid_size*grid_size, embed_dim] or [n_cls_token+grid_size*grid_size, embed_dim] (w/ or w/o cls_token)
    dtyper   axis      )	nparangefloat32meshgridstackreshape!get_2d_sincos_pos_embed_from_gridconcatenatezeros)	embed_dim	grid_sizen_cls_tokengrid_hgrid_wgrid	pos_embeds          D/home/cameronsmith/repos/controll3r/dust3r/croco/models/pos_embed.pyget_2d_sincos_pos_embedr      s     YYy

3FYYy

3F;;vv&D88Dq!D<<Ay)45D1)TBI1}NNBHHk9-E$F	#RYZ[	    c                     | dz  dk(  sJ t        | dz  |d         }t        | dz  |d         }t        j                  ||gd      }|S )Nr   r   r	   r   )!get_1d_sincos_pos_embed_from_gridr
   r   )r   r   emb_hemb_wembs        r   r   r   (   sT    q=A .i1nd1gFE-i1nd1gFE
..%a
0CJr   c                 L   | dz  dk(  sJ t        j                  | dz  t              }|| dz  z  }dd|z  z  }|j                  d      }t        j                  d||      }t        j
                  |      }t        j                  |      }t        j                  ||gd	
      }|S )z}
    embed_dim: output dimension for each position
    pos: a list of positions to be encoded: size (M,)
    out: (M, D)
    r   r   r   g       @      ?i'  zm,d->mdr	   r   )r
   r   floatr   einsumsincosr   )r   posomegaoutemb_sinemb_cosr!   s          r   r   r   3   s     q=AIIi1nE2E	Y^EE
++b/C
))IsE
*CffSkGffSkG
..'7+!
4CJr   c                 ~   d|v r8|d   }|j                   d   }| j                  j                  }| j                  j                   d   |z
  }t	        |j                   d   |z
  dz        }t	        |dz        }||k7  rt        d||||fz         |d d d |f   }|d d |d f   }	|	j                  d|||      j                  dddd	      }	t        j                  j                  j                  |	||fd
d      }	|	j                  dd	dd      j                  dd	      }	t        j                  ||	fd      }
|
|d<   y y y )Nr   r$   g      ?z(Position interpolate from %dx%d to %dx%dr      r	   r   bicubicF)sizemodealign_cornersdim)shapepatch_embednum_patchesr   intprintr   permutetorchnn
functionalinterpolateflattencat)modelcheckpoint_modelpos_embed_checkpointembedding_sizer9   num_extra_tokens	orig_sizenew_sizeextra_tokens
pos_tokensnew_pos_embeds              r   interpolate_pos_embedrM   N   sr   &&/<-33B7''33 ??004{B-33B7:JJsRS	{c)* <	9V^`h?iij/3D4D3D0DEL-a1A1B.BCJ#++B	9nU]]^_abdeghiJ,,88(H!5IUZ 9 \J#++Aq!Q7??1EJ!II|Z&@aHM,9[) ! 'r   )cuRoPE2DzZWarning, cannot find cuda-compiled version of RoPE2D, using a slow pytorch version insteadc                   B     e Zd Zd fd	Zd Zed        Zd Zd Z xZ	S )RoPE2Dc                 L    t         |           || _        || _        i | _        y )N)super__init__baseF0cache)selffreqrU   	__class__s      r   rS   zRoPE2D.__init__r   s#    GDIDGDJr   c                    ||||f| j                   vrd| j                  t        j                  d|d      j	                         j                  |      |z  z  z  }t        j                  |||j                        }t        j                  d||      j                  |      }t        j                  ||fd      }|j                         }|j                         }	||	f| j                   ||||f<   | j                   ||||f   S )Nr#   r   r   )devicer   zi,j->ijr$   r5   )rV   rT   r=   r   r%   tor   r&   rB   r(   r'   )
rW   Dseq_lenr[   r   inv_freqtfreqsr(   r'   s
             r   get_cos_sinzRoPE2D.get_cos_sinx   s    '&'tzz9$))Q10E0K0K0M0P0PQW0X[\0\"]^LLx~~NY8<??F		5%.b9iikiik69#Y

1WVE12::au455r   c                     | dd | j                   d   dz  f   | d| j                   d   dz  d f   }}t        j                  | |fd      S )N.r$   r   r5   )r7   r=   rB   )xx1x2s      r   rotate_halfzRoPE2D.rotate_half   sY    s.aggbkQ.../3q8H8J3J1KB99rc2YB//r   c                 4   |j                   dk(  sJ t        j                  j                  j	                  ||      d d d d d d d f   }t        j                  j                  j	                  ||      d d d d d d d f   }||z  | j                  |      |z  z   S )Nr   )ndimr=   r>   r?   	embeddingrg   )rW   tokenspos1dr(   r'   s        r   apply_rope1dzRoPE2D.apply_rope1d   s    ::q= ((%%//s;AtQMJC((%%//s;AtQMJCSLT%5%5f%=%CDDr   c                    |j                  d      dz  dk(  sJ d       |j                  d      dz  }|j                  dk(  r|j                  d   dk(  sJ | j                  |t	        |j                               dz   |j                  |j                        \  }}|j                  dd      \  }}| j                  ||dddddf   ||      }| j                  ||dddddf   ||      }t        j                  ||fd      }|S )	a  
            input:
                * tokens: batch_size x nheads x ntokens x dim
                * positions: batch_size x ntokens x 2 (y and x position of each token)
            output:
                * tokens after appplying RoPE2D (batch_size x nheads x ntokens x dim)
            r0   r   r   z0number of dimensions should be a multiple of twor$   r	   r5   N)r2   ri   r7   rb   r:   maxr[   r   chunkrm   r=   rB   )rW   rk   	positionsr]   r(   r'   yrd   s           r   forwardzRoPE2D.forward   s     ;;q>!#Q&Z(ZZA!#A>>1$)<)AA''3y}}+?+A6==RXR^R^_HC<<r<*DAq!!!Yq1u%5sC@A!!!Yq1u%5sC@AYY1v2.FMr   )g      Y@r#   )
__name__
__module____qualname__rS   rb   staticmethodrg   rm   rs   __classcell__)rY   s   @r   rP   rP   p   s,    			6 
	0 
	0	E	r   rP   )r   )numpyr
   r=   r   r   r   rM   models.curoperN   rP   ImportErrorr;   r>   Module r   r   <module>r~      sZ     $6:85&F 2	
fg/ /2s     ,AA