
    i9	hL                     N   d dl  d dlmZ d dlmZ d dlmZ d dlZd dlZd dl	Z	d dl
Z
d dlmZ d dlmc mZ d dlZ
d dlZ
d dlZ
d dlZd dlmZ ddlmZmZmZ d	d
lmZmZmZ ddlmZ  G d dej@                        Z! G d dej@                        Z" G d dej@                        Z#y)    )*)Number)partial)PathN)hf_hub_download   )normalized_view_plane_uvrecover_focal_shiftgaussian_blur_2d   )wrap_dinov2_attention_with_sdpa'wrap_module_with_gradient_checkpointing)unwrap_module_with_gradient_checkpointing)timeitc                   L     e Zd Zddededededed   ded   f fd	Zd
 Z xZS )ResidualConvBlockin_channelsout_channelshidden_channelspadding_mode
activation)relu
leaky_relusiluelunorm
group_norm
layer_normc                 .   t         t        |           ||}||}|dk(  rd }n)|dk(  rd }n |dk(  rd }n|dk(  rd }nt        d	|       t	        j
                  t	        j                  d
|       |       t	        j                  ||dd
|      t	        j                  |dk(  r|dz  nd
|       |       t	        j                  ||dd
|            | _        ||k7  rt	        j                  ||d
d      | _
        y t	        j                         | _
        y )Nr   c                  .    t        j                  d      S NTinplace)nnReLU     9/home/cameronsmith/repos/controll3r/MoGe/moge/model/v1.py<lambda>z,ResidualConvBlock.__init__.<locals>.<lambda>        RWWT%: r(   r   c                  0    t        j                  dd      S )Ng?T)negative_sloper$   )r%   	LeakyReLUr'   r(   r)   r*   z,ResidualConvBlock.__init__.<locals>.<lambda>"   s    R\\d%S r(   r   c                  .    t        j                  d      S r"   )r%   SiLUr'   r(   r)   r*   z,ResidualConvBlock.__init__.<locals>.<lambda>$   r+   r(   r   c                  .    t        j                  d      S r"   )r%   ELUr'   r(   r)   r*   z,ResidualConvBlock.__init__.<locals>.<lambda>&   s    RVVD%9 r(   z!Unsupported activation function: r      )kernel_sizepaddingr   r       r   )r4   r5   )superr   __init__
ValueErrorr%   
Sequential	GroupNormConv2dlayersIdentityskip_connection)	selfr   r   r   r   r   r   activation_cls	__class__s	           r)   r8   zResidualConvBlock.__init__   s$   /1&L")O:N<'SN& :N5 9N@MNNmmLLK(IIk?1[ghLL$,2FB.A_IIo|A\hi
 bmp|a|ryylPQ[\]  CE  CN  CN  CPr(   c                 T    | j                  |      }| j                  |      }||z   }|S N)r?   r=   )r@   xskips      r)   forwardzResidualConvBlock.forward5   s-    ##A&KKNHr(   )NN	replicater   r   )	__name__
__module____qualname__intstrLiteralr8   rG   __classcell__rB   s   @r)   r   r      sf    PC Ps PTW Pnq P  NU  Vy  Nz P  KR  Sm  Kn P:r(   r   c                        e Zd Zdg dddddddfdeded	ee   d
edee   dededed   dededef fdZdedefdZded	edededededed   fdZde	j                  de	j                  fdZ xZS )Head         rV   r   r   r   r6   num_featuresdim_indim_outdim_projdim_upsampledim_times_res_block_hiddennum_res_blocksres_block_normr   last_res_blockslast_conv_channelslast_conv_sizec                 T   t         |           t        j                  t	        |      D cg c]  }t        j
                  ||ddd       c}      | _        t        j                  t        |g|d d z   |      D cg c]B  \  }t        j                  | j                  |dz         gfdt	        |      D         D c}}      | _
        t        j                  |D cg c]  }| j                  |d   dz   ||	|
|      ! c}      | _        y c c}w c c}}w c c}w )Nr   r   )r   r   r4   strider5   r   c              3   D   K   | ]  }t        z  d         ywr   )r   r   Nr   ).0_r\   out_chr^   s     r)   	<genexpr>z Head.__init__.<locals>.<genexpr>T   s<       a  FG#FF4NQW4Wdjq  A  A  a    )r7   r8   r%   
ModuleListranger<   projectszipr:   _make_upsamplerupsample_blocks_make_output_blockoutput_block)r@   rW   rX   rY   rZ   r[   r\   r]   r^   r_   r`   ra   ri   in_chrj   dim_out_rB   s         ` `     ` r)   r8   zHead.__init__=   sJ    	ot  vB  pC'
jkBII&xQWXbce'
   "}} $'zL"4E'E|#T	.
 .
  % MM$$UQY7 a  KP  Q_  K`  a.
   MM &+
  ##R 1$h0JO]oq  BP+
 '
.
+
s   !DAD
)$D%r   r   c                    t        j                  t        j                  ||dd      t        j                  ||dddd            }|d   j                  j
                  d d d d d dd df   |d   j                  j
                  d d  |S )Nr   )r4   rc   r3   r   rH   r4   rc   r5   r   r   )r%   r:   ConvTranspose2dr<   weightdata)r@   r   r   	upsamplers       r)   rq   zHead._make_upsampler^   s    MM{LaPQRIIlLaSTcno
	 '0l&9&9&>&>q!RaR!|&L	!  #r(   c                     t        j                  t        j                  |dddd      gfdt        |      D        t        j                  d      t        j                  ||d|dz  d       S )	Nr3   r   rH   rx   c              3   D   K   | ]  }t        z  d         ywrf   rg   )rh   ri   r\   r`   r^   s     r)   rk   z*Head._make_output_block.<locals>.<genexpr>i   sF       B  fg 24FHbewHw  EK  R`  a  a  Brl   Tr#   r   )r%   r:   r<   rn   r&   )r@   rX   rY   r\   r_   r`   ra   r^   s      ` ` `r)   rs   zHead._make_output_blockf   s    }}IIf0aSTcno
 B  kp  q@  kA  B
 GGD!
 II('~VWaostat  DO  P	
 	
r(   hidden_statesimagec                    |j                   dd  \  }}|dz  |dz  }}t        j                  t        | j                  |      D 	cg c]A  \  }\  }}	 ||j                  ddd      j                  d||f      j                               C c}	}}d      j                  d      }
t        | j                        D ]  \  }}t        |
j                   d   |
j                   d   ||z  |
j                  |
j                        }|j                  ddd      j                  d      j                  |
j                   d   ddd      }t        j                   |
|gd      }
|D ].  }t        j"                  j$                  j%                  ||
d	
      }
0  t'        j(                  |
||fdd	      }
t        |
j                   d   |
j                   d   ||z  |
j                  |
j                        }|j                  ddd      j                  d      j                  |
j                   d   ddd      }t        j                   |
|gd      }
t+        | j,                  t.        j0                        rE| j,                  D cg c].  }t        j"                  j$                  j%                  ||
d	
      0 }}|S t        j"                  j$                  j%                  | j,                  |
d	
      }|S c c}	}}w c c}w )N   r   r   r   dimrd   )widthheightaspect_ratiodtypedeviceF)use_reentrantbilinear)modealign_corners)shapetorchstackrp   ro   permute	unflatten
contiguoussum	enumeraterr   r	   r   r   	unsqueezeexpandcatutils
checkpointFinterpolate
isinstancert   r%   rm   )r@   r   r   img_himg_wpatch_hpatch_wprojfeatclstokenrE   iblockuvlayeroutputs                   r)   rG   zHead.forwardn   s   {{23'u B; KK.1$--.O
 
*D*4 aA&00Wg4FGRRTU
  #!#* 	
 "$"6"67 	UHAu)AGGBK^cfk^kstszsz  DE  DL  DL  MBAq!$..q188RRPB		1b'q)A UKK**55eQe5TU	U MM!eU^*ER%AGGBKZ_bgZgopovov  @A  @H  @H  IZZ1a **1-44QWWQZRLIIq"g1%d''7dhduduv[`ekk,,77qPU7VvFv  [[++66t7H7H![`6aF5
, ws   AK):3K0)rI   rJ   rK   rL   ListrN   r8   rq   rs   r   TensorrG   rO   rP   s   @r)   rR   rR   <   s     "1*+>J "$  c	
  3i %(    :;    B3 c 
 
s 
X[ 
nq 
  HK 
  ]` 
  ry  zT  rU 
U\\ %,, r(   rR   c                   p    e Zd ZU ej                  ed<   ej                  ed<   dddg ddddd	d
dgddddg dfdedeee	e   f   dede	e   dedede
d   de
d   deeef   dedededef fdZed g dfd!eeeee   f   d"eeeef      d#d fd$       Zd% Zd& Zd' Zd(ej                  d#ej                  fd)Zd*ej                  d+ed#eeej                  f   fd,Z ej8                         	 	 	 	 	 	 d3d*ej                  d-eeej                  f   d.ed+ed/ed0ed1ed#eeej                  f   fd2       Z xZS )4	MoGeModel
image_mean	image_stddinov2_vitb14   rS   rT   r   linearr   i  i	  r   r6         ?)r3   r3   r   r   encoderintermediate_layersrZ   r[   r\   r]   remap_output)FTr   sinhexpsinh_expr^   r   num_tokens_ranger_   r`   ra   mask_thresholdc                 P   t         t        |           |r5d|v r|d   d   dz  |d   d   dz  g}	|d= t        j                  d|        || _        || _        || _        |	| _        || _	        t        t        j                  dt              |      } |d      | _        | j                  j                  d   j                   j"                  j$                  }t'        t)        |t*              r|n
t-        |      ||||||||
||	      | _        t1        j2                  g d
      j5                  dddd      }t1        j2                  g d      j5                  dddd      }| j7                  d|       | j7                  d|       t0        j8                  dk\  r| j;                          y y )Ntrained_area_ranger      r   z8The following deprecated/invalid arguments are ignored: z.dinov2.hub.backbonesF
pretrained)rW   rX   rY   rZ   r[   r\   r]   r^   r_   r`   ra   )g
ףp=
?gv/?gCl?r3   )gZd;O?gy&1?g?r   r   z2.0)r7   r   r8   warningswarnr   r   r   r   r   getattr	importlibimport_module__package__backboneblocksattnqkvin_featuresrR   r   rL   lenheadr   tensorviewregister_buffer__version__enable_pytorch_native_sdpa)r@   r   r   rZ   r[   r\   r]   r   r^   r   r_   r`   ra   r   rY   deprecated_kwargs
hub_loaderdim_featurer   r   rB   s                       r)   r8   zMoGeModel.__init__   s   " 	i')#'88$56J$KA$NRY$Y[l  nB  \C  DE  \F  JQ  \Q  $R %&:;MMTUfTghi(#6  0, Y445LkZ\cd
"e4mm**1-2266BB0:;NPS0T,Z]^qZr%'A))+1)
	 \\"78==aAqI
LL!67<<Q1aH	\:6[)4%++- &r(   Npretrained_model_name_or_pathmodel_kwargsreturnc           
      `   t        |      j                         rt        j                  |dd      }n't	        d|ddd|}t        j                  |dd      }|d   }||j                  |        | di |d|i}	 |j                         }	i }
|d   j                         D ]e  \  }}||	v rN|j                  |	|   j                  k(  r||
|<   ,t        d	| d
|j                   d|	|   j                          Xt        d|        g |	j                  |
       |j                  |	       |S )a  
        Load a model from a checkpoint file.

        ### Parameters:
        - `pretrained_model_name_or_path`: path to the checkpoint file or repo id.
        - `model_kwargs`: additional keyword arguments to override the parameters in the checkpoint.
        - `hf_kwargs`: additional keyword arguments to pass to the `hf_hub_download` function. Ignored if `pretrained_model_name_or_path` is a local path.

        ### Returns:
        - A new instance of `MoGe` with the parameters loaded from the checkpoint.
        cpuT)map_locationweights_onlymodelzmodel.pt)repo_id	repo_typefilenamemodel_configrY   zShape mismatch for z: checkpoint shape z, model shape zSkipping missing key: r'   )r   existsr   loadr   updateload_state_dict
state_dictitemsr   print)clsr   r   rY   	hf_kwargsr   cached_checkpoint_pathr   r   model_state_dictnew_state_dictkvs                r)   from_pretrainedzMoGeModel.from_pretrained   s]    -.557$APUdhiJ%4 &5!#& 	&" $:]abJ!.1#-4l474  %//1N"7+113 81((ww"21"5";";;,-q) 3A36I!''R`aqrsatazaz`{|}21#678 ##N3!!"23r(   c                     t         j                  j                  d| j                  d      j	                         }| j
                  j                  |       y)z?Load the backbone with pretrained dinov2 weights from torch hubzfacebookresearch/dinov2Tr   N)r   hubr   r   r   r   r   )r@   r   s     r)   init_weightszMoGeModel.init_weights   s;    YY^^$=t||X\^]hhj
%%j1r(   c                     t        t        | j                  j                              D ];  }t	        | j                  j                  |         | j                  j                  |<   = y rD   )rn   r   r   r   r   r@   r   s     r)   enable_gradient_checkpointingz'MoGeModel.enable_gradient_checkpointing  sP    s4==//01 	gA&MdmmNbNbcdNe&fDMM  #	gr(   c                     t        t        | j                  j                              D ]J  }t	        | j                  j                  |   j
                        | j                  j                  |   _        L y rD   )rn   r   r   r   r   r   r   s     r)   r   z$MoGeModel.enable_pytorch_native_sdpa	  sY    s4==//01 	iA+J4==K_K_`aKbKgKg+hDMM  #(	ir(   pointsc                 &   | j                   dk(  r	 |S | j                   dk(  rt        j                  |      }|S | j                   dk(  rK|j                  ddgd      \  }}t        j                  |      }t        j
                  ||z  |gd      }|S | j                   dk(  rY|j                  ddgd      \  }}t        j
                  t        j                  |      t        j                  |      gd      }|S t        d	| j                          )
Nr   r   r   r   r   rd   r   r   zInvalid remap output type: )r   r   r   splitr   r   r9   )r@   r   xyzs       r)   _remap_pointszMoGeModel._remap_points  s   (  'ZZ'F  %'LL!QRL0EB		!AYYQ{3F  +LL!QRL0EBYY

2		!=2FF  :4;L;L:MNOOr(   r   
num_tokensc                 .   |j                   dd  \  }}||}}|| j                  z
  | j                  z  }t        j                  ||dz  dz  |dz  dz  fddd      }| j
                  j                  || j                  d      }| j                  ||      }	|	S )Nr   r   r   FT)r   r   	antialias)return_class_token)r   r   r   r   r   r   get_intermediate_layersr   r   r   autocastr   typefloat32r   squeezer   )r@   r   r   original_heightoriginal_widthresized_widthresized_heightimage_14featuresr   r   maskreturn_dicts                r)   rG   zMoGeModel.forward  s    */++bc*:'
 )7~ (DNN:==2)=)BMUWDWZ\D\(]dn  D  PT  U ==884C[C[pt8u 8U+r(   fov_xresolution_level
apply_maskforce_projectionuse_fp16c           
      J   |j                         dk(  rd}|j                  d      }nd}|j                  dd \  }	}
|	|
z  }|
|	z  }|&| j                  \  }}t	        ||dz  ||z
  z  z         }t        j                  |j                  j                  t
        j                  |      5  | j                  ||      }ddd       d	   |d
   }}|| j                  kD  }|t        ||      \  }}n|d|dz  z   dz  z  t        j                  t        j                  t        j                  ||j                  |j                         dz              z  }|j"                  dk(  r!|d   j%                  |j                  d         }t        |||      \  }}|dz  d|dz  z   dz  z  |z  }|dz  d|dz  z   dz  z  }t&        j
                  j)                  ||dd      }|d   |d   z   }|r"t&        j
                  j+                  ||      }nL|t        j,                  t        j.                  |      t        j.                  |      |gd      dddddf   z   }|rMt        j0                  |d   |t
        j2                        }t        j0                  ||t
        j2                        }|rU|j5                  d      }|j5                  d      }|j5                  d      }|j5                  d      }|j5                  d      }||||t        j6                  |      d}|S # 1 sw Y   _xY w)a  
        User-friendly inference function

        ### Parameters
        - `image`: input image tensor of shape (B, 3, H, W) or (3, H, W)        - `fov_x`: the horizontal camera FoV in degrees. If None, it will be inferred from the predicted point map. Default: None
        - `resolution_level`: An integer [0-9] for the resolution level for inference. 
            The higher, the finer details will be captured, but slower. Defaults to 9. Note that it is irrelevant to the output size, which is always the same as the input size.
            `resolution_level` actually controls `num_tokens`. See `num_tokens` for more details.
        - `num_tokens`: number of tokens used for inference. A integer in the (suggested) range of `[1200, 2500]`.
            `resolution_level` will be ignored if `num_tokens` is provided. Default: None
        - `apply_mask`: if True, the output point map will be masked using the predicted mask. Default: True
        - `force_projection`: if True, the output point map will be recomputed to match the projection constraint. Default: True
        - `use_fp16`: if True, use mixed precision to speed up inference. Default: True
            
        ### Returns

        A dictionary containing the following keys:
        - `points`: output tensor of shape (B, H, W, 3) or (H, W, 3).
        - `depth`: tensor of shape (B, H, W) or (H, W) containing the depth map.
        - `intrinsics`: tensor of shape (B, 3, 3) or (3, 3) containing the camera intrinsics.
        r3   Tr   Fr   N	   )device_typer   enabledr   r  r   r   r   )r   r   )focal).r   ).NN)
intrinsicsrd   r   .).N)r   r  depthr  	mask_prob)r   r   r   r   rL   r   r  r   r  float16rG   r   r
   tandeg2rad	as_tensorr   ndimr   utils3dintrinsics_from_focal_centerdepth_to_pointsr   
zeros_likewhereinfr  sigmoid)r@   r   r  r  r   r  r  r  omit_batch_dimr  r	  arear   
min_tokens
max_tokensr   r   r  mask_binaryr  shiftri   fxfyr  r  r  s                              r)   inferzMoGeModel.inferA  sG   B 99;!!NOOA&E"N*/++bc*:'/%7%)%:%:"J
Z+;a+?JQ[D[*\\]J^^(9(9X`a 	5\\%4F	5h'T000 =.v{CLE5 A(9$9c#AAEIIemm\a\k\klq  {A  {H  {H  PV  P\  P\  ]]  `a  ]a  Ob  Ec  cEzzQd**6<<?;*6;eLHAuQY!la//C77,FQY!la//C77]]??BSQ
v!77 ]]225Z2PFekk5+;+;E+BEDTDTUZD[]b*ciklmprvx|~m  A  AF [[Y!7KFKKUEII>E^^A&F#++A.JMM!$E%--a0K<<?D $t,
 [	5 	5s   (LL")Nr  NTTT) rI   rJ   rK   r   r   __annotations__rM   UnionrL   r   rN   Tupler   floatr8   classmethodr   IObytesOptionalDictAnyr   r   r   r   r   rG   inference_modeboolr1  rO   rP   s   @r)   r   r      sV   || '56"1*+RZ>J37, "$ #;.;."3S	>2;. ;. 3i	;.
 %(;. ;. NO;.   :;;.  /;. ;.  ;. ;. ;.z  CG  Q] ,E#tRPUYBV<W ,goptuxz}u}p~g ,  ny , ,\2
giELL U\\  "U\\ "s "tCDU?V "H U .2 !!%[||[ VU\\)*[ 	[
 [ [ [ [ 
c5<<	 [ [r(   r   )$typingnumbersr   	functoolsr   pathlibr   r   r   jsonr   torch.nnr%   torch.nn.functional
functionalr   torch.utilstorch.utils.checkpointtorch.versionr"  huggingface_hubr   utils.geometry_torchr	   r
   r   r   r   r   r   utils.toolsr   Moduler   rR   r   r'   r(   r)   <module>rM     s                   + b b G  G  "		 "JQ299 QhM		 Mr(   