
     h"              	       :   d dl mZ d dlZd dlZd dlmZ d dlZddlmZm	Z	m
Z
mZmZ ddlmZ d dlmZ d dlZd dlmZ  ed	      Zej.                  Z ej2                  e       ej2                  d
      k\  sJ d       ddZ G d deej6                  dddg      Zy)    )deepcopyN)version   )fill_default_argsfreeze_all_paramsis_symmetrized
interleavetranspose_to_landscape)head_factory)get_patch_embed)CroCoNetinfz0.22.0zCOutdated huggingface_hub version, please reinstall requirements.txtc                    |rt        d|        t        j                  | d      }|d   j                  j	                  dd      }d|vr	|d d d	z   }n"|j	                  d
d      j	                  dd      }d|v sJ |rt        d|        t        |      }|j                  |d   d      }|rt        |       |j                  |      S )Nz... loading model fromcpu)map_locationargsManyAR_PatchEmbedPatchEmbedDust3Rlandscape_onlyz, landscape_only=False)  zlandscape_only=Truezlandscape_only=Falsezinstantiating : modelF)strict)printtorchloadr   replaceevalload_state_dictto)
model_pathdeviceverboseckptr   netss          :/home/cameronsmith/repos/controll3r/dust3r/dust3r/model.py
load_modelr)      s    &
3::ju5D<%%&9;MNDt#CRy44||C$,,-BDZ[!T)) '(
t*CDM%8Aa66&>    c                        e Zd ZdZddde efddefdddf fd		Ze fd
       ZddZ fdZ	d Z
d Zd Zd Zd Zd Zd Zd Zd Z xZS )AsymmetricCroCo3DStereoz Two siamese encoders, followed by two decoders.
    The goal is to output 3d points directly, both images in view1's frame
    (hence the asymmetry).   
    pts3dlinearexpr   noneTr   c                     || _         t        |t        	|         | _        t        	|   di | t        | j                        | _         | j                  |||||fi | | j                  |       y )N )
patch_embed_clsr   super__init__
croco_argsr   
dec_blocksdec_blocks2set_downstream_head
set_freeze)
selfoutput_mode	head_type
depth_mode	conf_modefreezer   r3   croco_kwargs	__class__s
            r(   r5   z AsymmetricCroCo3DStereo.__init__:   sn      /+L%':JK(<( $DOO4   iU^obnor*   c                     t         j                  j                  |      rt        |d      S 	 t	        t
        |   |fi |}|S # t        $ r}t        d| d      d }~ww xY w)Nr   )r#   ztried to load z from huggingface, but failed)	ospathisfiler)   r4   r,   from_pretrained	TypeError	Exception)clspretrained_model_name_or_pathkwr   erB   s        r(   rG   z'AsymmetricCroCo3DStereo.from_pretrainedL   ss    77>>78;EJJo5sKLipmop L  o.1N0OOl mnnos   A 	A"AA"c                 >    t        | j                  |||      | _        y )N)r   r3   patch_embed)r;   img_size
patch_sizeenc_embed_dims       r(   _set_patch_embedz(AsymmetricCroCo3DStereo._set_patch_embedW   s    *4+?+?:Wder*   c                     t        |      }t        d |D              s?|j                         D ],  \  }}|j                  d      s|||j	                  dd      <   . t        |   |fi |S )Nc              3   >   K   | ]  }|j                  d         yw)r8   N)
startswith).0ks     r(   	<genexpr>z:AsymmetricCroCo3DStereo.load_state_dict.<locals>.<genexpr>]   s     =11<<.=s   r7   r8   )dictanyitemsrV   r   r4   r    )r;   r%   rL   new_ckptkeyvaluerB   s         r(   r    z'AsymmetricCroCo3DStereo.load_state_dictZ   sm    :==="jjl O
U>>,/INHS[[}EFO w&x6266r*   c                     || _         g | j                  g| j                  | j                  | j                  gd}t	        ||          y )N)r0   maskencoder)r@   
mask_tokenrO   
enc_blocksr   )r;   r@   to_be_frozens      r(   r:   z"AsymmetricCroCo3DStereo.set_freezec   sD    __%)9)94??K

 	,v./r*   c                      y)z No prediction head Nr2   )r;   r   kwargss      r(   _set_prediction_headz,AsymmetricCroCo3DStereo._set_prediction_headl   s    r*   c                 f   |d   |z  dk(  r|d   |z  dk(  sJ d|d|       || _         || _        || _        || _        t	        ||| t        |            | _        t	        ||| t        |            | _        t        | j                  |      | _	        t        | j                  |      | _
        y )Nr   r   z	img_size=z  must be multiple of patch_size=)has_conf)activate)r<   r=   r>   r?   r   booldownstream_head1downstream_head2r
   head1head2)	r;   r<   r=   r   r>   r?   rQ   rP   rL   s	            r(   r9   z+AsymmetricCroCo3DStereo.set_downstream_headp   s    {Z'1,!z1IQ1N 	=xk:zm<	=&"$" ,YTTXYbTc d ,YTTXYbTc d+D,A,AN[
+D,A,AN[
r*   c                     | j                  ||      \  }}| j                  J | j                  D ]  } |||      } | j                  |      }||d fS )N)
true_shape)rO   enc_pos_embedrd   enc_norm)r;   imagerr   xposblks         r(   _encode_imagez%AsymmetricCroCo3DStereo._encode_image   sh    !!%J!?3 !!)) ?? 	CAsA	 MM!#t|r*   c                    |j                   dd  |j                   dd  k(  rq| j                  t        j                  ||fd      t        j                  ||fd            \  }}}|j	                  dd      \  }}|j	                  dd      \  }}	n,| j                  ||      \  }}}| j                  ||      \  }}	}||||	fS )Nr   )dim   )shapery   r   catchunk)
r;   img1img2true_shape1true_shape2outrw   _out2pos2s
             r(   _encode_image_pairsz+AsymmetricCroCo3DStereo._encode_image_pairs   s    ::bc?djjo-,,UYYd|-K-2YY[7QWX-Y[KCa		!	+IC		!	+IC,,T;?KCa ..t[AMD$D#t##r*   c           	      H   |d   }|d   }|j                   d   }|j                  dt        j                  |j                   dd        d    j	                  |d            }|j                  dt        j                  |j                   dd        d    j	                  |d            }t        ||      rP| j                  |d d d   |d d d   |d d d   |d d d         \  }}	}
}t        ||	      \  }}	t        |
|      \  }
}n| j                  ||||      \  }}	}
}||f||	f|
|ffS )Nimgr   rr   r{   r   r}   )r~   getr   tensorrepeatr   r   r	   )r;   view1view2r   r   Bshape1shape2feat1feat2pos1r   s               r(   _encode_symmetrizedz+AsymmetricCroCo3DStereo._encode_symmetrized   sA   U|U|JJqM<djjo)Ft)L)S)STUWX)YZ<djjo)Ft)L)S)STUWX)YZ %''+'?'?SqS	4PSRSPS9V\]`_`]`Vacijmlmjmcn'o$E5$%eU3LE5#D$/JD$'+'?'?dFTZ'[$E5$%$==r*   c                    ||fg}| j                  |      }| j                  |      }|j                  ||f       t        | j                  | j                        D ]H  \  }} |g |d   d d d   || \  }} |g |d   d d d   || \  }}|j                  ||f       J |d= t        t        | j                  |d               |d<   t        | S )Nr   r   )decoder_embedappendzipr7   r8   tuplemapdec_norm)	r;   f1r   f2r   final_outputblk1blk2r   s	            r(   _decoderz AsymmetricCroCo3DStereo._decoder   s    Rz ##RH%doot/?/?@ 	*JD$=,r*4R40=$==EB=,r*4R40=$==EBR)	* O T]]L4D!EFRL!!r*   c                 X    |d   j                   \  }}}t        | d|       } |||      S )Nr   head)r~   getattr)r;   head_numdecout	img_shaper   SDr   s           r(   _downstream_headz(AsymmetricCroCo3DStereo._downstream_head   s7    *""1attH:./FI&&r*   c           	         | j                  ||      \  \  }}\  }}\  }}| j                  ||||      \  }	}
t        j                  j                  j                  d      5  | j                  d|	D cg c]  }|j                          c}|      }| j                  d|
D cg c]  }|j                          c}|      }d d d        j                  d      |d<   |fS c c}w c c}w # 1 sw Y   +xY w)NF)enabledr   r}   r-   pts3d_in_other_view)	r   r   r   cudaampautocastr   floatpop)r;   r   r   r   r   r   r   r   r   dec1dec2tokres1res2s                 r(   forwardzAsymmetricCroCo3DStereo.forward   s    9=9Q9QRWY^9_6.5%,4 ]]5$t<
dZZ^^$$U$3 	S((D,ISSYY[,I6RD((D,ISSYY[,I6RD	S '+hhw&7"#Tz	 -J,I	S 	Ss*   !C*2C 	C*!C%8C* 
C**C3)      i   )__name__
__module____qualname____doc__r   r5   classmethodrG   rS   r    r:   rh   r9   ry   r   r   r   r   r   __classcell__)rB   s   @r(   r,   r,   .   s     %#"SD#.!1c? $!3 $  f70\	$>&"*'r*   r,   dust3rzhttps://github.com/naver/dust3rzimage-to-3d)library_namerepo_urltags)T)copyr   r   rD   	packagingr   huggingface_hub
utils.miscr   r   r   r	   r
   headsr   dust3r.patch_embedr   dust3r.utils.path_to_crocor   models.crocor   r   r   __version__hf_version_numberparser)   PyTorchModelHubMixinr,   r2   r*   r(   <module>r      s      	   p p  . ! !El#// w}}&'=7==+BB j Fi j&d((.
dr*   