
    dPJf7                        d Z ddlZddlZddlmZ ddlmZmZ ddlmZ	 ddl
ZddlZddlZddlZddlZddlmZ ddlmZ ddlmZ ddlZddlmZ dd	lmZmZmZmZmZmZ ddl Z dd
l!m"Z" ddl#m$Z$m%Z% d Z&ddZ'd Z( G d dejR                        Z*d Z+ G d dejR                        Z, G d dejR                        Z- G d dejR                        Z.y)z$Code for pixelnerf and alternatives.    N)nn)	rearrangerepeat)
functional)deepcopy)cm)tqdm)CallableListOptionalTuple	GeneratorDict)defaultdict)
CrossAttn_PositionalEncodingNoFreqFactorc                     t        | d      S )Nz... c x y -> ... (x y) c)r   xs    4/home/cameronsmith/repos/diffusion_example/models.py<lambda>r      s    i"<=     c           	      f    t        | d|"t        | j                  d      dz              S |      S )Nz... (x y) c -> ... c x yg      ?r   )r   intsize)srcr   s     r   r   r      s2    i,FdedmSXXVX\\^M_I`u stu r   c           	         t        j                  | |      }t        j                  t        |      D cg c]  }ddd|z  |z  z  z   c}      }|j	                  d|f      }t        j
                  |       j	                  | df      }t        j                  ||d d d d df   z        |d d d d df<   t        j                  ||d d d d df   z        |d d dd df<   |S c c}w )N   i'     )torchzerostensorrangereshapearangesincos)nd	embeddingjwkts         r   sinusoidal_embeddingr/      s    Aq!I	%(CQq6a!eai00C	DB	QF	BQA'AyyR#A#Y/Ia!e		!b3Q3i-0Ia1f Ds   C"c                   &     e Zd Zd fd	Zd Z xZS )MyBlockc	                    t         t        |           t        j                  |      | _        t        j                  |||||      | _        t        j                  |||||      | _        |t        j                         n|| _
        || _        y N)superr1   __init__r   	LayerNormlnConv2dconv1conv2SiLU
activation	normalize)
selfshapein_cout_ckernel_sizestridepaddingr<   r=   	__class__s
            r   r5   zMyBlock.__init__!   sk    gt%',,u%YYtUKI
YYue[&'J
'1'9"'')z"r   c                     | j                   r| j                  |      n|}| j                  |      }| j                  |      }| j	                  |      }| j                  |      }|S r3   )r=   r7   r9   r<   r:   )r>   r   outs      r   forwardzMyBlock.forward)   sR     NNdggajjjoooc"jjoooc"
r   )   r   r   NT)__name__
__module____qualname__r5   rH   __classcell__rE   s   @r   r1   r1       s    #r   r1   c           	      6   d }g }t        t        |       dz
        D ]S  }|j                  t        j                  | |   | |dz                   |j                  t        j
                                U t        j                  |d d  }|j                  |       |S )Nc                     t        |       t        j                  k(  r;t        | d      r.t        j                  j                  | j                  ddd       y y y )Nweightg        relufan_in)anonlinearitymode)typer   Linearhasattrinitkaiming_normal_rQ   )ms    r   init_weights_normalz%make_net.<locals>.init_weights_normal2   sF    7biiq(#''CfS['\ $  r   r   )r$   lenappendr   rX   ReLU
Sequentialapply)dimsr]   layersinets        r   make_netrh   1   s    ] F3t9Q; !biiQQqS	23bggi ! --
%CII!"Jr   c                   0     e Zd Zd fd	ZddZddZ xZS )DDPMc                    t         t        |           || _        || _        t        j                  |||      | _        d| j                  z
  | _        t        j                  t        t        | j                              D cg c]'  }t        j                  | j                  d |dz          ) c}      | _        t               | _        y c c}w )Nr   )r4   rj   r5   argsn_stepsr!   linspacebetasalphasr#   r$   r_   prod
alpha_barsDenoisingSceneLearnerUnetmodel)r>   rl   rm   min_betamax_betarf   rE   s         r   r5   zDDPM.__init__?   s    dD"$	^^Hh@
$**n,,QVWZ[_[f[fWgQh'iA

4;;vA3F(G'ij.0
 (js   ,C	c                    |d   j                  d      }t        j                  d| j                  |f      j	                         }| j
                  j	                         |   }t        j                  |d         }|j                         d d d d d f   |d   z  d|z
  j                         d d d d d f   |z  z   }| j                  ||      }|||dS )Nrgbr   r   )
noised_imgetaeta_est)	r   r!   randintrm   cudarr   
randn_likesqrtrt   )r>   model_inputrz   br.   a_barnoisy	eta_thetas           r   rH   zDDPM.forwardI   s    
e

!
!!
$ MM!T\\A40557$$&q){512 

QtD-.U1CCq5yFVFVFXYZ[_`deiYiFjmpFppJJuQ'	"yAAr   c                    t        j                  d| j                  d      j                  t         j                        }g }t        j                         5  t        j                  ||||      j                         }t        t        t        | j                              ddd         D ]  \  }}	t        j                  |d      |	z  j                         j                         }
| j                  ||
      }| j                  |	   }| j                   |	   }d|j#                         z  |d|z
  d|z
  j#                         z  |z  z
  z  }|	rMt        j                  ||||      j                         }| j$                  |	   }|j#                         }|||z  z   }||v s|j'                  |        	 ddd       t        j(                  |      S # 1 sw Y   xY w)zjGiven a DDPM model, a number of samples to be generated and a device, returns some newly generated samplesr   
   Nr^   r   )nprn   rm   astypeuintr!   no_gradrandnr}   	enumeratelistr$   oneslongrt   rp   rr   r   ro   r`   stack)r>   	n_sampleschw
frame_idxsintermed_gensr   idxr.   time_tensorr   alpha_talpha_t_barzbeta_tsigma_ts                    r   generate_new_imageszDDPM.generate_new_imagesZ   s   [[DLL"5<<RWWE
]]_ 	> Iq!Q/446A#Dt||)<$=dd$CD >Q$zz)Q7!;AACHHJ JJq+6	++a."ooa0 'AW[@V@V@X0X[d0d,deIq!Q7<<>A!ZZ]F$kkmG GaKA *$m&:&:1&=+>	>6 {{=))7	> 	>s   EGGG)  g-C6?g{Gz?r3   )   r      r   )rJ   rK   rL   r5   rH   r   rM   rN   s   @r   rj   rj   >   s    1B" *r   rj   c                   ,     e Zd Zd fd	Zd Zd Z xZS )rs   c                    t         t        |           t        j                  ||      | _        t        ||      | j
                  j                  _        | j
                  j                  d       | j                  |d      | _        t        j                  t        ddd      t        ddd      t        ddd            | _        t        j                  ddddd      | _        | j                  |d      | _        t        j                  t        ddd	      t        d
d	d	      t        d
d	d	            | _        t        j                  d	d	ddd      | _        | j                  |d	      | _        t        j                  t        dd	d      t        ddd      t        ddd            | _        t        j                  t        j                  dddd      t        j,                         t        j                  ddddd            | _        | j                  |d      | _        t        j                  t        ddd	      t        dd	d	      t        dd	d            | _        t        j                  t        j4                  ddddd      t        j,                         t        j4                  dddd            | _        | j                  |d      | _        t        j                  t        ddd      t        ddd	      t        dd	d	            | _        t        j4                  d	d	ddd      | _        | j                  |d      | _        t        j                  t        ddd	      t        d
d	d      t        ddd            | _         t        j4                  ddddd      | _!        | j                  |d	      | _"        t        j                  t        dd	d      t        ddd      t        dddd            | _#        t        j                  ddddd      | _$        y )NFr   )r   r   r   r   )r   r   r   r   r    )r      r      )r   r   r   )r      r   (   )r   r   r   )r   rI   rI   )r   rI   rI   P   )r   r   r   )r   r   r   )r   r   r   )r=   rI   )%r4   rs   r5   r   	Embedding
time_embedr/   rQ   datarequires_grad__make_tete1rb   r1   b1r8   down1te2b2down2te3b3r;   down3te_midb_midConvTranspose2dup1te4b4up2te5b5up3te_outb_outconv_out)r>   rm   time_emb_dimrE   s      r   r5   z"DenoisingSceneLearnerUnet.__init__}   sN   '79 ,,w=&:7L&Q#&&u- ==q1--KB'L"b)L"b)

 YYr2q!Q/
==r2--L"b)L"b)L"b)

 YYr2q!Q/
==r2--JB'JB'JB'

 ]]IIb"a#GGIIIb"aA&

 mmL"5]]JB'JB'JB'

 ==r2q!Q/GGIr2q!,
 ==r2--JB'JB'JB'
 %%b"aA6==r2--L"b)L"b)L"b)
 %%b"aA6mmL"5]]L"b)L"b)L"bE:

 		"aAq1r   c           	         | j                  |      }t        |      }| j                  || j                  |      j	                  |ddd      z         }| j                  | j                  |      | j                  |      j	                  |ddd      z         }| j                  | j                  |      | j                  |      j	                  |ddd      z         }| j                  | j                  |      | j                  |      j	                  |ddd      z         }t        j                  || j!                  |      fd      }| j#                  || j%                  |      j	                  |ddd      z         }t        j                  || j'                  |      fd      }	| j)                  |	| j+                  |      j	                  |ddd      z         }	t        j                  || j-                  |	      fd      }
| j/                  |
| j1                  |      j	                  |ddd      z         }
| j3                  |
      }
|
S )Nr^   r   )dim)r   r_   r   r   r%   r   r   r   r   r   r   r   r   r   r!   catr   r   r   r   r   r   r   r   r   r   )r>   r   r.   r)   out1out2out3out_midout4out5rG   s              r   rH   z!DenoisingSceneLearnerUnet.forward   s   OOAFwwq488A;..q"a;;<wwtzz$'$((1+*=*=aQ*JJKwwtzz$'$((1+*=*=aQ*JJK**TZZ-A0F0Fq"aQR0SSTyy$ 12:wwtdhhqk11!RA>>?yy$/Q7wwtdhhqk11!RA>>?iitxx~.A6jjt{{1~55aQBBCmmC 
r   c                     t        j                  t        j                  ||      t        j                         t        j                  ||            S r3   )r   rb   rX   r;   )r>   dim_indim_outs      r   r   z"DenoisingSceneLearnerUnet._make_te   s8    }}IIfg&GGIIIgw'
 	
r   )r   d   )rJ   rK   rL   r5   rH   r   rM   rN   s   @r   rs   rs   |   s    K2Z.
r   rs   c                   D     e Zd Z fdZd Zi dfdZi fdZi fdZ xZS )SceneLearnerc                    t         |           || _        d}t        j                  d|      | _        t        |ddg      | _        t        |dz  dddg      | _        t        j                  j                  dd| j                  j                         | _        | j                  j                  j                  | _        t        j"                         x| j                   d<   | j                  j                  _        t%        j&                         | _        |j*                  }g d	d |j,                   | _        t        j.                  | j,                  D cg c]  }t        j0                  d
||dz  z          c}      j3                         | _        | j4                  D ].  }t        j6                  j9                  |j:                  dd       0 d}t=        d|      | _        t        j                  |dz  dz   |      | _         t        j.                  tC        d      D cg c]  }tE        |dd       c}      j3                         | _#        t        |ddg      | _        t        |ddg      | _$        y c c}w c c}w )N@   i   r   r       zintel-isl/MiDaSMiDaS_small)
pretrained)r   r         i  r   g{Gz?)meanstd   r   rI   )%r4   r5   rl   r   rX   fmap_downprojrh   	depth_estcorr_weighter_perpointr!   hubloadscratch_netmidasscratchoutput_conv	midas_outIdentityconv_modulesPixelNeRFEncoderimg_encfdimspatial_dims
ModuleListr   r}   latentsrZ   normal_rQ   r   posenc	pix_embedr$   r   decoderrgb_est)r>   rl   r   sr   n_freq_rE   s          r   r5   zSceneLearner.__init__   s   	 YYs40!41+.&.Qs2a/@&A#YY^^$5}UYU^U^UjUjQj^k
zz))559;Fq$**,,8#446YY&'9(9(9:}}TM^M^%_bll4ad&C%_`eegJArwwqxxaTJJ 4Qv>6!8A:d3}}eAh%Oja&<%OPUUW!41+.b, &` &Ps   #I2I7c                 ^   |d   j                   dd  }t        d       }t        j                         5  t	        t        j
                  |d   |d   z        j                  d            D ]r  \  }} | ||      }t        j                  |d   |d   gd      |d<   t        j                  |d   |d   gd	      |d<   t        j                  |d
   |d
   gd	      |d
<   t 	 d d d        |d   j                  d	|      t        |d   |d         t        |d
   d|d         dS # 1 sw Y   DxY w)Nrx   r   c                  H    t        j                  g       j                         S r3   )r!   r#   r}    r   r   r   z.SceneLearner.render_full_img.<locals>.<lambda>  s    ell2&6&;&;&= r   r   r   r   )
render_pixdepthr^   masksz b l1 l2 1 (x y) -> b l1 l2 1 x yr   )r   rx   r   )r?   r   r!   r   r   r&   chunkr   	unflattench_fstr   )r>   r   imsizeout_allr,   pixrG   s          r   render_full_imgzSceneLearner.render_full_img	  s@   5!'',=>]]_ 	O"5<<q	&)0C#D#J#J2#NO O#;#6$yy'%.U)DRH!&GG,<S\+J2!N !&GG,<S\+J2!N 	O	O G$..r&9q	2gg./QTZ[\T]^ 		O 	Os   B,D##D,Nc                 X   |d   j                   dd  }t        |d         t        | j                        dz   }}|!t        j                  |d   |d   z        d d }t        | j                  dd      |   }t        | j                  | j                  |            d||      }d|vrt        | j                  | j                        D 	
cg c]>  \  }	}
t         |
|d	   j                  d
            j                  d
|	dz  d
f      |	      @ }}	}
|j                  | j!                  |d                ||d<   t        j"                  |d   D 
cg c],  }
t%        t'        |
|d    j)                  |d
d
d
                  . c}
d      }t        t        j"                  t+        |      D cg c]:  }t        j,                  t+        |      D cg c]  }t/        ||k         c}      < c}}      j1                         d|      }t3        t'        |j                  dd      |d   j)                  ||dz  z  d
d
d
            d||      }t        |d|j5                  d            }||z  }|}| j6                  D ]  } |||      } t3        |d      }t9        j:                  | j=                  |            j                  d
      }t9        j>                  | jA                  |            }||||jC                  ddddd      |j1                         dz  S c c}
}	w c c}
w c c}w c c}}w )Nrx   r   r   r   i  zxy c -> b xy l 1 c)lr   r   r   r^   r    zx y -> b x y 1 1 1)r   )NNz!(b l1  l2) 1 1 xy -> b xy l1 l2 1)r   l1zb xy l c -> b xy l2 l c)l2zb xy l1 1 c -> b l1 xy crI   r   )r   rx   r   r   )"r?   r_   r   r!   randperm	make_xpixflattenr   r   r   zipr   r   squeezer   r`   r   r   ch_sec	grid_sampexpandr$   r#   floatr}   r   r   r   Fsoftplusr   sigmoidr   permute)r>   r   rG   r   r  r   n_latentxpixcrdsr   latentr   r,   rf   lowres_maskshires_masksfmap
cross_attnr   rx   s                       r   rH   zSceneLearner.forward  s<   5!'',U+,S1B1B-CA-E(ENN6!9VAY;N,OPUQU,Vz &!))!A.z:dnnT[[%678LxZ[\ K'pstx  uF  uF  GK  GS  GS  qT  Udldeflvf[%7%?%?%CDNNrSTVWSWXZR[\]^_  UG  UNNDLLU);<>%,K	"++epqze{|[avitDz7H7H2bQS7T&UV|}~ ekkinowix*yde5<<eT\o8^q!t8^+_*yz  B  CW  Z[  \ )L,@,@1,Ed9oF\F\]^_gij_j]jkmnpqsFt"u  y\  _`  dl  m !:gll1oNK' ,,GJz'$/GG89 

4>>$/088<iiT*+''!Aa2#*	
 
 	
3 U } 9_*ys%   AL1L*!L&L! 	L&!L&c                    |d   j                   dd  }| j                  |d   j                  d            }| j                  t	        | j                  dd      d    j                  t        |d         dd      j                               }| j                  |j                  d|      d d dd ddd df   j                  dd            }| j                  D ]  } ||d d d f   |      } t        j                  t        ||d   dz        |d	      }t        j                  t        | j!                  t#        j$                  |t'        |      fd            |d               j                  d      }t        j(                  t        | j+                  t#        j$                  |t'        |      fd            |d               }	|||	d
z  S )Nrx   r   r   r^   r   r   r    bilinearrV   )r   rx   )r?   latent_codesr  r   r
  r  r  r_   r}   r   r   r   r  interpolater   r  r   r!   r   r  r  r   )
r>   r   rG   r  r!  r  r  r  r   rx   s
             r   forward_autodecoder_globalz'SceneLearner.forward_autodecoder_globalE  s   5!'',((U);)C)CB)GH{{9f-55a:4@GGKX]L^H_`bcefkkmn ~~dnnQv6qAadd{CKKAaPQ,,TJz,qv:Nt/TT }}VDA6v:N

6$..D;Nr1R"STZ[\T]^_gghijiit||EIItF4L6I",MNvVWyYZ
 
 	
r   c           	         |d   j                   dd  }| j                  |d         }t        | j                  t	        |            |j                  d            }t        j                  ||d      }t        j                  t        | j                  t	        |            |d               j                  d      }|d|iz  S )Nrx   r   r  r   r   r   r   )r?   r   r   r   r  r   r  r"  r  r   r  )r>   r   rG   r  r  r   s         r   forward_unetzSceneLearner.forward_unetZ  s    5!'',||K./d((6tyy}E}}T&j9

6$..">vayIJRRSTUE
 
 	
r   )	rJ   rK   rL   r5   r  rH   r#  r%  rM   rN   s   @r   r   r      s.    ->  (*d *
X ;= 
* -/ 
r   r   r3   )/__doc__r!   torchvisionr   einopsr   r   torch.nnr   r  numpyr   sysrandomtimeoscopyr   matplotlib.pyplotpyplotplt
matplotlibr   wandbr	   typingr
   r   r   r   r   r   r   collectionsr   attn_modulesr   r   r  r   r/   Moduler1   rh   rj   rs   r   r   r   r   <module>r9     s    *   $ $         C C  # B >u	bii "<*299 <*|j
		 j
Z|
299 |
r   