"""Build fig5_video.svg — SVD video backbone pipeline + rollout comparison.

Left half: start frame → SVD → generated video frames → fork into two branches:
  - top: global regression (collapses spatial info → vector → xyz, red)
  - bottom: PARA (preserves spatial heatmap → argmax → xyz, green)

Right half: two rollout grids showing success (PARA 92%) vs failure (Global 0%).
"""

import base64
import time

# Wall-clock start time; the final print reports elapsed seconds relative to it.
_t = time.time()

def b64(p):
    """Return the contents of the file at path ``p`` as a base64 text string.

    The file is opened in binary mode inside a ``with`` block so the handle
    is closed as soon as the bytes are read, rather than whenever the
    garbage collector gets around to it.

    Raises:
        OSError: if ``p`` cannot be opened or read.
    """
    with open(p, "rb") as fh:
        return base64.b64encode(fh.read()).decode()

# Root of the extracted figure assets; every image path below is derived from
# it so the whole asset tree can be relocated by editing one line.
ASSETS = "/data/cameron/penpot/figures/extracted"
FIG4 = f"{ASSETS}/fig4"
FIG5 = f"{ASSETS}/fig5v2"

# Each PNG is read once and held as a base64 string; the SVG template embeds
# them as `data:image/png;base64,...` URIs.
start_b64 = b64(f"{ASSETS}/fig1a_rgb_hires.png")  # start frame thumbnail
# Four video frames, drawn back-to-front as a staggered stack.
v1_b64 = b64(f"{FIG5}/vid_clean_0.3.png")
v2_b64 = b64(f"{FIG5}/vid_clean_0.8.png")
v3_b64 = b64(f"{FIG5}/vid_clean_1.5.png")
v4_b64 = b64(f"{FIG5}/vid_clean_2.5.png")
# Rollout images for panel (b). These resolve to the same files as before,
# but now go through ASSETS like every other asset.
rollout_para_b64 = b64(f"{FIG4}/para_spatial_success.png")
rollout_global_b64 = b64(f"{FIG4}/act_spatial_fail.png")
pca_b64 = b64(f"{ASSETS}/fig1a_pca_hires.png")  # heatmap thumbnail in the PARA branch

# Colors (hex strings interpolated into the SVG template's fill/stroke attributes)
GREEN = "#16653a"  # PARA branch, its arrows, and the success card
RED = "#a12029"  # global-regression branch, its arrows, and the failure card
SLATE = "#334155"  # neutral pipeline arrows, fork lines, front video-frame outline
GRAY = "#6b7280"  # panel titles and small captions
LIGHT = "#e5e7eb"  # hairline rules, panel divider, image borders

# ═══════════════════════════════════════════════════════════════════════════
# Geometry
# ═══════════════════════════════════════════════════════════════════════════
#
# Canvas 1400 x 500
# Panel (a): x=0..900 (video pipeline; content ends at x=876)
# Panel (b): x=900..1400 (rollout grids; divider drawn at x=900)
#
# Pipeline main axis y=240 (centered).
# Fork diverges to y=160 (top / global) and y=320 (bottom / PARA).
#
# Elements:
#   start frame:    x=30..140   (110x100, centered y=240)
#   arrow 1:        x=146..170  (→ SVD)
#   SVD box:        x=176..296  (120x60)
#   arrow 2:        x=302..326  (→ video frames)
#   video frames:   x=332..466  (134 wide stack, 4 frames of 92x84 staggered by 14)
#   fork lines:     x=466..484
#   --- top branch ---
#   conv top:       x=490..560  (trapezoid, y=130..190)
#   arrow:          x=566..584
#   vector box:     x=588..612  (24x40)
#   arrow:          x=618..644
#   xyz red:        x=650..760  (110x56)
#   --- bottom branch ---
#   conv bot:       x=490..560  (rectangle, y=290..350)
#   arrow:          x=566..584
#   heatmap:        x=588..640  (52x52)
#   arrow:          x=646..664
#   argmax box:     x=668..738
#   arrow:          x=744..762
#   xyz green:      x=766..876  (110x56)
#
# Total width panel (a): 876. Panel (b) starts at ~900.

# The whole figure is a single f-string template. The only interpolated
# fields are the color constants (GREEN / RED / SLATE / GRAY / LIGHT) and the
# base64 image payloads (*_b64), which are embedded as data URIs; every
# coordinate is a hard-coded literal.
# NOTE: the template currently contains no literal braces. Any added later
# (e.g. inline CSS) must be doubled ({{ }}) to survive f-string formatting.
svg = f'''<?xml version="1.0" encoding="UTF-8"?>
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"
     viewBox="0 0 1400 500" width="1400" height="500"
     font-family="Inter, Arial, sans-serif">
  <defs>
    <marker id="arrow-slate" viewBox="0 0 10 10" refX="0" refY="5"
            markerWidth="6" markerHeight="6" markerUnits="userSpaceOnUse" orient="auto">
      <path d="M0,0 L10,5 L0,10 Z" fill="{SLATE}"/>
    </marker>
    <marker id="arrow-green" viewBox="0 0 10 10" refX="0" refY="5"
            markerWidth="6" markerHeight="6" markerUnits="userSpaceOnUse" orient="auto">
      <path d="M0,0 L10,5 L0,10 Z" fill="{GREEN}"/>
    </marker>
    <marker id="arrow-red" viewBox="0 0 10 10" refX="0" refY="5"
            markerWidth="6" markerHeight="6" markerUnits="userSpaceOnUse" orient="auto">
      <path d="M0,0 L10,5 L0,10 Z" fill="{RED}"/>
    </marker>
    <filter id="card-shadow" x="-10%" y="-10%" width="120%" height="130%">
      <feDropShadow dx="0" dy="1" stdDeviation="2" flood-color="#000" flood-opacity="0.06"/>
    </filter>
  </defs>

  <rect width="1400" height="500" fill="#ffffff"/>

  <!-- ══════════════════════════════════════════════════════════════
       PANEL (a) — Video → Action pipeline
       ══════════════════════════════════════════════════════════════ -->
  <text x="30" y="32" font-size="14" font-weight="700" fill="{GRAY}" letter-spacing="0.02em">
    (a) Video backbone → action head
  </text>
  <line x1="30" y1="40" x2="876" y2="40" stroke="{LIGHT}" stroke-width="1"/>

  <!-- Start frame -->
  <clipPath id="clip-start"><rect x="30" y="190" width="110" height="100" rx="6"/></clipPath>
  <image xlink:href="data:image/png;base64,{start_b64}"
         x="30" y="190" width="110" height="100" preserveAspectRatio="xMidYMid slice" clip-path="url(#clip-start)"/>
  <rect x="30" y="190" width="110" height="100" rx="6" fill="none" stroke="{LIGHT}" stroke-width="1.5"/>
  <text x="85" y="308" text-anchor="middle" font-size="10" font-weight="600" fill="{GRAY}">start frame</text>

  <!-- Arrow 1: start → SVD -->
  <line x1="146" y1="240" x2="170" y2="240" stroke="{SLATE}" stroke-width="2.5" marker-end="url(#arrow-slate)"/>

  <!-- SVD box -->
  <rect x="176" y="210" width="120" height="60" rx="10" fill="#eef2ff" stroke="#6366f1" stroke-width="2"/>
  <!-- film icon (sprocket holes) -->
  <g transform="translate(188, 222)">
    <rect x="0" y="0" width="22" height="16" rx="1" fill="none" stroke="#6366f1" stroke-width="1.2"/>
    <rect x="2" y="2" width="3" height="3" fill="#6366f1"/>
    <rect x="17" y="2" width="3" height="3" fill="#6366f1"/>
    <rect x="2" y="11" width="3" height="3" fill="#6366f1"/>
    <rect x="17" y="11" width="3" height="3" fill="#6366f1"/>
    <line x1="7" y1="3" x2="15" y2="3" stroke="#6366f1" stroke-width="0.8"/>
    <line x1="7" y1="8" x2="15" y2="8" stroke="#6366f1" stroke-width="0.8"/>
    <line x1="7" y1="13" x2="15" y2="13" stroke="#6366f1" stroke-width="0.8"/>
  </g>
  <text x="236" y="236" font-size="11" font-weight="800" fill="#6366f1">Stable</text>
  <text x="236" y="250" font-size="11" font-weight="800" fill="#6366f1">Video</text>
  <text x="236" y="264" font-size="11" font-weight="800" fill="#6366f1">Diffusion</text>

  <!-- Arrow 2: SVD → video frames -->
  <line x1="302" y1="240" x2="326" y2="240" stroke="{SLATE}" stroke-width="2.5" marker-end="url(#arrow-slate)"/>

  <!-- Video frames (4 stacked/staggered from back to front) -->
  <g id="video-frames">
    <!-- frame 1 (back) -->
    <clipPath id="clip-v1"><rect x="332" y="178" width="92" height="84" rx="5"/></clipPath>
    <image xlink:href="data:image/png;base64,{v1_b64}"
           x="332" y="178" width="92" height="84" preserveAspectRatio="xMidYMid slice" clip-path="url(#clip-v1)"/>
    <rect x="332" y="178" width="92" height="84" rx="5" fill="none" stroke="{LIGHT}" stroke-width="1.5"/>
    <!-- frame 2 -->
    <clipPath id="clip-v2"><rect x="346" y="192" width="92" height="84" rx="5"/></clipPath>
    <image xlink:href="data:image/png;base64,{v2_b64}"
           x="346" y="192" width="92" height="84" preserveAspectRatio="xMidYMid slice" clip-path="url(#clip-v2)"/>
    <rect x="346" y="192" width="92" height="84" rx="5" fill="none" stroke="{LIGHT}" stroke-width="1.5"/>
    <!-- frame 3 -->
    <clipPath id="clip-v3"><rect x="360" y="206" width="92" height="84" rx="5"/></clipPath>
    <image xlink:href="data:image/png;base64,{v3_b64}"
           x="360" y="206" width="92" height="84" preserveAspectRatio="xMidYMid slice" clip-path="url(#clip-v3)"/>
    <rect x="360" y="206" width="92" height="84" rx="5" fill="none" stroke="{LIGHT}" stroke-width="1.5"/>
    <!-- frame 4 (front) -->
    <clipPath id="clip-v4"><rect x="374" y="220" width="92" height="84" rx="5"/></clipPath>
    <image xlink:href="data:image/png;base64,{v4_b64}"
           x="374" y="220" width="92" height="84" preserveAspectRatio="xMidYMid slice" clip-path="url(#clip-v4)"/>
    <rect x="374" y="220" width="92" height="84" rx="5" fill="none" stroke="{SLATE}" stroke-width="1.8"/>
  </g>
  <text x="420" y="322" text-anchor="middle" font-size="10" font-weight="600" fill="{GRAY}">generated video (T frames)</text>

  <!-- Fork lines: from video-frame stack center (466, 262) to top/bottom branch entry -->
  <line x1="466" y1="262" x2="484" y2="160" stroke="{SLATE}" stroke-width="1.5" stroke-dasharray="4,3" stroke-opacity="0.6"/>
  <line x1="466" y1="262" x2="484" y2="320" stroke="{SLATE}" stroke-width="1.5" stroke-dasharray="4,3" stroke-opacity="0.6"/>

  <!-- ─────── TOP BRANCH: Global Regression (baseline) ─────── -->
  <g id="global-branch">
    <!-- conv trapezoid (narrows right) -->
    <path d="M 490 130 L 560 145 L 560 175 L 490 190 Z"
          fill="#fef2f2" stroke="{RED}" stroke-width="2"/>
    <text x="525" y="165" text-anchor="middle" font-size="11" font-weight="800" fill="{RED}">Conv</text>

    <!-- Arrow to vector -->
    <line x1="566" y1="160" x2="584" y2="160" stroke="{RED}" stroke-width="2.5" marker-end="url(#arrow-red)"/>

    <!-- Vector box: thin tall rect suggesting flat collapsed vector -->
    <rect x="588" y="140" width="24" height="40" rx="3" fill="#fef2f2" stroke="{RED}" stroke-width="1.8"/>
    <!-- Vector cells -->
    <line x1="588" y1="150" x2="612" y2="150" stroke="{RED}" stroke-width="0.8"/>
    <line x1="588" y1="160" x2="612" y2="160" stroke="{RED}" stroke-width="0.8"/>
    <line x1="588" y1="170" x2="612" y2="170" stroke="{RED}" stroke-width="0.8"/>
    <text x="600" y="196" text-anchor="middle" font-size="9" font-weight="600" fill="{GRAY}">global vec</text>

    <!-- Arrow to xyz -->
    <line x1="618" y1="160" x2="644" y2="160" stroke="{RED}" stroke-width="2.5" marker-end="url(#arrow-red)"/>

    <!-- (x, y, z) red -->
    <rect x="650" y="132" width="110" height="56" rx="10" fill="#fef2f2" stroke="{RED}" stroke-width="2"/>
    <text x="705" y="158" text-anchor="middle" font-size="16" font-weight="800" fill="{RED}">(x, y, z)</text>
    <text x="705" y="178" text-anchor="middle" font-size="10" font-weight="700" fill="{RED}">Global Reg.</text>

    <!-- Branch label -->
    <text x="625" y="112" text-anchor="middle" font-size="10" font-weight="700" fill="{RED}" letter-spacing="0.06em">
      COLLAPSE SPATIAL
    </text>
  </g>

  <!-- ─────── BOTTOM BRANCH: PARA ─────── -->
  <g id="para-branch">
    <!-- conv straight rectangle — same shape in/out, illustrates that PARA
         preserves spatial resolution (no narrowing like the global branch) -->
    <rect x="490" y="290" width="70" height="60" rx="4"
          fill="#f0fdf4" stroke="{GREEN}" stroke-width="2"/>
    <text x="525" y="325" text-anchor="middle" font-size="11" font-weight="800" fill="{GREEN}">Conv</text>

    <!-- Arrow to heatmap -->
    <line x1="566" y1="320" x2="584" y2="320" stroke="{GREEN}" stroke-width="2.5" marker-end="url(#arrow-green)"/>

    <!-- Heatmap: spatial grid PCA thumbnail -->
    <clipPath id="clip-hm"><rect x="588" y="294" width="52" height="52" rx="4"/></clipPath>
    <image xlink:href="data:image/png;base64,{pca_b64}"
           x="588" y="294" width="52" height="52" preserveAspectRatio="xMidYMid slice" clip-path="url(#clip-hm)"/>
    <rect x="588" y="294" width="52" height="52" rx="4" fill="none" stroke="{GREEN}" stroke-width="1.8"/>
    <text x="614" y="362" text-anchor="middle" font-size="9" font-weight="600" fill="{GRAY}">heatmap vol.</text>

    <!-- Arrow to argmax -->
    <line x1="646" y1="320" x2="664" y2="320" stroke="{GREEN}" stroke-width="2.5" marker-end="url(#arrow-green)"/>

    <!-- argmax box -->
    <rect x="668" y="302" width="70" height="36" rx="6" fill="#f0fdf4" stroke="{GREEN}" stroke-width="2"/>
    <text x="703" y="325" text-anchor="middle" font-size="12" font-weight="800" font-style="italic" fill="{GREEN}">argmax</text>

    <!-- Arrow to xyz -->
    <line x1="744" y1="320" x2="762" y2="320" stroke="{GREEN}" stroke-width="2.5" marker-end="url(#arrow-green)"/>

    <!-- (x, y, z) green -->
    <rect x="766" y="292" width="110" height="56" rx="10" fill="#f0fdf4" stroke="{GREEN}" stroke-width="2"/>
    <text x="821" y="318" text-anchor="middle" font-size="16" font-weight="800" fill="{GREEN}">(x, y, z)</text>
    <text x="821" y="338" text-anchor="middle" font-size="10" font-weight="700" fill="{GREEN}">PARA (Ours)</text>

    <!-- Branch label -->
    <text x="683" y="380" text-anchor="middle" font-size="10" font-weight="700" fill="{GREEN}" letter-spacing="0.06em">
      PRESERVE SPATIAL
    </text>
  </g>

  <!-- Shared conv note under the fork -->
  <text x="270" y="408" font-size="11" font-weight="500" fill="{GRAY}" font-style="italic">
    same video features · different action head
  </text>

  <!-- Divider between panels -->
  <line x1="900" y1="30" x2="900" y2="470" stroke="{LIGHT}" stroke-width="1"/>

  <!-- ══════════════════════════════════════════════════════════════
       PANEL (b) — Rollout grid results
       ══════════════════════════════════════════════════════════════ -->
  <text x="920" y="32" font-size="14" font-weight="700" fill="{GRAY}" letter-spacing="0.02em">
    (b) Rollout results · 20 demos
  </text>
  <line x1="920" y1="40" x2="1380" y2="40" stroke="{LIGHT}" stroke-width="1"/>

  <!-- PARA success rollout (green) -->
  <g id="para-results">
    <rect x="920" y="60" width="220" height="200" rx="10" fill="#ffffff" stroke="{GREEN}" stroke-width="2.5" filter="url(#card-shadow)"/>
    <clipPath id="clip-rp"><rect x="930" y="70" width="200" height="150" rx="5"/></clipPath>
    <image xlink:href="data:image/png;base64,{rollout_para_b64}"
           x="930" y="70" width="200" height="150" preserveAspectRatio="xMidYMid slice" clip-path="url(#clip-rp)"/>
    <rect x="936" y="76" width="64" height="20" rx="4" fill="{GREEN}"/>
    <text x="968" y="91" text-anchor="middle" font-size="11" font-weight="800" fill="#ffffff">PARA ✓</text>
    <text x="1030" y="240" text-anchor="middle" font-size="30" font-weight="900" fill="{GREEN}" letter-spacing="-0.02em">92%</text>
    <text x="1030" y="257" text-anchor="middle" font-size="10" font-weight="500" fill="{GRAY}">success rate</text>
  </g>

  <!-- Global regression failure rollout (red) -->
  <g id="global-results">
    <rect x="1160" y="60" width="220" height="200" rx="10" fill="#ffffff" stroke="{RED}" stroke-width="2.5" filter="url(#card-shadow)"/>
    <clipPath id="clip-rg"><rect x="1170" y="70" width="200" height="150" rx="5"/></clipPath>
    <image xlink:href="data:image/png;base64,{rollout_global_b64}"
           x="1170" y="70" width="200" height="150" preserveAspectRatio="xMidYMid slice" clip-path="url(#clip-rg)"/>
    <rect x="1176" y="76" width="80" height="20" rx="4" fill="{RED}"/>
    <text x="1216" y="91" text-anchor="middle" font-size="11" font-weight="800" fill="#ffffff">Global Reg. ✗</text>
    <text x="1270" y="240" text-anchor="middle" font-size="30" font-weight="900" fill="{RED}" letter-spacing="-0.02em">0%</text>
    <text x="1270" y="257" text-anchor="middle" font-size="10" font-weight="500" fill="{GRAY}">success rate</text>
  </g>

  <text x="1150" y="290" text-anchor="middle" font-size="10" font-weight="500" fill="{GRAY}" font-style="italic">
    identical video model · only the action head differs
  </text>
</svg>
'''

out = "/data/cameron/para/paper/figs/svg/fig5_video.svg"
# The XML prolog declares encoding="UTF-8" and the markup contains non-ASCII
# glyphs (arrows, middle dots, check/cross marks), so the file must be written
# as UTF-8 explicitly rather than in whatever the locale default happens to be.
with open(out, "w", encoding="utf-8") as f:
    f.write(svg)
# Report the encoded size: len(svg) counts characters, which undercounts bytes
# whenever the text contains multi-byte characters.
n_bytes = len(svg.encode("utf-8"))
print(f"[{time.time()-_t:.2f}s] wrote {out} ({n_bytes} bytes)")
