"""Build fig2a_method.svg — pipeline diagram (left half of the old fig 2).

Shows: RGB input → DINO → DINO features → highlighted pixel → 1×1 Conv →
32 ray logits → unproject into the LIBERO scene → three viewers showing
① single ray, ② full volume + argmax, ③ robot servos to the target.

Uses the cached ray-logit volume at /tmp/fig2_vol_cache.npz when it exists and
otherwise falls back to a synthetic distribution, so the build works offline.
"""

import base64
import os
import time
import numpy as np
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt

# Wall-clock start time; the final status print reports the seconds elapsed since here.
_t = time.time()

# ═══════════════════════════════════════════════════════════════════════════
# Ray-logit probabilities (cached or synthetic)
# ═══════════════════════════════════════════════════════════════════════════

# Heatmap volume indexed as (h, v, u): depth bin along the ray, then image row/column.
# A cached volume is preferred when present; otherwise a deterministic synthetic one
# (anisotropic Gaussian blob plus seeded uniform noise) is generated.
CACHE = "/tmp/fig2_vol_cache.npz"
if os.path.exists(CACHE):
    # np.load on an .npz returns an NpzFile that holds the archive open; using it
    # as a context manager closes the handle once the arrays have been read.
    with np.load(CACHE) as d:
        vol_np = d["vol"]
        peak_hvu = tuple(int(x) for x in d["peak"])
else:
    hs, vs, us = np.meshgrid(np.arange(32), np.arange(64), np.arange(64), indexing="ij")
    # Squared, axis-scaled distance from the blob centre (h=11, v=32, u=34).
    d2 = ((hs - 11) / 3.0) ** 2 + ((vs - 32) / 5.0) ** 2 + ((us - 34) / 5.0) ** 2
    main = np.exp(-d2)
    # Fixed seed keeps the generated figure reproducible between builds.
    background = 0.02 * np.random.RandomState(0).rand(32, 64, 64)
    unnormalised = main + background
    vol_np = unnormalised / unnormalised.sum()
    peak_idx = int(vol_np.argmax())
    # Convert the flat argmax back to (h, v, u) using the array's own shape
    # rather than hard-coded strides.
    peak_hvu = tuple(int(i) for i in np.unravel_index(peak_idx, vol_np.shape))
h_peak, v_peak, u_peak = peak_hvu
# Distribution over depth bins along the single ray through the peak pixel,
# rescaled so that its maximum is 1 (used directly as a colormap input).
ray_probs = vol_np[:, v_peak, u_peak]
ray_probs_norm = ray_probs / ray_probs.max()

# Ray-logit strip: one SVG <rect> per depth bin, laid out left to right and
# filled with the turbo colour of that bin's normalised probability.
STRIP_X_LEFT = 500
STRIP_Y_TOP = 197
STRIP_H = 36
N_BINS = 32
cell_w = 126 / N_BINS  # strip width 126
turbo = plt.cm.turbo


def _strip_cell(index):
    """Return the SVG <rect> markup for depth bin *index* of the strip."""
    rgba = turbo(float(ray_probs_norm[index]))
    red, green, blue = (int(channel * 255) for channel in rgba[:3])
    left = STRIP_X_LEFT + index * cell_w
    # Each cell is drawn 0.2 units wider than its pitch — presumably so that
    # neighbouring cells overlap slightly and no seam shows between them.
    return (
        f'<rect x="{left:.2f}" y="{STRIP_Y_TOP}" width="{cell_w + 0.2:.2f}" '
        f'height="{STRIP_H}" fill="rgb({red},{green},{blue})"/>')


strip_cells = [_strip_cell(index) for index in range(N_BINS)]
# The separator's indentation only affects how the generated SVG source reads.
strip_svg = "\n      ".join(strip_cells)


def b64(p) -> str:
    """Return the contents of the file at path *p* as a base64 ASCII string.

    The file is opened in binary mode and closed before returning, so calling
    this repeatedly does not leave file handles open.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    with open(p, "rb") as fh:
        return base64.b64encode(fh.read()).decode()


ASSETS = "/data/cameron/penpot/figures/extracted"

# PNG panels, base64-encoded so they can be inlined into the SVG as data URIs.
# The files are read in the order listed.
rgb_b64, pca_b64, frustum_b64, volume_b64, servo_b64 = map(
    b64,
    (
        f"{ASSETS}/fig1a_rgb_hires.png",
        f"{ASSETS}/fig1a_pca_hires.png",
        f"{ASSETS}/fig2v3/frustum_ray.png",
        f"{ASSETS}/fig2v3/volume_highlight_crop.png",
        f"{ASSETS}/fig2v3/robot_servo_crop.png",
    ),
)

# Colour palette interpolated into the SVG template below.
GREEN = "#16653a"        # viewer card outlines, the "unproject" arrow and its label
GRAY = "#6b7280"         # figure title and the small captions
SLATE = "#334155"        # upper-case section headings; fill of the arrow-slate marker
LIGHT = "#e5e7eb"        # title divider line and the borders around embedded images
DINO_ORANGE = "#d97e1f"  # DINO backbone box outline and label
HL = "#facc15"           # highlighted feature pixel and the arrows leading to the strip

# Assemble the whole figure as one SVG document string. The f-string interpolates
# the palette constants, the base64-encoded PNG panels and the pre-rendered
# ray-logit strip (`strip_svg`); every other coordinate is a hand-placed literal
# on the 1400x460 canvas.
# NOTE: the strip outline <rect> repeats the strip geometry (x=500, y=197, 126x36)
# as literals, so it has to be kept in sync with STRIP_X_LEFT / STRIP_Y_TOP /
# STRIP_H by hand.
svg = f'''<?xml version="1.0" encoding="UTF-8"?>
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"
     viewBox="0 0 1400 460" width="1400" height="460"
     font-family="Inter, Arial, sans-serif">
  <defs>
    <marker id="arrow-slate" viewBox="0 0 10 10" refX="0" refY="5"
            markerWidth="7" markerHeight="7" markerUnits="userSpaceOnUse" orient="auto">
      <path d="M0,0 L10,5 L0,10 Z" fill="{SLATE}"/>
    </marker>
    <marker id="arrow-gray" viewBox="0 0 10 10" refX="0" refY="5"
            markerWidth="7" markerHeight="7" markerUnits="userSpaceOnUse" orient="auto">
      <path d="M0,0 L10,5 L0,10 Z" fill="#9ca3af"/>
    </marker>
    <marker id="arrow-hl" viewBox="0 0 10 10" refX="0" refY="5"
            markerWidth="7" markerHeight="7" markerUnits="userSpaceOnUse" orient="auto">
      <path d="M0,0 L10,5 L0,10 Z" fill="{HL}"/>
    </marker>
    <marker id="arrow-green" viewBox="0 0 10 10" refX="0" refY="5"
            markerWidth="7" markerHeight="7" markerUnits="userSpaceOnUse" orient="auto">
      <path d="M0,0 L10,5 L0,10 Z" fill="{GREEN}"/>
    </marker>
    <filter id="card-shadow" x="-10%" y="-10%" width="120%" height="130%">
      <feDropShadow dx="0" dy="1" stdDeviation="2" flood-color="#000" flood-opacity="0.06"/>
    </filter>
  </defs>

  <rect width="1400" height="460" fill="#ffffff"/>

  <text x="30" y="32" font-size="14" font-weight="700" fill="{GRAY}" letter-spacing="0.02em">
    Pixel-Aligned Heatmap Volume
  </text>
  <line x1="30" y1="40" x2="1370" y2="40" stroke="{LIGHT}" stroke-width="1"/>

  <!-- Pipeline: RGB → DINO → features → pixel → conv → strip → unproject → viewers -->

  <!-- (1) Input RGB -->
  <clipPath id="clip-rgb"><rect x="20" y="170" width="90" height="90" rx="5"/></clipPath>
  <image xlink:href="data:image/png;base64,{rgb_b64}"
         x="20" y="170" width="90" height="90" preserveAspectRatio="xMidYMid slice" clip-path="url(#clip-rgb)"/>
  <rect x="20" y="170" width="90" height="90" rx="5" fill="none" stroke="{LIGHT}" stroke-width="1.5"/>
  <text x="65" y="278" text-anchor="middle" font-size="10" font-weight="600" fill="{GRAY}">RGB input</text>

  <line x1="116" y1="215" x2="136" y2="215" stroke="#9ca3af" stroke-width="2" marker-end="url(#arrow-gray)"/>

  <!-- (2) DINO box -->
  <rect x="144" y="175" width="80" height="80" rx="8" fill="#fffbeb" stroke="{DINO_ORANGE}" stroke-width="2"/>
  <text x="184" y="222" text-anchor="middle" font-size="16" font-weight="800" fill="{DINO_ORANGE}">DINO</text>
  <text x="184" y="278" text-anchor="middle" font-size="10" font-weight="600" fill="{GRAY}">backbone</text>

  <line x1="230" y1="215" x2="250" y2="215" stroke="#9ca3af" stroke-width="2" marker-end="url(#arrow-gray)"/>

  <!-- (3) DINO features (PCA viz) -->
  <text x="318" y="144" text-anchor="middle" font-size="10" font-weight="700" fill="{SLATE}" letter-spacing="0.04em">
    DINO FEATURES
  </text>
  <text x="318" y="156" text-anchor="middle" font-size="9" font-weight="500" fill="{GRAY}" font-style="italic">
    64 × 64 × 128
  </text>
  <clipPath id="clip-pca"><rect x="258" y="163" width="120" height="104" rx="5"/></clipPath>
  <image xlink:href="data:image/png;base64,{pca_b64}"
         x="258" y="163" width="120" height="104" preserveAspectRatio="xMidYMid slice" clip-path="url(#clip-pca)"/>
  <rect x="258" y="163" width="120" height="104" rx="5" fill="none" stroke="{LIGHT}" stroke-width="1.5"/>

  <!-- Highlighted pixel on PCA -->
  <rect x="331" y="198" width="12" height="12" rx="1" fill="none" stroke="{HL}" stroke-width="2.5"/>
  <rect x="331" y="198" width="12" height="12" rx="1" fill="{HL}" fill-opacity="0.35"/>
  <text x="318" y="280" text-anchor="middle" font-size="10" font-weight="600" fill="{GRAY}">1 pixel feature</text>

  <!-- Arrow from pixel → 1×1 Conv -->
  <line x1="344" y1="205" x2="400" y2="215" stroke="{HL}" stroke-width="2.5" marker-end="url(#arrow-hl)"/>

  <!-- (4) 1×1 Conv box -->
  <rect x="406" y="197" width="64" height="36" rx="6" fill="#fef9c3" stroke="#ca8a04" stroke-width="2"/>
  <text x="438" y="213" text-anchor="middle" font-size="10" font-weight="800" fill="#854d0e">1×1</text>
  <text x="438" y="225" text-anchor="middle" font-size="10" font-weight="800" fill="#854d0e">Conv</text>

  <line x1="476" y1="215" x2="494" y2="215" stroke="{HL}" stroke-width="2.5" marker-end="url(#arrow-hl)"/>

  <!-- (5) Ray-logit strip -->
  <text x="563" y="188" text-anchor="middle" font-size="9" font-weight="700" fill="{SLATE}" letter-spacing="0.04em">
    32 RAY LOGITS
  </text>
  <rect x="500" y="197" width="126" height="36" rx="3" fill="#ffffff" stroke="#cbd5e1" stroke-width="1.5"/>
  {strip_svg}
  <text x="501" y="246" font-size="8" font-weight="600" fill="#94a3b8">h=0</text>
  <text x="605" y="246" font-size="8" font-weight="600" fill="#94a3b8">h=31</text>

  <!-- Unproject arrow → yellow ray origin in viewer 1 -->
  <line x1="632" y1="215" x2="707" y2="192" stroke="{GREEN}" stroke-width="2.5" marker-end="url(#arrow-green)"/>
  <text x="660" y="204" text-anchor="middle" font-size="11" font-weight="700" fill="{GREEN}" font-style="italic">
    unproject
  </text>

  <!-- Three LIBERO viewers side-by-side on the right -->
  <g id="viewer-1">
    <rect x="700" y="140" width="200" height="190" rx="10" fill="#ffffff" stroke="{GREEN}" stroke-width="2" filter="url(#card-shadow)"/>
    <clipPath id="clip-v1"><rect x="708" y="148" width="184" height="174" rx="5"/></clipPath>
    <image xlink:href="data:image/png;base64,{frustum_b64}"
           x="708" y="148" width="184" height="174" preserveAspectRatio="xMidYMid slice" clip-path="url(#clip-v1)"/>
    <text x="800" y="127" text-anchor="middle" font-size="10" font-weight="700" fill="{SLATE}" letter-spacing="0.04em">
      ① ONE RAY
    </text>
    <text x="800" y="350" text-anchor="middle" font-size="10" font-weight="500" fill="{GRAY}">
      ray into scene
    </text>
  </g>

  <line x1="906" y1="235" x2="930" y2="235" stroke="#9ca3af" stroke-width="2" marker-end="url(#arrow-gray)"/>

  <g id="viewer-2">
    <rect x="936" y="140" width="200" height="190" rx="10" fill="#ffffff" stroke="{GREEN}" stroke-width="2" filter="url(#card-shadow)"/>
    <clipPath id="clip-v2"><rect x="944" y="148" width="184" height="174" rx="5"/></clipPath>
    <image xlink:href="data:image/png;base64,{volume_b64}"
           x="944" y="148" width="184" height="174" preserveAspectRatio="xMidYMid slice" clip-path="url(#clip-v2)"/>
    <text x="1036" y="127" text-anchor="middle" font-size="10" font-weight="700" fill="{SLATE}" letter-spacing="0.04em">
      ② FULL VOLUME + ARGMAX
    </text>
    <text x="1036" y="350" text-anchor="middle" font-size="10" font-weight="500" fill="{GRAY}">
      peak voxel = 3D target
    </text>
  </g>

  <line x1="1142" y1="235" x2="1166" y2="235" stroke="#9ca3af" stroke-width="2" marker-end="url(#arrow-gray)"/>

  <g id="viewer-3">
    <rect x="1172" y="140" width="200" height="190" rx="10" fill="#ffffff" stroke="{GREEN}" stroke-width="2" filter="url(#card-shadow)"/>
    <clipPath id="clip-v3"><rect x="1180" y="148" width="184" height="174" rx="5"/></clipPath>
    <image xlink:href="data:image/png;base64,{servo_b64}"
           x="1180" y="148" width="184" height="174" preserveAspectRatio="xMidYMid slice" clip-path="url(#clip-v3)"/>
    <text x="1272" y="127" text-anchor="middle" font-size="10" font-weight="700" fill="{SLATE}" letter-spacing="0.04em">
      ③ ROBOT MOVES
    </text>
    <text x="1272" y="350" text-anchor="middle" font-size="10" font-weight="500" fill="{GRAY}">
      servo to 3D target
    </text>
  </g>
</svg>
'''

# Write the finished SVG to disk and report timing and size.
out = "/data/cameron/para/paper/figs/svg/fig2a_method.svg"
# The XML prolog declares UTF-8 and the document contains non-ASCII characters
# (arrows, the multiplication sign, circled digits), so encode explicitly
# instead of relying on the locale's default text encoding.
svg_bytes = svg.encode("utf-8")
with open(out, "wb") as f:
    f.write(svg_bytes)
# Report the encoded length: len(svg) counts characters, not bytes.
print(f"[{time.time()-_t:.2f}s] wrote {out} ({len(svg_bytes)} bytes)")
