"""Build fig2_method.svg — DINO → pixel → 1x1 conv → ray logits → unproject.

Top row pipeline (left to right):
  [RGB] → [DINO] → [PCA features 64×64×128] (highlighted pixel)
         → [1×1 Conv] → [ray-logit strip, blue→red]
         → "unproject" → [LIBERO frustum + ray viewer (still)]

Ray-logit strip uses turbo colormap (blue = low prob, yellow/red = high prob).
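Ray probabilities are read from /tmp/fig2_vol_cache.npz when that file exists;
otherwise a synthetic volume is generated in memory (this script never writes
the cache itself, so delete the file to force the synthetic path).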
"""

import base64
import json
import os
import time

import numpy as np
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt

_t = time.time()  # wall-clock start; the elapsed time is printed at the end

# ═══════════════════════════════════════════════════════════════════════════
# Ray probabilities (synthetic)
# ═══════════════════════════════════════════════════════════════════════════

CACHE = "/tmp/fig2_vol_cache.npz"
VOL_SHAPE = (32, 64, 64)  # (h, v, u) extent of the synthetic volume

if os.path.exists(CACHE):
    # np.load on an .npz returns an NpzFile that holds the archive open;
    # read both arrays inside a `with` so the handle is released.
    with np.load(CACHE) as d:
        vol_np = d["vol"]
        peak_hvu = tuple(int(x) for x in d["peak"])
else:
    # Anisotropic Gaussian blob centred at (h, v, u) = (11, 32, 34) plus a
    # small, seeded (hence reproducible) uniform noise floor.
    hs, vs, us = np.meshgrid(*(np.arange(n) for n in VOL_SHAPE), indexing="ij")
    d2 = ((hs - 11) / 3.0) ** 2 + ((vs - 32) / 5.0) ** 2 + ((us - 34) / 5.0) ** 2
    main = np.exp(-d2)
    background = 0.02 * np.random.RandomState(0).rand(*VOL_SHAPE)
    unnormalised = main + background
    vol_np = unnormalised / unnormalised.sum()  # normalise to sum to 1
    # Convert the flat argmax into (h, v, u) from the array's own shape
    # instead of hard-coding the 64*64 stride.
    peak_hvu = tuple(
        int(i) for i in np.unravel_index(vol_np.argmax(), vol_np.shape))
h_peak, v_peak, u_peak = peak_hvu

# Probabilities along the height axis at the peak pixel (v_peak, u_peak),
# rescaled so the largest bin equals 1 for colour mapping.
ray_probs = vol_np[:, v_peak, u_peak]  # (32,) for the synthetic volume
ray_probs_norm = ray_probs / ray_probs.max()
print(f"peak bin = {int(ray_probs.argmax())}  (h={h_peak}, v={v_peak}, u={u_peak})")

# ═══════════════════════════════════════════════════════════════════════════
# Ray-logit strip SVG — one horizontal row of N_BINS cells, coloured with the
# turbo colormap applied to the peak-normalised ray probabilities.
# The strip spans x = 500..626 (126 wide) and is 36 tall, starting at y = 197.
# ═══════════════════════════════════════════════════════════════════════════

N_BINS = 32
STRIP_X_LEFT = 500
STRIP_X_RIGHT = 626
STRIP_Y_TOP = 197
STRIP_H = 36
cell_w = (STRIP_X_RIGHT - STRIP_X_LEFT) / N_BINS  # 126 / 32 = 3.9375
turbo = plt.cm.turbo


def _strip_cell(i):
    """Return the ``<rect>`` markup for height bin ``i`` (bin 0 is leftmost)."""
    x = STRIP_X_LEFT + i * cell_w
    rgba = turbo(float(ray_probs_norm[i]))
    # Channels come back as floats in [0, 1]; truncate to 0..255 integers.
    r, g, b = (int(channel * 255) for channel in rgba[:3])
    # Each cell is drawn 0.2 wider than its pitch — presumably so adjacent
    # cells overlap slightly and no hairline gaps show between them.
    return (
        f'<rect x="{x:.2f}" y="{STRIP_Y_TOP}" width="{cell_w+0.2:.2f}" '
        f'height="{STRIP_H}" fill="rgb({r},{g},{b})"/>')


strip_cells = [_strip_cell(i) for i in range(N_BINS)]
# The join indent matches where the placeholder sits inside the SVG template.
strip_svg = '\n      '.join(strip_cells)

# ═══════════════════════════════════════════════════════════════════════════
# Assemble SVG
# ═══════════════════════════════════════════════════════════════════════════

def b64(p):
    """Return the contents of the file at path ``p`` as a base64 text string.

    The file is read in binary mode and closed before returning, so no file
    handle is left for the garbage collector to clean up.

    Raises:
        OSError: if ``p`` cannot be opened or read.
    """
    with open(p, "rb") as fh:
        return base64.b64encode(fh.read()).decode()

# Directory holding the pre-rendered PNGs that are embedded (as base64 data
# URIs) in the SVG template.
ASSETS = "/data/cameron/penpot/figures/extracted"
rgb_b64     = b64(f"{ASSETS}/fig1a_rgb_hires.png")
pca_b64     = b64(f"{ASSETS}/fig1a_pca_hires.png")
frustum_b64 = b64(f"{ASSETS}/fig2v3/frustum_ray.png")
volume_b64  = b64(f"{ASSETS}/fig2v3/volume_highlight_crop.png")
servo_b64   = b64(f"{ASSETS}/fig2v3/robot_servo_crop.png")

# Panel (b): rendered image + metadata (key points' 2D pixel coords)
two_frustums_b64 = b64(f"{ASSETS}/fig2v3/two_frustums.png")
# JSON text is UTF-8 by specification, so decode it explicitly instead of
# relying on the locale's default encoding.
with open(f"{ASSETS}/fig2v3/two_frustums_meta.json", encoding="utf-8") as f:
    _meta = json.load(f)

# Where the rendered panel-(b) image sits on the SVG canvas (top-left corner
# and displayed size, in canvas units).
PB_IMG_X = 1020
PB_IMG_Y = 50
PB_IMG_W = 310
PB_IMG_H = 310  # square; author's note says the underlying render is 896x896
# Render-pixel → canvas-unit factor. Only the width enters the ratio; the same
# factor is applied to both axes below.
_pb_scale = PB_IMG_W / _meta["image_size"]

def _to_svg(pt):
    """Map a 2D point given in render pixels to SVG canvas coordinates.

    ``pt`` is indexed as ``pt[0]`` (x) and ``pt[1]`` (y); the result is an
    ``(x, y)`` tuple offset by the panel-(b) image origin.
    """
    px, py = pt[0], pt[1]
    return (PB_IMG_X + px * _pb_scale, PB_IMG_Y + py * _pb_scale)

def _metres(value):
    """Format a length with two decimals followed by the unit, e.g. '0.35 m'."""
    return f'{value:.2f} m'

# Canvas positions of the labelled key points, in the order the metadata keys
# are listed.
(
    (camA_x, camA_y),
    (camB_x, camB_y),
    (tgt_x, tgt_y),
    (tot_x, tot_y),
    (rayA_mx, rayA_my),
    (rayB_mx, rayB_my),
    (height_mx, height_my),
) = [
    _to_svg(_meta[key])
    for key in (
        "cam_A_2d",
        "cam_B_2d",
        "target_2d",
        "target_on_table_2d",
        "ray_A_mid_2d",
        "ray_B_mid_2d",
        "height_mid_2d",
    )
]

# Text shown in the depth / height callouts and the summary caption.
_depth_A_str = _metres(_meta["depth_A"])
_depth_B_str = _metres(_meta["depth_B"])
_height_str  = _metres(_meta["height_z"])

# Shared colour palette interpolated into the SVG template below.
GREEN = "#16653a"  # viewer card borders, unproject arrow, height/target labels
GRAY = "#6b7280"  # panel titles and small captions
SLATE = "#334155"  # section headings and summary caption text
LIGHT = "#e5e7eb"  # divider rules and image outlines
DINO_ORANGE = "#d97e1f"  # DINO backbone box
HL = "#facc15"  # yellow highlight for the selected pixel

# The whole figure as one f-string template:
#   * `{NAME}` fields are filled from the Python values defined above.
#   * `{{STRIP}}` is an escaped brace pair, so the formatted text contains the
#     literal token `{STRIP}`; it is swapped for the generated strip
#     rectangles by the `svg.replace(...)` call that follows this template.
#   * The canvas is 1400 x 410. Panel (a) lives inside a group with
#     `translate(0, 5) scale(0.72)`, so its coordinates are pre-scale values.
#   * Panel (b) is positioned from PB_IMG_* and the metadata-derived points.
#   * The `panel-b-legacy` group carries display="none": it is still emitted
#     into the output file but is not rendered.
svg = f'''<?xml version="1.0" encoding="UTF-8"?>
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"
     viewBox="0 0 1400 410" width="1400" height="410"
     font-family="Inter, Arial, sans-serif">
  <defs>
    <marker id="arrow-slate" viewBox="0 0 10 10" refX="0" refY="5"
            markerWidth="7" markerHeight="7" markerUnits="userSpaceOnUse" orient="auto">
      <path d="M0,0 L10,5 L0,10 Z" fill="{SLATE}"/>
    </marker>
    <marker id="arrow-gray" viewBox="0 0 10 10" refX="0" refY="5"
            markerWidth="7" markerHeight="7" markerUnits="userSpaceOnUse" orient="auto">
      <path d="M0,0 L10,5 L0,10 Z" fill="#9ca3af"/>
    </marker>
    <marker id="arrow-hl" viewBox="0 0 10 10" refX="0" refY="5"
            markerWidth="7" markerHeight="7" markerUnits="userSpaceOnUse" orient="auto">
      <path d="M0,0 L10,5 L0,10 Z" fill="{HL}"/>
    </marker>
    <marker id="arrow-green" viewBox="0 0 10 10" refX="0" refY="5"
            markerWidth="7" markerHeight="7" markerUnits="userSpaceOnUse" orient="auto">
      <path d="M0,0 L10,5 L0,10 Z" fill="{GREEN}"/>
    </marker>
    <filter id="card-shadow" x="-10%" y="-10%" width="120%" height="130%">
      <feDropShadow dx="0" dy="1" stdDeviation="2" flood-color="#000" flood-opacity="0.06"/>
    </filter>
  </defs>

  <rect width="1400" height="410" fill="#ffffff"/>

  <!-- Panel (a) wrapped in a scale transform so the full pipeline +
       three viewers fit in the left 70% of the canvas. -->
  <g id="panel-a-wrap" transform="translate(0, 5) scale(0.72)">

  <text x="30" y="32" font-size="14" font-weight="700" fill="{GRAY}" letter-spacing="0.02em">
    (a) Pixel-Aligned Heatmap Volume
  </text>
  <line x1="30" y1="40" x2="1370" y2="40" stroke="{LIGHT}" stroke-width="1"/>

  <!-- ══════════════════════════════════════════════════════════════
       Pipeline — compact horizontal row centered at y = 215
       Elements: RGB → DINO → PCA (with pixel hl) → 1×1 Conv → ray strip
       ══════════════════════════════════════════════════════════════ -->

  <!-- (1) Input RGB — 90x90 -->
  <clipPath id="clip-rgb"><rect x="20" y="170" width="90" height="90" rx="5"/></clipPath>
  <image xlink:href="data:image/png;base64,{rgb_b64}"
         x="20" y="170" width="90" height="90" preserveAspectRatio="xMidYMid slice" clip-path="url(#clip-rgb)"/>
  <rect x="20" y="170" width="90" height="90" rx="5" fill="none" stroke="{LIGHT}" stroke-width="1.5"/>
  <text x="65" y="278" text-anchor="middle" font-size="10" font-weight="600" fill="{GRAY}">RGB input</text>

  <line x1="116" y1="215" x2="136" y2="215" stroke="#9ca3af" stroke-width="2" marker-end="url(#arrow-gray)"/>

  <!-- (2) DINO box — 80x80 -->
  <rect x="144" y="175" width="80" height="80" rx="8" fill="#fffbeb" stroke="{DINO_ORANGE}" stroke-width="2"/>
  <text x="184" y="222" text-anchor="middle" font-size="16" font-weight="800" fill="{DINO_ORANGE}">DINO</text>
  <text x="184" y="278" text-anchor="middle" font-size="10" font-weight="600" fill="{GRAY}">backbone</text>

  <line x1="230" y1="215" x2="250" y2="215" stroke="#9ca3af" stroke-width="2" marker-end="url(#arrow-gray)"/>

  <!-- (3) DINO features (PCA viz) — 120x120 -->
  <text x="318" y="144" text-anchor="middle" font-size="10" font-weight="700" fill="{SLATE}" letter-spacing="0.04em">
    DINO FEATURES
  </text>
  <text x="318" y="156" text-anchor="middle" font-size="9" font-weight="500" fill="{GRAY}" font-style="italic">
    64 × 64 × 128
  </text>
  <clipPath id="clip-pca"><rect x="258" y="163" width="120" height="104" rx="5"/></clipPath>
  <image xlink:href="data:image/png;base64,{pca_b64}"
         x="258" y="163" width="120" height="104" preserveAspectRatio="xMidYMid slice" clip-path="url(#clip-pca)"/>
  <rect x="258" y="163" width="120" height="104" rx="5" fill="none" stroke="{LIGHT}" stroke-width="1.5"/>

  <!-- Highlighted pixel on PCA -->
  <rect x="331" y="198" width="12" height="12" rx="1" fill="none" stroke="{HL}" stroke-width="2.5"/>
  <rect x="331" y="198" width="12" height="12" rx="1" fill="{HL}" fill-opacity="0.35"/>
  <text x="318" y="280" text-anchor="middle" font-size="10" font-weight="600" fill="{GRAY}">1 pixel feature</text>

  <!-- Arrow from pixel → 1×1 Conv -->
  <line x1="344" y1="205" x2="400" y2="215" stroke="{HL}" stroke-width="2.5" marker-end="url(#arrow-hl)"/>

  <!-- (4) 1×1 Conv box — 64x36 -->
  <rect x="406" y="197" width="64" height="36" rx="6" fill="#fef9c3" stroke="#ca8a04" stroke-width="2"/>
  <text x="438" y="213" text-anchor="middle" font-size="10" font-weight="800" fill="#854d0e">1×1</text>
  <text x="438" y="225" text-anchor="middle" font-size="10" font-weight="800" fill="#854d0e">Conv</text>

  <line x1="476" y1="215" x2="494" y2="215" stroke="{HL}" stroke-width="2.5" marker-end="url(#arrow-hl)"/>

  <!-- (5) Ray-logit strip — same size as 1×1 Conv box -->
  <text x="563" y="188" text-anchor="middle" font-size="9" font-weight="700" fill="{SLATE}" letter-spacing="0.04em">
    32 RAY LOGITS
  </text>
  <rect x="500" y="197" width="126" height="36" rx="3" fill="#ffffff" stroke="#cbd5e1" stroke-width="1.5"/>
  {{STRIP}}
  <text x="501" y="246" font-size="8" font-weight="600" fill="#94a3b8">h=0</text>
  <text x="605" y="246" font-size="8" font-weight="600" fill="#94a3b8">h=31</text>

  <!-- Unproject arrow: ends on the yellow ray in viewer 1 -->
  <line x1="632" y1="215" x2="707" y2="192" stroke="{GREEN}" stroke-width="2.5" marker-end="url(#arrow-green)"/>
  <text x="660" y="204" text-anchor="middle" font-size="11" font-weight="700" fill="{GREEN}" font-style="italic">
    unproject
  </text>

  <!-- ══════════════════════════════════════════════════════════════
       Three LIBERO viewers side-by-side on the right.
       Each: card 200x190 with image 184x174 inside, 24px gap between.
       ══════════════════════════════════════════════════════════════ -->

  <!-- Viewer 1: frustum + ray -->
  <g id="viewer-1">
    <rect x="700" y="140" width="200" height="190" rx="10" fill="#ffffff" stroke="{GREEN}" stroke-width="2" filter="url(#card-shadow)"/>
    <clipPath id="clip-v1"><rect x="708" y="148" width="184" height="174" rx="5"/></clipPath>
    <image xlink:href="data:image/png;base64,{frustum_b64}"
           x="708" y="148" width="184" height="174" preserveAspectRatio="xMidYMid slice" clip-path="url(#clip-v1)"/>
    <text x="800" y="127" text-anchor="middle" font-size="10" font-weight="700" fill="{SLATE}" letter-spacing="0.04em">
      ① ONE RAY
    </text>
    <text x="800" y="350" text-anchor="middle" font-size="10" font-weight="500" fill="{GRAY}">
      ray into scene
    </text>
  </g>

  <!-- Arrow viewer 1 → viewer 2 -->
  <line x1="906" y1="235" x2="930" y2="235" stroke="#9ca3af" stroke-width="2" marker-end="url(#arrow-gray)"/>

  <!-- Viewer 2: full heatmap volume + argmax highlight -->
  <g id="viewer-2">
    <rect x="936" y="140" width="200" height="190" rx="10" fill="#ffffff" stroke="{GREEN}" stroke-width="2" filter="url(#card-shadow)"/>
    <clipPath id="clip-v2"><rect x="944" y="148" width="184" height="174" rx="5"/></clipPath>
    <image xlink:href="data:image/png;base64,{volume_b64}"
           x="944" y="148" width="184" height="174" preserveAspectRatio="xMidYMid slice" clip-path="url(#clip-v2)"/>
    <text x="1036" y="127" text-anchor="middle" font-size="10" font-weight="700" fill="{SLATE}" letter-spacing="0.04em">
      ② FULL VOLUME + ARGMAX
    </text>
    <text x="1036" y="350" text-anchor="middle" font-size="10" font-weight="500" fill="{GRAY}">
      peak voxel = 3D target
    </text>
  </g>

  <!-- Arrow viewer 2 → viewer 3 -->
  <line x1="1142" y1="235" x2="1166" y2="235" stroke="#9ca3af" stroke-width="2" marker-end="url(#arrow-gray)"/>

  <!-- Viewer 3: robot servo to target -->
  <g id="viewer-3">
    <rect x="1172" y="140" width="200" height="190" rx="10" fill="#ffffff" stroke="{GREEN}" stroke-width="2" filter="url(#card-shadow)"/>
    <clipPath id="clip-v3"><rect x="1180" y="148" width="184" height="174" rx="5"/></clipPath>
    <image xlink:href="data:image/png;base64,{servo_b64}"
           x="1180" y="148" width="184" height="174" preserveAspectRatio="xMidYMid slice" clip-path="url(#clip-v3)"/>
    <text x="1272" y="127" text-anchor="middle" font-size="10" font-weight="700" fill="{SLATE}" letter-spacing="0.04em">
      ③ ROBOT MOVES
    </text>
    <text x="1272" y="350" text-anchor="middle" font-size="10" font-weight="500" fill="{GRAY}">
      servo to 3D target
    </text>
  </g>

  </g>  <!-- /panel-a-wrap -->

  <!-- ══════════════════════════════════════════════════════════════
       PANEL (b) — Height prediction is view-invariant
       Right 30% of the canvas. Image + overlaid depth/height labels.
       ══════════════════════════════════════════════════════════════ -->
  <text x="1010" y="30" font-size="13" font-weight="700" fill="{GRAY}" letter-spacing="0.02em">
    (b) Height is view-invariant
  </text>
  <line x1="1010" y1="38" x2="1390" y2="38" stroke="{LIGHT}" stroke-width="1"/>

  <g id="panel-b">
    <!-- Rendered LIBERO scene with two camera frustums + rays to GT target -->
    <rect x="{PB_IMG_X - 6}" y="{PB_IMG_Y - 6}" width="{PB_IMG_W + 12}" height="{PB_IMG_H + 12}" rx="12"
          fill="#ffffff" stroke="{GREEN}" stroke-width="2.5" filter="url(#card-shadow)"/>
    <clipPath id="clip-pb"><rect x="{PB_IMG_X}" y="{PB_IMG_Y}" width="{PB_IMG_W}" height="{PB_IMG_H}" rx="6"/></clipPath>
    <image xlink:href="data:image/png;base64,{two_frustums_b64}"
           x="{PB_IMG_X}" y="{PB_IMG_Y}" width="{PB_IMG_W}" height="{PB_IMG_H}"
           preserveAspectRatio="xMidYMid slice" clip-path="url(#clip-pb)"/>

    <!-- Camera A label (offset above-left of its 2D position) -->
    <text x="{camA_x - 18}" y="{camA_y + 36}" text-anchor="end" font-size="13" font-weight="800" fill="#2882eb">camera A</text>

    <!-- Camera B label (offset above of its 2D position) -->
    <text x="{camB_x + 22}" y="{camB_y - 8}" font-size="13" font-weight="800" fill="#eb641c">camera B</text>

    <!-- Depth A callout box -->
    <rect x="{rayA_mx - 60}" y="{rayA_my - 52}" width="110" height="22" rx="4"
          fill="#ffffff" stroke="#2882eb" stroke-width="1.8"/>
    <text x="{rayA_mx - 5}" y="{rayA_my - 36}" text-anchor="middle" font-size="12" font-weight="800" fill="#2882eb">
      depth = {_depth_A_str}
    </text>
    <!-- Thin leader line from the callout to the ray mid -->
    <line x1="{rayA_mx - 5}" y1="{rayA_my - 30}" x2="{rayA_mx}" y2="{rayA_my - 4}"
          stroke="#2882eb" stroke-width="1.2"/>

    <!-- Depth B callout box -->
    <rect x="{rayB_mx + 24}" y="{rayB_my - 12}" width="110" height="22" rx="4"
          fill="#ffffff" stroke="#eb641c" stroke-width="1.8"/>
    <text x="{rayB_mx + 79}" y="{rayB_my + 4}" text-anchor="middle" font-size="12" font-weight="800" fill="#eb641c">
      depth = {_depth_B_str}
    </text>
    <line x1="{rayB_mx + 22}" y1="{rayB_my - 1}" x2="{rayB_mx + 4}" y2="{rayB_my + 2}"
          stroke="#eb641c" stroke-width="1.2"/>

    <!-- Height bracket callout (labels the vertical target→ground line) -->
    <rect x="{tot_x + 28}" y="{height_my - 8}" width="130" height="22" rx="4"
          fill="#ffffff" stroke="{GREEN}" stroke-width="1.8"/>
    <text x="{tot_x + 93}" y="{height_my + 8}" text-anchor="middle" font-size="12" font-weight="800" fill="{GREEN}">
      height z* = {_height_str}
    </text>
    <line x1="{tot_x + 26}" y1="{height_my + 3}" x2="{tot_x + 4}" y2="{height_my + 3}"
          stroke="{GREEN}" stroke-width="1.2"/>

    <!-- 3D target label -->
    <text x="{tgt_x - 18}" y="{tgt_y - 14}" text-anchor="end" font-size="12" font-weight="800" fill="{GREEN}">3D target</text>
    <line x1="{tgt_x - 16}" y1="{tgt_y - 11}" x2="{tgt_x - 4}" y2="{tgt_y - 4}"
          stroke="{GREEN}" stroke-width="1.2"/>

    <!-- Summary caption below the image -->
    <text x="{PB_IMG_X + PB_IMG_W/2}" y="{PB_IMG_Y + PB_IMG_H + 18}" text-anchor="middle" font-size="11" font-weight="600" fill="{SLATE}">
      <tspan fill="#2882eb" font-weight="800">{_depth_A_str}</tspan> vs <tspan fill="#eb641c" font-weight="800">{_depth_B_str}</tspan>
      depth, same <tspan fill="{GREEN}" font-weight="800">height z*</tspan>
    </text>
    <text x="{PB_IMG_X + PB_IMG_W/2}" y="{PB_IMG_Y + PB_IMG_H + 36}" text-anchor="middle" font-size="11" font-weight="700" fill="{GREEN}" font-style="italic">
      PARA predicts height → view-invariant
    </text>
  </g>

  <!-- Remove legacy pure-vector panel b content (hidden via dummy wrapper) -->
  <g id="panel-b-legacy" display="none">
    <!-- Ground / robot base line (at y=720) -->
    <line x1="140" y1="720" x2="1260" y2="720" stroke="#8b7355" stroke-width="2.5"/>
    <!-- Wood-texture hatch below ground -->
    <g stroke="#8b7355" stroke-width="1" stroke-opacity="0.55">
      <line x1="140" y1="726" x2="160" y2="734"/>
      <line x1="170" y1="726" x2="190" y2="734"/>
      <line x1="200" y1="726" x2="220" y2="734"/>
      <line x1="230" y1="726" x2="250" y2="734"/>
      <line x1="260" y1="726" x2="280" y2="734"/>
      <line x1="290" y1="726" x2="310" y2="734"/>
      <line x1="320" y1="726" x2="340" y2="734"/>
      <line x1="350" y1="726" x2="370" y2="734"/>
      <line x1="380" y1="726" x2="400" y2="734"/>
      <line x1="410" y1="726" x2="430" y2="734"/>
      <line x1="440" y1="726" x2="460" y2="734"/>
      <line x1="470" y1="726" x2="490" y2="734"/>
      <line x1="500" y1="726" x2="520" y2="734"/>
      <line x1="530" y1="726" x2="550" y2="734"/>
      <line x1="560" y1="726" x2="580" y2="734"/>
      <line x1="590" y1="726" x2="610" y2="734"/>
      <line x1="620" y1="726" x2="640" y2="734"/>
      <line x1="650" y1="726" x2="670" y2="734"/>
      <line x1="680" y1="726" x2="700" y2="734"/>
      <line x1="710" y1="726" x2="730" y2="734"/>
      <line x1="740" y1="726" x2="760" y2="734"/>
      <line x1="770" y1="726" x2="790" y2="734"/>
      <line x1="800" y1="726" x2="820" y2="734"/>
      <line x1="830" y1="726" x2="850" y2="734"/>
      <line x1="860" y1="726" x2="880" y2="734"/>
      <line x1="890" y1="726" x2="910" y2="734"/>
      <line x1="920" y1="726" x2="940" y2="734"/>
      <line x1="950" y1="726" x2="970" y2="734"/>
      <line x1="980" y1="726" x2="1000" y2="734"/>
      <line x1="1010" y1="726" x2="1030" y2="734"/>
      <line x1="1040" y1="726" x2="1060" y2="734"/>
      <line x1="1070" y1="726" x2="1090" y2="734"/>
      <line x1="1100" y1="726" x2="1120" y2="734"/>
      <line x1="1130" y1="726" x2="1150" y2="734"/>
      <line x1="1160" y1="726" x2="1180" y2="734"/>
      <line x1="1190" y1="726" x2="1210" y2="734"/>
      <line x1="1220" y1="726" x2="1240" y2="734"/>
    </g>
    <text x="1270" y="724" font-size="10" font-weight="600" fill="#8b7355">ground / robot base</text>

    <!-- Shared height plane (dashed green) at y=625 -->
    <line x1="140" y1="625" x2="1260" y2="625" stroke="{GREEN}" stroke-width="1.8" stroke-dasharray="8,5" stroke-opacity="0.65"/>
    <text x="148" y="618" font-size="10" font-weight="800" fill="{GREEN}" letter-spacing="0.06em">
      HEIGHT PLANE  z = z*
    </text>

    <!-- Vertical height bracket at the target -->
    <line x1="755" y1="625" x2="755" y2="720" stroke="{GREEN}" stroke-width="1.5"/>
    <line x1="750" y1="625" x2="760" y2="625" stroke="{GREEN}" stroke-width="1.8"/>
    <line x1="750" y1="720" x2="760" y2="720" stroke="{GREEN}" stroke-width="1.8"/>
    <rect x="766" y="662" width="82" height="22" rx="4" fill="#ffffff" stroke="{GREEN}" stroke-width="1.5"/>
    <text x="807" y="677" text-anchor="middle" font-size="11" font-weight="800" fill="{GREEN}">z* = 0.15 m</text>

    <!-- 3D target on the height plane -->
    <circle cx="700" cy="625" r="11" fill="none" stroke="{GREEN}" stroke-width="2.5"/>
    <circle cx="700" cy="625" r="5" fill="{GREEN}"/>
    <text x="678" y="613" text-anchor="end" font-size="11" font-weight="700" fill="{GREEN}">3D target</text>

    <!-- ─── Camera A (close, blue) ─────────────────────────── -->
    <g id="cam-a">
      <rect x="398" y="585" width="42" height="26" rx="3" fill="#334155" stroke="#0f172a" stroke-width="1"/>
      <rect x="403" y="577" width="24" height="10" rx="1.5" fill="#334155"/>
      <circle cx="444" cy="598" r="6" fill="#e5e7eb" stroke="#0f172a" stroke-width="1.2"/>
      <text x="420" y="570" text-anchor="middle" font-size="11" font-weight="800" fill="#2563eb">camera A</text>
    </g>
    <!-- Camera A frustum cone: 2 dashed lines flaring out toward target -->
    <line x1="449" y1="598" x2="695" y2="580" stroke="#94a3b8" stroke-width="1" stroke-dasharray="4,3"/>
    <line x1="449" y1="598" x2="695" y2="685" stroke="#94a3b8" stroke-width="1" stroke-dasharray="4,3"/>
    <!-- Image plane A (small rect perpendicular to the principal direction) -->
    <line x1="510" y1="588" x2="510" y2="618" stroke="#94a3b8" stroke-width="1.3"/>
    <!-- Camera A ray to target -->
    <line x1="449" y1="598" x2="693" y2="621" stroke="#2563eb" stroke-width="3" stroke-opacity="0.95"/>
    <!-- Depth A label -->
    <rect x="516" y="596" width="100" height="22" rx="4" fill="#ffffff" stroke="#2563eb" stroke-width="1.5"/>
    <text x="566" y="611" text-anchor="middle" font-size="11" font-weight="800" fill="#2563eb">depth = 0.35 m</text>

    <!-- ─── Camera B (far + high, orange) ───────────────────── -->
    <g id="cam-b">
      <rect x="1180" y="535" width="44" height="26" rx="3" fill="#334155" stroke="#0f172a" stroke-width="1"/>
      <rect x="1198" y="527" width="24" height="10" rx="1.5" fill="#334155"/>
      <circle cx="1178" cy="548" r="6" fill="#e5e7eb" stroke="#0f172a" stroke-width="1.2"/>
      <text x="1204" y="520" text-anchor="middle" font-size="11" font-weight="800" fill="#ea580c">camera B</text>
    </g>
    <!-- Camera B frustum cone -->
    <line x1="1172" y1="548" x2="690" y2="595" stroke="#94a3b8" stroke-width="1" stroke-dasharray="4,3"/>
    <line x1="1172" y1="548" x2="690" y2="700" stroke="#94a3b8" stroke-width="1" stroke-dasharray="4,3"/>
    <!-- Image plane B -->
    <line x1="1065" y1="588" x2="1032" y2="605" stroke="#94a3b8" stroke-width="1.3"/>
    <!-- Camera B ray to target -->
    <line x1="1172" y1="548" x2="710" y2="623" stroke="#ea580c" stroke-width="3" stroke-opacity="0.95"/>
    <!-- Depth B label (near mid-ray) -->
    <rect x="886" y="576" width="100" height="22" rx="4" fill="#ffffff" stroke="#ea580c" stroke-width="1.5"/>
    <text x="936" y="591" text-anchor="middle" font-size="11" font-weight="800" fill="#ea580c">depth = 0.98 m</text>

    <!-- Caption -->
    <text x="700" y="760" text-anchor="middle" font-size="13" font-weight="600" fill="{SLATE}">
      Same 3D target, very different depths — but the <tspan fill="{GREEN}" font-weight="800">height z*</tspan> above the ground is identical in both views.
    </text>
    <text x="700" y="776" text-anchor="middle" font-size="12" font-weight="700" fill="{GREEN}" font-style="italic">
      PARA predicts height directly → view-invariant.
    </text>
  </g>
</svg>
'''

# The template carries a literal "{STRIP}" token (written as "{{STRIP}}" inside
# the f-string); swap it for the generated ray-strip rectangles.
svg = svg.replace("{STRIP}", strip_svg)

out = "/data/cameron/para/paper/figs/svg/fig2_method.svg"
# The XML prolog declares encoding="UTF-8" and the markup contains non-ASCII
# glyphs (arrows, ×, circled digits), so encode explicitly rather than relying
# on the locale default, which may be a different codec or fail outright.
with open(out, "w", encoding="utf-8") as f:
    f.write(svg)
# Report the real on-disk size: len(svg) counts characters, not bytes.
print(f"[{time.time()-_t:.2f}s] wrote {out} ({os.path.getsize(out)} bytes)")
