"""Convert smith300 jpg episodes → MP4s for Motion-LoRA's decord-based loader.

Run once. Output:
    <this_dir>/<episode_id>.mp4               — full episodes (decord samples 7-frame windows)
    <this_dir>/validation_images/<id>.jpg     — first frame of each episode for img2vid validation
"""
from pathlib import Path
import json
import shutil
import imageio.v2 as imageio
import numpy as np

# Dataset root that holds the source frames and the rgb_overlay/episodes.json manifest.
SRC = Path("/data/cameron/mac_robot_datasets/dataset_20260501_180125")
# Everything is written into the directory containing this script.
DST = Path(__file__).resolve().parent
FPS = 7  # what the SVD pipeline uses; decord just sees frame indices, so this is cosmetic
VAL_DIR = DST.joinpath("validation_images")
VAL_DIR.mkdir(parents=True, exist_ok=True)

# The manifest's "episodes" entry is iterated below; each item is read for
# its "id", "start" and "end" fields.
with (SRC / "rgb_overlay" / "episodes.json").open() as _manifest:
    episodes = json.load(_manifest)["episodes"]

# Encode every episode listed in the manifest as <ep_id>.mp4 under DST and
# save its first available frame as VAL_DIR/<ep_id>.jpg.
for ep in episodes:
    ep_id = ep["id"]
    # "start" and "end" are inclusive frame indices (note the end + 1 below).
    start, end = int(ep["start"]), int(ep["end"])

    # Best-effort load: frames absent from disk are skipped, but counted so
    # that a shortened episode does not go unnoticed.
    expected = [SRC / f"rgb_{i:06d}.jpg" for i in range(start, end + 1)]
    present = [p for p in expected if p.is_file()]
    if not present:
        # np.stack([]) raises ValueError, which would abort every remaining
        # episode; skip this one and keep going instead.
        print(f"skipping {ep_id}: no frames found for indices {start}..{end} in {SRC}")
        continue
    missing = len(expected) - len(present)
    if missing:
        print(f"warning: {ep_id} is missing {missing} of {len(expected)} frames")

    # Stack into one array with frames along axis 0; this requires every frame
    # to have the same shape.
    frames = np.stack([imageio.imread(str(p)) for p in present])
    out_mp4 = DST / f"{ep_id}.mp4"
    imageio.mimwrite(
        str(out_mp4), frames, fps=FPS,
        codec="libx264", quality=8,
        # macro_block_size=1 stops imageio from resizing frames up to a
        # multiple of its default macro block size.
        macro_block_size=1,
        # +faststart asks ffmpeg to place the index at the front of the file.
        ffmpeg_params=["-movflags", "+faststart"],
    )
    print(f"wrote {out_mp4} — {len(frames)} frames @ {frames.shape[1:]}")

    # Save first frame as a validation image
    val_jpg = VAL_DIR / f"{ep_id}.jpg"
    imageio.imwrite(str(val_jpg), frames[0])

print(f"validation images in {VAL_DIR}")
