"""Convert towel + UMI smith300 jpg episodes -> combined MP4 dir for Motion-LoRA's
decord-based loader. Outputs:
    dataset/towel_umi_combined_20260507/<ds>__<ep_id>.mp4
    dataset/towel_umi_combined_20260507_val/<ds>__<ep_id>.jpg
"""
from pathlib import Path
import json
import imageio.v2 as imageio
import numpy as np

# Playback rate stored in the MP4; noted by the author as cosmetic because the
# decord-based loader addresses frames by index.
FPS = 7

# Input capture directories (first: towel, second: UMI). Each is read for
# rgb_<index>.jpg frames and an rgb_overlay/episodes.json episode index.
SRCS = [
    Path(src_dir)
    for src_dir in (
        "/data/cameron/mac_robot_datasets/dataset_20260506_124503",
        "/data/cameron/mac_robot_datasets/dataset_20260506_151912",
    )
]

# Outputs live next to this script: DST receives the per-episode MP4s,
# VAL receives one JPG per episode.
ROOT = Path(__file__).resolve().parent
DST = ROOT / "towel_umi_combined_20260507"
VAL = ROOT / "towel_umi_combined_20260507_val"
for _out_dir in (DST, VAL):
    _out_dir.mkdir(parents=True, exist_ok=True)

# Convert every episode listed in each source's rgb_overlay/episodes.json into
# one MP4 (all available frames) plus one JPG (the first available frame).
n_total = 0
for src in SRCS:
    eps = json.loads((src / "rgb_overlay" / "episodes.json").read_text())["episodes"]
    for ep in eps:
        ep_id = ep["id"]
        # "start" and "end" are treated as an inclusive frame-index range.
        start, end = int(ep["start"]), int(ep["end"])

        # Resolve which frame files exist up front so that an empty episode is
        # skipped before any output file is created.
        wanted = [src / f"rgb_{i:06d}.jpg" for i in range(start, end + 1)]
        paths = [p for p in wanted if p.is_file()]
        if not paths:
            print(f"  skip empty {src.name}/{ep_id}")
            continue

        # Missing frames are still skipped (best effort), but no longer
        # silently: a gap changes the temporal spacing of the resulting video.
        n_missing = len(wanted) - len(paths)
        if n_missing:
            print(
                f"  warning: {src.name}/{ep_id} is missing "
                f"{n_missing} of {len(wanted)} frames"
            )

        tag = f"{src.name}__{ep_id}"
        out_mp4 = DST / f"{tag}.mp4"

        # Stream frames straight into the encoder instead of stacking the whole
        # episode in memory; only the first frame is kept for the validation JPG.
        first_frame = None
        with imageio.get_writer(
            str(out_mp4), mode="I", fps=FPS,
            codec="libx264", quality=8, macro_block_size=1,
            ffmpeg_params=["-movflags", "+faststart"],
        ) as writer:
            for p in paths:
                frame = imageio.imread(str(p))
                if first_frame is None:
                    first_frame = frame
                writer.append_data(frame)

        imageio.imwrite(str(VAL / f"{tag}.jpg"), first_frame)
        print(f"  wrote {out_mp4.name} ({len(paths)} frames)")
        n_total += 1

print(f"total {n_total} episodes -> {DST}")
print(f"validation images -> {VAL}")
