#!/bin/bash
# Launch DROID full-dataset training after download completes.
# Run with: bash launch_droid_train.sh

# Strict mode:
#   -e          abort on the first failing command
#   -u          treat use of an unset variable as an error
#   pipefail    a pipeline fails if ANY stage fails, so a crash in train.py
#               is not hidden behind the (successful) exit status of `tee`
set -euo pipefail

DATA_ROOT="/data/cameron/droid"
SCRIPT_DIR="/data/cameron/para_droid_pretrain/libero"
DOWNLOAD_SESSION="droid_download"  # tmux session the download runs in
POLL_SECONDS=120                   # delay between progress reports
LOG_FILE="/data/cameron/para_droid_pretrain/train_droid_full.log"

# Print an error message to stderr and exit with status 1.
die() {
    printf 'ERROR: %s\n' "$*" >&2
    exit 1
}

# Print the human-readable disk usage of directory $1 (first column of
# `du -sh`). Prints an empty string if `du` produces no output. A failing
# `du` is tolerated on purpose: the directory may not exist yet, or files may
# be moving underneath it while the download is still running.
dir_size() {
    { du -sh -- "$1" || true; } | cut -f1
}

# Print the number of entries below directory $1 whose name matches the
# glob $2. A failing `find` (missing directory, entries vanishing mid-scan)
# is tolerated so that progress reporting never aborts the script.
count_files() {
    { find "$1" -name "$2" || true; } | wc -l
}

# --- Wait for the download -------------------------------------------------
# The loop runs for as long as the tmux session exists and prints a progress
# line every POLL_SECONDS. Note that `tmux has-session` also returns non-zero
# when tmux is not installed or no server is running, in which case the loop
# is skipped entirely.
echo "Waiting for DROID download to complete..."
while tmux has-session -t "$DOWNLOAD_SESSION" 2>/dev/null; do
    DOWNLOADED=$(dir_size "$DATA_ROOT" 2>/dev/null)
    N_PARQUET=$(count_files "$DATA_ROOT/data" "*.parquet" 2>/dev/null)
    N_VIDEO=$(count_files "$DATA_ROOT/videos" "*.mp4" 2>/dev/null)
    echo "  $(date '+%H:%M:%S') — $DOWNLOADED downloaded, $N_PARQUET parquets, $N_VIDEO videos"
    sleep "$POLL_SECONDS"
done

# The session being gone only means the download process is no longer
# running, not that it succeeded. Refuse to start a training job against a
# dataset directory that is not there at all.
[[ -d "$DATA_ROOT" ]] || die "dataset directory not found: $DATA_ROOT"

TOTAL_SIZE=$(dir_size "$DATA_ROOT")
TOTAL_PARQUET=$(count_files "$DATA_ROOT/data" '*.parquet')
TOTAL_VIDEO=$(count_files "$DATA_ROOT/videos" '*.mp4')

echo "Download complete!"
echo "  Total size: $TOTAL_SIZE"
echo "  Parquets: $TOTAL_PARQUET"
echo "  Videos: $TOTAL_VIDEO"

if (( TOTAL_PARQUET == 0 )); then
    echo "WARNING: no *.parquet files found under $DATA_ROOT/data" >&2
fi

# --- Launch training -------------------------------------------------------
echo ""
echo "Launching training..."
cd "$SCRIPT_DIR" || die "cannot cd to $SCRIPT_DIR"

# The environment variables are set for the python process only. Combined
# stdout/stderr is shown on the terminal and copied to LOG_FILE via `tee`;
# thanks to pipefail the script exits non-zero if train.py fails.
MUJOCO_GL=egl CUDA_VISIBLE_DEVICES=5 \
  DINO_REPO_DIR=/data/cameron/keygrip/volume_dino_tracks \
  DINO_WEIGHTS_PATH=/data/cameron/keygrip/dinov3/weights/dinov3_vits16plus_pretrain_lvd1689m-4057cbaa.pth \
  python train.py \
    --droid \
    --droid_data_root "$DATA_ROOT" \
    --droid_camera ext2 \
    --batch_size 48 \
    --epochs 20 \
    --lr 1e-4 \
    --run_name droid_pretrain_full_ext2 \
    --wandb_project para_droid \
    --wandb_mode online \
    --skip_rotation \
    --frame_stride 2 \
    --vis_every_steps 200 \
    --save_every_steps 2000 \
    2>&1 | tee "$LOG_FILE"
