o
     )jI!  ã                   @   s°   d Z ddlZddlZddlZddlmZ g d¢Ze d¡Ze d¡Z	e d¡Z
dd	„ Zd
d„ Zdd„ Zdd„ Zdd„ Zddd„Zddd„Zdd„ Zdd„ Zdd„ Zddd„ZdS )u  Filesystem scan/parse for the mock-wandb viewer (omidlab.net/runs/).

Pure functions, no Flask. serve.py imports list_runs / run_detail / find_run and
wraps them in thin JSON routes (mirrors the data_viewer pattern).

Design notes live in vault/fleet/agents/our_wandb/{overview,memory}.md. Highlights:
- Run dirs: ``run-<YYYYMMDD_HHMMSS>-<id>/`` under the configured roots.
- Grouping key = ``--name`` (newer train.py) or ``--run_name`` (older), read from
  ``files/wandb-metadata.json`` args (always present). Fallback: program basename.
- Loss step-series come from ``files/output.log`` (newer train.py prints
  ``ckpt @ <step>  total=.. vol=.. grip=.. ..``). Older runs have no inline losses;
  we surface the final value from wandb-summary.json instead. No .wandb parsing (v2).
- config.yaml / wandb-summary.json only exist on COMPLETED runs â€” never required.

Fail-loud per fleet GUIDELINES, EXCEPT the list path tolerates a single unreadable
run dir (stale FUSE) by skipping it â€” one wedged dir must not blank the dashboard.
é    N)ÚPath)z0/home/cameronsmith/mnt/yukon/cameron/puget/wandbz5/home/cameronsmith/mnt/yukon/cameron/puget/code/wandbz5/home/cameronsmith/mnt/yukon/cameron/puget/data/wandbz"^run-(\d{8}_\d{6})-([0-9a-zA-Z]+)$z-ckpt @ (\d+)\s+(.*?)(?:\s+[\d.]+\s*it/s)?\s*$z,([A-Za-z_/]+)=([-+]?[\d.]+(?:[eE][-+]?\d+)?)c                  C   s(   t j d¡} | rdd„ |  d¡D ƒS tS )NZWANDB_ROOTSc                 S   s   g | ]
}|  ¡ r|  ¡ ‘qS © )Ústrip©Ú.0Úrr   r   ú0/data/cameron/para/.agents/reports/wandb_runs.pyÚ
<listcomp>'   s    zget_roots.<locals>.<listcomp>ú,)ÚosÚenvironÚgetÚsplitÚDEFAULT_ROOTS)Úenvr   r   r   Ú	get_roots$   s   r   c                 G   s@   |D ]}|| v r|   |¡}|d t| ƒk r| |d    S qdS )zHReturn the value following the first matching flag in a wandb args list.é   N)ÚindexÚlen)ÚargsÚflagsÚflagÚir   r   r   Ú
_arg_value+   s   
€r   c                 C   s&   | d d }|  ¡ si S t | ¡ ¡S )uK   Parse wandb-metadata.json â†’ dict (always present). Returns {} if missing.Úfileszwandb-metadata.json)ÚexistsÚjsonÚloadsÚ	read_text)Úrun_dirÚpr   r   r   Ú_read_metadata5   s   r!   c                 C   s@   |   dg ¡pg }t|ddƒ}|r|S |   d¡}|rt|ƒjS |S )Nr   z--namez
--run_nameÚprogram)r   r   r   Ústem)ÚmetaÚrun_idr   Únamer"   r   r   r   Ú	_run_name=   s   

r'   c                 C   s8   | d d }|  ¡ si S t | ¡ ¡}dd„ | ¡ D ƒS )zCFinal scalar metrics from wandb-summary.json (completed runs only).r   zwandb-summary.jsonc                 S   s.   i | ]\}}t |ttfƒr| d ¡s||“qS )Ú_)Ú
isinstanceÚintÚfloatÚ
startswith©r   ÚkÚvr   r   r   Ú
<dictcomp>O   s    ýýÿz&_run_summary_brief.<locals>.<dictcomp>)r   r   r   r   Úitems)r   r    Údatar   r   r   Ú_run_summary_briefH   s   þr3   c                 C   sª  | pt ƒ } g }d}tƒ }| D ]|}t|ƒ}| ¡ sqzt| ¡ ƒ}W n ty.   |d7 }Y qw |D ]X}t |j	¡}|s<q1| 
d¡}||v rFq1zt|ƒ}	| ¡ j}
W n ty^   |d7 }Y q1w | |¡ | |t|	|ƒ|	 d¡|	 d¡|	 d¡t|	 dg ¡pg dƒ|
t|ƒd	œ¡ q1qi }|D ]}| |d
 g ¡ |¡ qg }| ¡ D ]\}}|jdd„ dd | |t|ƒ|d d |dœ¡ q£|jdd„ dd ||dd„ | D ƒdœS )u  Scan roots â†’ list of run summaries grouped by name.

    Returns {"groups": [{"name", "count", "runs": [...]}], "n_skipped": int,
             "roots": [...]}, newest run first within each group, groups sorted
             by their newest run's mtime.
    r   r   é   Ú	startedAtÚhostÚgpur   z--wandb_project)Úidr&   Ú
started_atr6   r7   ZprojectÚmtimeÚrootr&   c                 S   ó   | d S )Nr:   r   )r   r   r   r   Ú<lambda>ˆ   ó    zlist_runs.<locals>.<lambda>T)ÚkeyÚreverser:   )r&   ÚcountÚnewest_mtimeÚrunsc                 S   r<   )NrB   r   )Úgr   r   r   r=      r>   c                 S   s   g | ]}t |ƒ‘qS r   )Ústrr   r   r   r   r	      s    zlist_runs.<locals>.<listcomp>)ÚgroupsÚ	n_skippedÚroots)r   Úsetr   Úis_dirÚsortedÚiterdirÚOSErrorÚ_RUN_REÚmatchr&   Úgroupr!   ÚstatÚst_mtimeÚaddÚappendr'   r   r   rE   Ú
setdefaultr1   Úsortr   )rH   rC   ZskippedZseen_idsr;   ÚentriesÚdÚmr%   r$   r:   rF   r   Úoutr&   Z
group_runsr   r   r   Ú	list_runsV   sl   
þ
þ

øò

ür[   c              	   C   sz   |pt ƒ }|D ]3}t|ƒ}| ¡ sqz| ¡ D ]}t |j¡}|r/| d¡| kr/|  W   S qW q ty:   Y qw dS )z>Locate a run dir by its id across roots. Returns Path or None.r4   N)	r   r   rJ   rL   rN   rO   r&   rP   rM   )r%   rH   r;   rX   rY   r   r   r   Úfind_run“   s    
€ýÿr\   c                 C   sˆ   | d d }|  ¡ si S i }|jdd ¡ D ]+}t |¡}|s qt| d¡ƒ}t | d¡¡D ]\}}| 	|g ¡ 
|t|ƒg¡ q/q|S )z‘Parse newer train.py's output.log into {metric: [[step, val], ...]}.

    Returns {} when no inline loss lines exist (older runs / pre-training).r   z
output.logÚreplace)Úerrorsr   r4   )r   r   Ú
splitlinesÚ_CKPT_REÚsearchr*   rP   Ú_KV_REÚfindallrU   rT   r+   )r   r    ÚseriesÚlineÚcmÚstepr?   Úvalr   r   r   Úparse_loss_series¤   s   
ÿri   c                 C   s   | d d d }|  ¡ sg S i }| d¡D ]P}| | d ¡}|j}| dd¡}d}|jj}t|ƒdkrW|d	  ¡ rWt	|d	 ƒ}d|d
 v rO|d
  dd	¡d
 n|d
 }	|	pV|}| 
|g ¡ t|ƒ|dœ¡ q| ¡ D ]
}
|
jdd„ d qjdd„ t| ¡ ƒD ƒS )zGroup logged PNGs by panel key. Returns [{"panel", "images": [...]}].

    Each image: {"rel": <path under files/>, "step": int|None}. Newest step last.
    r   ÚmediaÚimagesz*.pngr(   r4   Né   r   r   )Úrelrg   c                 S   s   | d d u | d p
dfS )Nrg   r   r   )r   r   r   r   r=   Ì   s    zlist_media.<locals>.<lambda>)r?   c                 S   s   g | ]	\}}||d œ‘qS ))Úpanelrk   r   r-   r   r   r   r	   Í   s    zlist_media.<locals>.<listcomp>)rJ   ÚrglobÚrelative_tor#   ÚrsplitÚparentr&   r   Úisdigitr*   rU   rT   rE   ÚvaluesrV   rK   r1   )r   Z
media_rootZpanelsZpngrm   r#   Úpartsrg   rn   r?   Zimgsr   r   r   Ú
list_media¶   s$   (rv   c                 C   s¨   |  dg ¡pg }i }d}|t|ƒk rR|| }| d¡rH|d t|ƒk r<||d   d¡s<||d  || d¡< |d7 }nd|| d¡< |d7 }n|d7 }|t|ƒk s|S )zKBest-effort config: prefer metadata args (always present, flat & readable).r   r   z--r   ú-r4   T)r   r   r,   Úlstrip)r   r$   r   Zcfgr   Ztokr   r   r   Ú_config_argsÐ   s   
"

öry   c                 C   s€   t | |ƒ}|du rdS t|ƒ}t|ƒ}| t|| ƒt|ƒ| d¡| d¡| d¡| d¡| d¡t||ƒt|ƒ|t|ƒt	|ƒdœS )z@Full per-run payload for the detail view. None if run not found.Nr5   r6   r7   Úpythonr"   )r8   r&   Údirr9   r6   r7   rz   r"   ÚconfigÚsummaryÚloss_seriesÚhas_step_historyrj   )
r\   r!   ri   r'   rE   r   ry   r3   Úboolrv   )r%   rH   r   r$   rd   r   r   r   Ú
run_detailã   s&   
ór   )N)Ú__doc__r   r   ÚreÚpathlibr   r   ÚcompilerN   r`   rb   r   r   r!   r'   r3   r[   r\   ri   rv   ry   r   r   r   r   r   Ú<module>   s(    





=