
    *iL                       d Z ddlmZ ddlZddlZddlZddlmZ ddlmZ ddl	m
Z
 ddlmZmZmZmZmZ 	 ddlZdZej        Zej        Zn# e$ r dZd	ZdZdZY nw xY wdd
lmZ ddlmZ ddlmZ dZ dZ!dZ"dZ# G d d          Z$ddZ%dS )a  Microphone-based subtask narration aligned with event markers.

Pattern (mirrors the validated `xarm_audio_pedal/record_audio.py`):
  - First audio-pedal press during an active recording starts a continuous
    PyAudio input stream.
  - Each subsequent press marks a segment boundary in the running stream.
  - On episode stop, the stream is closed and one WAV is written per
    segment under ``<recording_dir>/audio/`` along with a JSON sidecar.

Boundary timestamps share the camera/robot clock used by ``event_markers``
(via the ``capture_clock`` callable injected by ``DemonstrationRecorder``),
so audio narration aligns with `robot_data.npz` and converted camera frames
without a post-hoc shim.
    )annotationsN)deque)datetime)Path)DequeDictListOptionalTupleTF)CLOCK_CAMERA)CLOCK_FALLBACK)warni        i   c                      e Zd ZdZd$d%dZd&dZd&d	Zd'dZd(dZd&dZ	dZ
d)d*dZd+dZed,d            Zd&dZd&dZd-d#ZdS ).AudioRecorderu  Continuous microphone recorder driven by ``mark_boundary`` calls.

    One instance per session. Episode-scoped state (segments, frames,
    recording_dir) is rebuilt at the start of each episode.

    Boundaries are pushed in by the recorder thread via ``mark_boundary``
    (which carries the same ``(t_ns, clock)`` value used for the matching
    ``event_markers`` entry).  This guarantees that ``event_markers[i].t``
    and ``audio_segments[i].boundary_t_ns`` are bit-identical — there is
    no second clock read on the audio thread.

    Lifecycle::

        ar = AudioRecorder()
        ar.start_session()                  # spins the daemon thread
        ar.start_episode(rec_dir)           # at the top of each episode
        ar.mark_boundary(ts_ns, clock)      # called by the recorder loop on each press
        ar.stop_episode()                   # signals end of episode
        ar.wait_until_idle()                # block until WAVs are flushed
        audio = ar.drain()                  # {"audio_full": ..., "audio_segments": [...]}
        ar.stop_session()                   # at session end

    The recorder is fail-soft: if PyAudio is missing or the input device
    can't be opened, ``start_session`` prints a yellow warning and the
    recorder becomes a no-op (boundaries are ignored, no WAVs written,
    the rest of recording continues). Callers don't need to special-case
    this.
    Ndevice_indexOptional[int]returnNonec                .   || _         d | _        d| _        d| _        d | _        d | _        t          j                    | _        t          j                    | _	        | j	        
                                 g | _        d | _        t                      | _        d S )NF)_device_index_pa_enabled_session_running_thread_episode_dir	threadingEvent_episode_running_idleset	_segments_fullr   _pending_boundaries)selfr   s     */home/robot-lab/raiden_cmu/raiden/audio.py__init__zAudioRecorder.__init__S   s    )04 !&37 -1 ) 1 1_&&

%'%)
 <A77       c                \   | j         rdS t          st          d           dS 	 t          j                    | _        | j        | j                                         n| j                            | j                   nY# t          t          f$ rE}t          d| d           | j         | j                                         d| _        Y d}~dS d}~ww xY wd| _        d| _         t          j        | j        dd          | _        | j                                         t%          d           dS )	z.Spin up the daemon polling thread. Idempotent.Nu   PyAudio not installed — audio recording disabled.
Install with: uv sync --extra audio
(also: apt install portaudio19-dev on fresh Ubuntu systems)zNo microphone available (uF   ) — audio recording disabled.
Recording will continue without audio.Tzaudio-recorder)targetnamedaemonu5     ✓ AudioRecorder ready (continuous-with-segments).)r   _PYAUDIO_AVAILABLE_warnpyaudioPyAudior   r   get_default_input_device_infoget_device_info_by_indexOSErrorIOError	terminater   r   Thread_runr   startprintr&   es     r'   start_sessionzAudioRecorder.start_sessionm   sd     	F! 	N  
 F	((DH!)66888811$2DEEE! 	 	 	9A 9 9 9   x#""$$$FFFFF	  $ '9#3D
 
 
 	EFFFFFs   AA< <C:CCc                    d| _         | j                                         | j        "| j                            d           d| _        | j         | j                                         d| _        d| _        dS )z7Stop the daemon thread and release the PyAudio context.FNg       @timeout)r   r    clearr   joinr   r6   r   r&   s    r'   stop_sessionzAudioRecorder.stop_session   su     %##%%%<#Lc***DL8H   DHr)   recording_dirr   c                    | j         sdS t          |          | _        g | _        d| _        | j                                         | j                                         | j	                                         dS )u   Mark a new episode active so the daemon thread opens the stream.

        Audio capture begins immediately — the recorder calls
        ``mark_boundary`` to push each subtask-boundary timestamp.
        N)
r   r   r   r#   r$   r%   rA   r!   r"   r    )r&   rE   s     r'   start_episodezAudioRecorder.start_episode   ss     } 	F //
 &&(((
!!#####r)   t_nsintclockstrc                    | j         r| j                                        sdS | j                            t          |          t          |          f           dS )a#  Push a subtask-boundary timestamp into the audio stream.

        Called by the recorder thread on each pedal press, with the same
        ``(t_ns, clock)`` value it used for the matching ``event_markers``
        entry.  No-op when the recorder is disabled or no episode is active.
        N)r   r    is_setr%   appendrI   rK   )r&   rH   rJ   s      r'   mark_boundaryzAudioRecorder.mark_boundary   sW     } 	D$9$@$@$B$B 	F ''TCJJ(?@@@@@r)   c                J    | j         sdS | j                                         dS )zFSignal end-of-episode; the daemon will close any open stream and save.N)r   r    rA   rC   s    r'   stop_episodezAudioRecorder.stop_episode   s,    } 	F##%%%%%r)   g      @      N@r@   floatboolc                $   | j         sdS || j        k    r| j                            |          S | j                            | j                  rdS t	          d| j        dd|dd           | j                            || j        z
            S )u  Block up to ``timeout`` seconds for the daemon to flush any
        in-progress segment.

        Returns True if idle, False if ``timeout`` elapsed without the
        daemon settling.  When False, ``drain`` will return only what
        has been serialised so far — metadata.json may under-count, but
        every entry it does claim is backed by a real WAV on disk
        (segments and ``audio_full`` are recorded in state only after
        their WAV is written).
        Tr?   z(  ! Audio WAV flush still running after z.0fzs; waiting up to zs before giving up)r   _SOFT_WARN_Sr!   waitr:   )r&   r@   s     r'   wait_until_idlezAudioRecorder.wait_until_idle   s     } 	4d''':??7?333:??4#4?55 	4=t7HQ = =$<= = =	
 	
 	
 zw1B'BCCCr)   r   c                Z    | j         t          | j                  d}d| _         g | _        |S )u  Return and clear the audio metadata for the last episode.

        Returns ``{"audio_full": <dict or None>, "audio_segments": [...]}``.
        ``audio_full`` is the single concatenated WAV covering the whole
        episode (always written when at least one frame was captured).
        ``audio_segments`` is one entry per pedal press — empty when the
        operator never pressed during this episode.
        )
audio_fullaudio_segmentsN)r$   listr#   )r&   outs     r'   drainzAudioRecorder.drain   s7     *"4>22
 
 

r)   c                    | j         S )zGTrue if the recorder will actually capture audio (PyAudio + device OK).)r   rC   s    r'   enabledzAudioRecorder.enabled   s     }r)   c                   	 | j         rI| j                                        st          j        d           5|                                  | j         InH# t          t          t          f$ r.}t          dt          |          j         d| d            d }~ww xY w| j                                         d S # | j                                         w xY w)N皙?z  ! AudioRecorder thread died (: ))r   r    rM   timesleep_capture_one_episoder4   RuntimeError
ValueErrorr:   type__name__r!   r"   r;   s     r'   r8   zAudioRecorder._run   s    	' ,,3355 Jt$$$))+++	 ' ,
 z2 	 	 	 LDGG4DLLLLLMMM	 JNNDJNNs*   AA B6 B*)BBB6 6Cc           	        | j         }|| j                                         dS g }g }|fd}	 | j                            t
          t          t          d| j        t          |          }nK# t          t          f$ r7}t          d| d           | j                                         Y d}~dS d}~ww xY w| j                                         t          d           	 | j        r| j                                        r| j        r| j                                        \  }}|                    t'          |          ||t)          j                    f           t          dt'          |           d	| d
|            | j        t)          j        d           | j        r| j                                        	 |                                 |                                 n&# t          t0          f$ r t          d           Y nw xY w| j        rZ| j                                        \  }}|                    t'          |          ||t)          j                    f           | j        Zn# 	 |                                 |                                 n&# t          t0          f$ r t          d           Y nw xY w| j        rZ| j                                        \  }}|                    t'          |          ||t)          j                    f           | j        Zw xY w	 |                     |||           | j                                         dS # | j                                         w xY w)ur  Open the stream at episode start, capture continuously, save on stop.

        Audio is captured from the moment ``start_episode`` is called until
        ``stop_episode`` clears ``_episode_running``, but **only the audio
        from the first press onwards is written to disk** — the
        pre-first-press period is treated as warm-up noise and discarded.
        Boundaries are drained from ``_pending_boundaries`` (pushed by the
        recorder thread via ``mark_boundary``), producing one WAV per
        inter-press interval plus a single ``audio_full.wav`` covering
        first-press → end-of-episode.
        Nc                >    |                     |            d t          fS N)rN   _PYAUDIO_PA_CONTINUE)in_dataframe_count	time_infostatus_fs        r'   	_callbackz5AudioRecorder._capture_one_episode.<locals>._callback  s     IIg.//r)   T)formatchannelsrateinputinput_device_indexframes_per_bufferstream_callbackz!  ! AudioRecorder.open() failed (z); audio off for this episodeuC     ✓ Audio stream open (warm-up noise discarded until first press)u     ✓ Audio segment boundary #z @ ts=z clock=rb   z3  ! AudioRecorder stream cleanup raised; continuing)r   r    rA   r   open_PYAUDIO_FORMAT_INT16CHANNELSSAMPLE_RATEr   CHUNKr4   r5   r:   r!   r   rM   r%   popleftrN   lenre   rf   stop_streamcloserh   _save_audior"   )	r&   episode_dirframes
boundariesru   streamr<   ts_nsrJ   s	            r'   rg   z"AudioRecorder._capture_one_episode  s    '!'')))F 8:
 CI 	0 	0 	0 	0	X]],! #'#5"' ) #  FF ! 	 	 	 VaVVVWWW!'')))FFFFF	 	
STTT	L' !D,A,H,H,J,J ! . #'#;#C#C#E#ELE5%%s6{{E5$)++&NOOO6Z 6 6 %6 6.36 6   .  
4    ' !D,A,H,H,J,J !M""$$$\* M M M KLLLLLM * L#7??AAu!!3v;;udikk"JKKK * LM""$$$\* M M M KLLLLLM * L#7??AAu!!3v;;udikk"JKKK * L L L L L	[&*===JNNDJNNsg   :A* *B2;,B--B2CI* 9(G" " HH*L,(JL J85L7J88A%L!M M.r   r   List[bytes]r   !List[Tuple[int, int, str, float]]c                L   |sdS |dz  }|                     dd           t          j                                        d          }|d         \  }}}}	t	          j                    }
|t          |          d||
fgz   }t          t          |          dz
            D ]}||         \  }}}}||dz            \  }}}}|||         }d| d	| d
}t          ||z  |          }|||||t          |d          t          t          |d	}|d| d	| dz                      t          j        |d                     | j                            |||t          |d          |d           t!          d| d| d|dd| d	           d}t          ||z  ||d                   }|||	t          |d          t          t          |d}|dz                      t          j        |d                     ||t          |d          |d| _        t!          d| d|dd| d           dS )uI  Write per-press segment WAVs + a concatenated ``audio_full.wav``.

        Both shapes start at the **first pedal press** — pre-first-press
        frames are warm-up noise and discarded.  With N presses you get
        N segments (segment ``i`` runs from press ``i`` to press ``i+1``,
        last → end-of-episode) plus one ``audio_full.wav`` that is
        sample-identical to the concatenation of those segments.
        Nothing is written if the operator never pressed the pedal.

        We gate only on ``boundaries`` (not ``frames``): a press recorded
        before the first PyAudio callback delivered a buffer would
        otherwise drop the boundary entirely, breaking the
        ``len(audio_segments) == len(event_markers)`` contract.  Empty
        slices yield valid header-only WAVs with ``duration_s = 0.0``.
        NaudioT)parentsexist_okz%Y%m%d_%H%M%Sr   r   audio__z.wav   )	
audio_file
segment_idboundary_t_nsboundary_wall_timeend_wall_time
duration_ssample_raterw   rJ   z.jsonr   )indent)r   r   r   r   rJ   u     ✓ Saved audio segment rc   z (z.1fz	s, clock=rd   zaudio_full.wav)r   
start_t_nsstart_wall_timer   r   rw   rJ   zaudio_full.json)r   r   r   rJ   u     ✓ Saved audio_full: )mkdirr   nowstrftimere   r   range	_save_wavroundr   r   
write_textjsondumpsr#   rN   r:   r$   )r&   r   r   r   	audio_dirtimestamp_str	first_idxfirst_ts_nsfirst_clock
first_wallend_wallboundaries_with_endi	start_idxstart_ts_nsstart_clock
start_wallend_idxr   
end_wall_i
seg_frameswav_filenamedurationsidecarfull_wav_namefull_durationfull_sidecars                              r'   r   zAudioRecorder._save_audio^  s   *  	F')	t444 //@@:DQ-7	;Z 9;;([[![(3,
 
 s.//!344 (	 (	A>QRS>T;I{K(;AE(B%GQ:	' 12J <A;;;;;L \!9:FFH*!,&0!+#Ha00*$$
 
G :!::m::::FF
71---   N!!"."#%0"'!"4"4(    :Q : :, : :9: :+6: : :    )!)m";VIJJ=OPP'%)q11&  
 
 
&	&224:lST3U3U3UVVV'%q11 	
 

 	;} ; ;:; ;,7; ; ;	
 	
 	
 	
 	
r)   rn   )r   r   r   r   )r   r   )rE   r   r   r   )rH   rI   rJ   rK   r   r   )rR   )r@   rS   r   rT   )r   r   )r   rT   )r   r   r   r   r   r   r   r   )rk   
__module____qualname____doc__r(   r=   rD   rG   rO   rQ   rV   rX   r^   propertyr`   r8   rg   r    r)   r'   r   r   5   sI        :C C C C C4"G "G "G "GH
 
 
 
 $ $ $ $	A 	A 	A 	A& & & & LD D D D D.   "    X    P P P Plc
 c
 c
 c
 c
 c
r)   r   pathr   r   r   r   rS   c                   ddl }|                    t          |           d          5 }|                    t                     |                    t                     |                    t                     |D ]}|	                    |           	 ddd           n# 1 swxY w Y   t          |          t          z  t          z  S )zFWrite the captured PyAudio frames to a WAV file. Returns duration (s).r   Nwb)waver}   rK   setnchannelsr   setsampwidthSAMPLE_WIDTHsetframerater   writeframesr   r   )r   r   r   wfframes        r'   r   r     s    KKK	3t99d	#	# "r
!!!
%%%
$$$ 	" 	"ENN5!!!!	"	" " " " " " " " " " " " " " " v;;,,s   A)BB"%B")r   r   r   r   r   rS   )&r   
__future__r   r   r   re   collectionsr   r   pathlibr   typingr   r   r	   r
   r   r0   r.   paInt16r~   
paContinuero   ImportErrorraiden._clockr   _CLOCK_CAMERAr   _CLOCK_FALLBACKraiden._warnr   r/   r   r   r   r   r   r   r   r)   r'   <module>r      s    # " " " " "                         5 5 5 5 5 5 5 5 5 5 5 5 5 5

 NNN#O"-      G 	  8 7 7 7 7 7 ; ; ; ; ; ; & & & & & & L
 L
 L
 L
 L
 L
 L
 L
h- - - - - -s   A AA