@@ -46,12 +46,14 @@ vi.mock('../../src/config', () => ({
4646// to `this` in the implementation do not persist on the created instance. Passing the
4747// implementation directly to vi.fn() avoids this issue.
4848vi . mock ( '../../src/OutgoingConnection' , ( ) => ( {
49- OutgoingConnection : vi . fn ( function ( this : any , tag : string ) {
49+ OutgoingConnection : vi . fn ( function ( this : any , tag : string , inputFormat : unknown ) {
5050 this . localTag = tag ;
5151 this . participantId = tag . split ( '-' ) [ 0 ] ; // Extract participant ID from tag like "participant1-ssrc123"
5252 this . handleMediaEvent = vi . fn ( ) ;
5353 this . addTranscriptContext = vi . fn ( ) ;
5454 this . updateInputFormat = vi . fn ( ) ;
55+ this . getInputFormat = vi . fn ( ( ) => inputFormat ?? { encoding : 'opus' } ) ;
56+ this . resetChunkTracking = vi . fn ( ) ;
5557 this . close = vi . fn ( ) ;
5658 this . onInterimTranscription = undefined ;
5759 this . onCompleteTranscription = undefined ;
@@ -578,4 +580,115 @@ describe('TranscriberProxy', () => {
578580 expect ( conn . handleMediaEvent ) . toHaveBeenCalledWith ( mediaEvent ) ;
579581 } ) ;
580582 } ) ;
583+
describe('diagnostic logging', () => {
  // 'T2dnUw==' is base64 for 'OggS' — the Ogg page capture pattern.
  const OGG_PAYLOAD = 'T2dnUw==';

  // Prefixes of the two diagnostic log lines these tests look for.
  const SNIFF_PREFIX = 'First client frame sniff:';
  const SESSION_END_PREFIX = 'Session ended:';

  /**
   * Collects the first argument of every recorded `logger.info` call that is a
   * string starting with `prefix`, in call order.
   */
  const infoMessagesStartingWith = (prefix: string): string[] =>
    vi
      .mocked(logger.info)
      .mock.calls.map(([msg]) => msg)
      .filter((msg): msg is string => typeof msg === 'string' && msg.startsWith(prefix));

  /** Builds a `start` event for `tag` announcing an opus media format. */
  const startEvent = (tag: string) => ({
    event: 'start' as const,
    start: { tag, mediaFormat: { encoding: 'opus' as const } },
  });

  /** Builds a `media` event for `tag`; the payload defaults to the Ogg capture pattern. */
  const mediaFrame = (tag: string, chunk: number, payload: string = OGG_PAYLOAD) => ({
    event: 'media' as const,
    media: { tag, payload, chunk, timestamp: 0 },
  });

  /** Builds a transcription-result message for participant `tag1`. */
  const transcriptionResult = (
    messageId: string,
    isInterim: boolean,
    transcript: Array<{ text: string }> = [],
  ) => ({
    transcript,
    is_interim: isInterim,
    message_id: messageId,
    type: 'transcription-result',
    event: 'transcription-result',
    participant: { id: 'tag1' },
    timestamp: 0,
  });

  it('logs the first client frame sniff exactly once per tag', () => {
    const proxy = new TranscriberProxy(mockWebSocket, options);
    proxy.handleStartEvent(startEvent('tag1'));
    vi.mocked(logger.info).mockClear();

    // The very same event object is delivered three times; only the first may be sniffed.
    const media1 = mediaFrame('tag1', 0);
    proxy.handleMediaEvent(media1);
    proxy.handleMediaEvent(media1);
    proxy.handleMediaEvent(media1);

    const sniffMessages = infoMessagesStartingWith(SNIFF_PREFIX);
    expect(sniffMessages).toHaveLength(1);
    const msg = sniffMessages[0];
    expect(msg).toContain('tag=tag1');
    expect(msg).toContain('urlEncoding=opus');
    expect(msg).toContain(`startFormat='{"encoding":"opus"}'`);
    expect(msg).toContain('4f676753'); // 'OggS' in hex
    expect(msg).toContain(`<b64:${OGG_PAYLOAD.length} chars, first 4 decoded bytes=4f676753>`);
  });

  it('logs the first client frame sniff once per participant tag', () => {
    const proxy = new TranscriberProxy(mockWebSocket, options);
    proxy.handleStartEvent(startEvent('tag1'));
    proxy.handleStartEvent(startEvent('tag2'));
    vi.mocked(logger.info).mockClear();

    proxy.handleMediaEvent(mediaFrame('tag1', 0));
    proxy.handleMediaEvent(mediaFrame('tag2', 0));
    proxy.handleMediaEvent(mediaFrame('tag1', 1));
    proxy.handleMediaEvent(mediaFrame('tag2', 1));

    const sniffMessages = infoMessagesStartingWith(SNIFF_PREFIX);
    expect(sniffMessages).toHaveLength(2);
    expect(sniffMessages[0]).toContain('tag=tag1');
    expect(sniffMessages[1]).toContain('tag=tag2');
  });

  it('does not sniff or count empty-payload frames, and retries the sniff on the next real frame', () => {
    const proxy = new TranscriberProxy(mockWebSocket, options);
    proxy.handleStartEvent(startEvent('tag1'));
    vi.mocked(logger.info).mockClear();

    // Missing payload → should not log, should not flip the flag.
    // Built inline so the `payload` key is truly absent rather than set to undefined.
    proxy.handleMediaEvent({ event: 'media', media: { tag: 'tag1', chunk: 0, timestamp: 0 } });
    // Empty-string payload → same
    proxy.handleMediaEvent(mediaFrame('tag1', 1, ''));

    expect(infoMessagesStartingWith(SNIFF_PREFIX)).toHaveLength(0);

    // Real audio frame → sniff now fires (not short-circuited by prior empty frames)
    proxy.handleMediaEvent(mediaFrame('tag1', 2));

    expect(infoMessagesStartingWith(SNIFF_PREFIX)).toHaveLength(1);

    // Session-end summary reflects that only the real frame was counted as audio
    vi.mocked(logger.info).mockClear();
    proxy.close();
    const endMsg = infoMessagesStartingWith(SESSION_END_PREFIX)[0];
    // Fail with a clear message if the summary was never logged at all.
    expect(endMsg).toBeDefined();
    expect(endMsg).toContain('audioPackets=1');
  });

  it('fires the first-frame sniff again after a WebSocket reattach', () => {
    const proxy = new TranscriberProxy(mockWebSocket, options);
    proxy.handleStartEvent(startEvent('tag1'));

    proxy.handleMediaEvent(mediaFrame('tag1', 0));
    proxy.handleMediaEvent(mediaFrame('tag1', 1));

    // Discard the sniff logged before the reattach so only post-reattach logs are counted.
    vi.mocked(logger.info).mockClear();
    proxy.reattachWebSocket({ addEventListener: vi.fn(), send: vi.fn(), close: vi.fn() } as any);

    proxy.handleMediaEvent(mediaFrame('tag1', 0));
    expect(infoMessagesStartingWith(SNIFF_PREFIX)).toHaveLength(1);
  });

  it('emits a session-end summary with audioPackets, interims, finals, and provider', () => {
    const proxy = new TranscriberProxy(mockWebSocket, { ...options, provider: 'deepgram' });
    proxy.handleStartEvent(startEvent('tag1'));
    const conn = vi.mocked(OutgoingConnection).mock.instances[0] as any;

    // 3 audio packets
    proxy.handleMediaEvent(mediaFrame('tag1', 0));
    proxy.handleMediaEvent(mediaFrame('tag1', 1));
    proxy.handleMediaEvent(mediaFrame('tag1', 2));

    // 2 interims + 1 final via the connection callbacks
    conn.onInterimTranscription(transcriptionResult('a', true));
    conn.onInterimTranscription(transcriptionResult('b', true));
    conn.onCompleteTranscription(transcriptionResult('c', false, [{ text: 'hi' }]));

    vi.mocked(logger.info).mockClear();
    proxy.close();

    const endMsg = infoMessagesStartingWith(SESSION_END_PREFIX)[0];
    expect(endMsg).toBeDefined();
    expect(endMsg).toContain('provider=deepgram');
    expect(endMsg).toContain('audioPackets=3');
    expect(endMsg).toContain('interims=2');
    expect(endMsg).toContain('finals=1');
    // Duration is expected with at least one decimal digit, e.g. `durationSec=0.0`.
    expect(endMsg).toMatch(/durationSec=\d+\.\d/);
  });
});
581694} ) ;
0 commit comments