ciyidogan committed on
Commit
c582b8f
·
verified ·
1 Parent(s): 4e2b388

Update websocket_handler.py

Browse files
Files changed (1) hide show
  1. websocket_handler.py +75 -63
websocket_handler.py CHANGED
@@ -473,74 +473,86 @@ async def handle_audio_chunk(websocket: WebSocket, session: RealtimeSession, mes
473
  # Check silence
474
  silence_duration = session.silence_detector.update(decoded_audio)
475
 
476
- # Stream to STT if available
477
- if session.stt_manager and session.state == ConversationState.LISTENING:
478
- # Ensure streaming is active
479
- if not session.is_streaming:
480
- log_warning(f"⚠️ STT manager exists but streaming not active", session_id=session.session.session_id)
481
- # Try to restart streaming
482
- stt_initialized = await session.initialize_stt()
483
- if not stt_initialized:
484
- await websocket.send_json({
485
- "type": "error",
486
- "error_type": "stt_error",
487
- "message": "STT streaming not available"
488
- })
489
- return
490
-
491
- try:
492
- # Chunk counter - sadece önemli milestone'larda logla
493
- if not hasattr(session, 'chunk_counter'):
494
- session.chunk_counter = 0
495
- session.chunk_counter += 1
496
-
497
- if session.chunk_counter == 1:
498
- log_info(f"🎤 Started streaming audio to STT", session_id=session.session.session_id)
499
- elif session.chunk_counter % 100 == 0:
500
- log_info(f"📊 Sent {session.chunk_counter} chunks to STT so far...", session_id=session.session.session_id)
501
 
502
- # STT'ye gönder ve sonuçları bekle
503
- async for result in session.stt_manager.stream_audio(decoded_audio):
504
- # Sadece anlamlı sonuçları logla
505
- if result.text.strip(): # Boş olmayan text varsa
506
- log_info(f"🎤 STT: '{result.text}' (final: {result.is_final})", session_id=session.session.session_id)
507
 
508
- # Send transcription updates
509
- await websocket.send_json({
510
- "type": "transcription",
511
- "text": result.text,
512
- "is_final": result.is_final,
513
- "confidence": result.confidence
514
- })
515
 
516
- if result.is_final:
517
- session.current_transcription = result.text
518
- log_info(f"✅ FINAL TRANSCRIPTION: '{result.text}'", session_id=session.session.session_id)
 
 
519
 
520
- # Final transcription geldiğinde hemen işle
521
- if session.current_transcription:
522
- # State'i değiştir ve user input'u işle
523
- await session.change_state(ConversationState.PROCESSING_STT)
524
- await websocket.send_json({
525
- "type": "state_change",
526
- "from": "listening",
527
- "to": "processing_stt"
528
- })
529
-
530
- # Process user input
531
- await process_user_input(websocket, session)
532
-
533
- # STT'den final result geldiğinde audio buffer'ı ve transcription'ı resetle
534
- await session.reset_for_new_utterance()
535
- return # Bu audio chunk için işlem tamamlandı
536
 
537
- except Exception as e:
538
- log_error(f"❌ STT streaming error", error=str(e), traceback=traceback.format_exc(), session_id=session.session.session_id)
539
- await websocket.send_json({
540
- "type": "error",
541
- "error_type": "stt_error",
542
- "message": f"STT error: {str(e)}"
543
- })
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
544
 
545
  async def handle_control_message(websocket: WebSocket, session: RealtimeSession, message: Dict[str, Any]):
546
  """Handle control messages"""
 
473
  # Check silence
474
  silence_duration = session.silence_detector.update(decoded_audio)
475
 
476
+ # Stream to STT if available
477
+ if session.stt_manager and session.state == ConversationState.LISTENING:
478
+ # Ensure streaming is active
479
+ if not session.is_streaming:
480
+ log_warning(f"⚠️ STT manager exists but streaming not active", session_id=session.session.session_id)
481
+ # Try to restart streaming
482
+ stt_initialized = await session.initialize_stt()
483
+ if not stt_initialized:
484
+ await websocket.send_json({
485
+ "type": "error",
486
+ "error_type": "stt_error",
487
+ "message": "STT streaming not available"
488
+ })
489
+ return
 
 
 
 
 
 
 
 
 
 
 
490
 
491
+ try:
492
+ # Chunk counter - sadece önemli milestone'larda logla
493
+ if not hasattr(session, 'chunk_counter'):
494
+ session.chunk_counter = 0
495
+ session.chunk_counter += 1
496
 
497
+ if session.chunk_counter == 1:
498
+ log_info(f"🎤 Started streaming audio to STT", session_id=session.session.session_id)
499
+ elif session.chunk_counter % 100 == 0:
500
+ log_info(f"📊 Sent {session.chunk_counter} chunks to STT so far...", session_id=session.session.session_id)
 
 
 
501
 
502
+ # STT'ye gönder ve sonuçları bekle
503
+ async for result in session.stt_manager.stream_audio(decoded_audio):
504
+ # Sadece anlamlı sonuçları logla
505
+ if result.text.strip(): # Boş olmayan text varsa
506
+ log_info(f"🎤 STT: '{result.text}' (final: {result.is_final})", session_id=session.session.session_id)
507
 
508
+ # Send transcription updates
509
+ await websocket.send_json({
510
+ "type": "transcription",
511
+ "text": result.text,
512
+ "is_final": result.is_final,
513
+ "confidence": result.confidence
514
+ })
515
+
516
+ if result.is_final:
517
+ session.current_transcription = result.text
518
+ log_info(f"✅ FINAL TRANSCRIPTION: '{result.text}'", session_id=session.session.session_id)
 
 
 
 
 
519
 
520
+ # Final transcription geldiğinde hemen işle
521
+ if session.current_transcription:
522
+ # State'i değiştir ve user input'u işle
523
+ await session.change_state(ConversationState.PROCESSING_STT)
524
+ await websocket.send_json({
525
+ "type": "state_change",
526
+ "from": "listening",
527
+ "to": "processing_stt"
528
+ })
529
+
530
+ # Process user input
531
+ await process_user_input(websocket, session)
532
+
533
+ # STT'den final result geldiğinde audio buffer'ı ve transcription'ı resetle
534
+ await session.reset_for_new_utterance()
535
+ return # Bu audio chunk için işlem tamamlandı
536
+
537
+ except Exception as e:
538
+ log_error(f"❌ STT streaming error", error=str(e), traceback=traceback.format_exc(), session_id=session.session.session_id)
539
+ await websocket.send_json({
540
+ "type": "error",
541
+ "error_type": "stt_error",
542
+ "message": f"STT error: {str(e)}"
543
+ })
544
+
545
+ except Exception as e:
546
+ log_error(
547
+ f"❌ Audio chunk handling error",
548
+ error=str(e),
549
+ traceback=traceback.format_exc(),
550
+ session_id=session.session.session_id
551
+ )
552
+ await websocket.send_json({
553
+ "type": "error",
554
+ "message": f"Audio processing error: {str(e)}"
555
+ })
556
 
557
  async def handle_control_message(websocket: WebSocket, session: RealtimeSession, message: Dict[str, Any]):
558
  """Handle control messages"""