ciyidogan committed on
Commit
e5e4cc9
·
verified ·
1 Parent(s): c114a53

Update websocket_handler.py

Browse files
Files changed (1) hide show
  1. websocket_handler.py +62 -82
websocket_handler.py CHANGED
@@ -473,94 +473,74 @@ async def handle_audio_chunk(websocket: WebSocket, session: RealtimeSession, mes
473
  # Check silence
474
  silence_duration = session.silence_detector.update(decoded_audio)
475
 
476
- # Stream to STT if available
477
- if session.stt_manager and session.state == ConversationState.LISTENING:
478
- # Ensure streaming is active
479
- if not session.is_streaming:
480
- log_warning(f"⚠️ STT manager exists but streaming not active", session_id=session.session.session_id)
481
- # Try to restart streaming
482
- stt_initialized = await session.initialize_stt()
483
- if not stt_initialized:
484
- await websocket.send_json({
485
- "type": "error",
486
- "error_type": "stt_error",
487
- "message": "STT streaming not available"
488
- })
489
- return
 
 
 
 
 
 
490
 
491
- try:
492
- # Chunk counter - sadece önemli milestone'larda logla
493
- if not hasattr(session, 'chunk_counter'):
494
- session.chunk_counter = 0
495
- session.chunk_counter += 1
496
-
497
- if session.chunk_counter == 1:
498
- log_info(f"🎤 Started streaming audio to STT", session_id=session.session.session_id)
499
- elif session.chunk_counter % 100 == 0:
500
- log_info(f"📊 Sent {session.chunk_counter} chunks to STT so far...", session_id=session.session.session_id)
501
 
502
- # STT'ye gönder ve sonuçları bekle
503
- result_received = False
 
 
 
 
 
504
 
505
- async for result in session.stt_manager.stream_audio(decoded_audio):
506
- result_received = True
507
-
508
- # Sadece anlamlı sonuçları logla
509
- if result.text.strip(): # Boş olmayan text varsa
510
- log_info(f"🎤 STT: '{result.text}' (final: {result.is_final})", session_id=session.session.session_id)
511
-
512
- # Send transcription updates
513
- await websocket.send_json({
514
- "type": "transcription",
515
- "text": result.text,
516
- "is_final": result.is_final,
517
- "confidence": result.confidence
518
- })
519
 
520
- if result.is_final:
521
- session.current_transcription = result.text
522
- log_info(f"✅ FINAL TRANSCRIPTION: '{result.text}'", session_id=session.session.session_id)
 
 
 
 
 
 
523
 
524
- # Final transcription geldiğinde hemen işle
525
- if session.current_transcription:
526
- # State'i değiştir ve user input'u işle
527
- await session.change_state(ConversationState.PROCESSING_STT)
528
- await websocket.send_json({
529
- "type": "state_change",
530
- "from": "listening",
531
- "to": "processing_stt"
532
- })
533
-
534
- # Process user input
535
- await process_user_input(websocket, session)
536
-
537
- # STT'den final result geldiğinde audio buffer'ı ve transcription'ı resetle
538
- await session.reset_for_new_utterance()
539
- return # Bu audio chunk için işlem tamamlandı
540
-
541
- # Her 200 chunk'ta bir result gelmiyorsa uyar
542
- if not result_received and session.chunk_counter % 200 == 0:
543
- log_warning(f"⚠️ No STT results after {session.chunk_counter} chunks", session_id=session.session.session_id)
544
 
545
- except Exception as e:
546
- log_error(f"❌ STT streaming error", error=str(e), traceback=traceback.format_exc(), session_id=session.session.session_id)
547
- await websocket.send_json({
548
- "type": "error",
549
- "error_type": "stt_error",
550
- "message": f"STT error: {str(e)}"
551
- })
552
-
553
- except Exception as e:
554
- log_error(
555
- f"❌ Audio chunk handling error",
556
- error=str(e),
557
- traceback=traceback.format_exc(),
558
- session_id=session.session.session_id
559
- )
560
- await websocket.send_json({
561
- "type": "error",
562
- "message": f"Audio processing error: {str(e)}"
563
- })
564
 
565
  async def handle_control_message(websocket: WebSocket, session: RealtimeSession, message: Dict[str, Any]):
566
  """Handle control messages"""
 
473
  # Check silence
474
  silence_duration = session.silence_detector.update(decoded_audio)
475
 
476
+ # Stream to STT if available
477
+ if session.stt_manager and session.state == ConversationState.LISTENING:
478
+ # Ensure streaming is active
479
+ if not session.is_streaming:
480
+ log_warning(f"⚠️ STT manager exists but streaming not active", session_id=session.session.session_id)
481
+ # Try to restart streaming
482
+ stt_initialized = await session.initialize_stt()
483
+ if not stt_initialized:
484
+ await websocket.send_json({
485
+ "type": "error",
486
+ "error_type": "stt_error",
487
+ "message": "STT streaming not available"
488
+ })
489
+ return
490
+
491
+ try:
492
+ # Chunk counter - sadece önemli milestone'larda logla
493
+ if not hasattr(session, 'chunk_counter'):
494
+ session.chunk_counter = 0
495
+ session.chunk_counter += 1
496
 
497
+ if session.chunk_counter == 1:
498
+ log_info(f"🎤 Started streaming audio to STT", session_id=session.session.session_id)
499
+ elif session.chunk_counter % 100 == 0:
500
+ log_info(f"📊 Sent {session.chunk_counter} chunks to STT so far...", session_id=session.session.session_id)
501
+
502
+ # STT'ye gönder ve sonuçları bekle
503
+ async for result in session.stt_manager.stream_audio(decoded_audio):
504
+ # Sadece anlamlı sonuçları logla
505
+ if result.text.strip(): # Boş olmayan text varsa
506
+ log_info(f"🎤 STT: '{result.text}' (final: {result.is_final})", session_id=session.session.session_id)
507
 
508
+ # Send transcription updates
509
+ await websocket.send_json({
510
+ "type": "transcription",
511
+ "text": result.text,
512
+ "is_final": result.is_final,
513
+ "confidence": result.confidence
514
+ })
515
 
516
+ if result.is_final:
517
+ session.current_transcription = result.text
518
+ log_info(f"✅ FINAL TRANSCRIPTION: '{result.text}'", session_id=session.session.session_id)
 
 
 
 
 
 
 
 
 
 
 
519
 
520
+ # Final transcription geldiğinde hemen işle
521
+ if session.current_transcription:
522
+ # State'i değiştir ve user input'u işle
523
+ await session.change_state(ConversationState.PROCESSING_STT)
524
+ await websocket.send_json({
525
+ "type": "state_change",
526
+ "from": "listening",
527
+ "to": "processing_stt"
528
+ })
529
 
530
+ # Process user input
531
+ await process_user_input(websocket, session)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
532
 
533
+ # STT'den final result geldiğinde audio buffer'ı ve transcription'ı resetle
534
+ await session.reset_for_new_utterance()
535
+ return # Bu audio chunk için işlem tamamlandı
536
+
537
+ except Exception as e:
538
+ log_error(f" STT streaming error", error=str(e), traceback=traceback.format_exc(), session_id=session.session.session_id)
539
+ await websocket.send_json({
540
+ "type": "error",
541
+ "error_type": "stt_error",
542
+ "message": f"STT error: {str(e)}"
543
+ })
 
 
 
 
 
 
 
 
544
 
545
  async def handle_control_message(websocket: WebSocket, session: RealtimeSession, message: Dict[str, Any]):
546
  """Handle control messages"""