Spaces:
Building
Building
Update websocket_handler.py
Browse files
- websocket_handler.py +17 -13
websocket_handler.py
CHANGED
@@ -470,16 +470,9 @@ async def handle_audio_chunk(websocket: WebSocket, session: RealtimeSession, mes
|
|
470 |
# Decode for processing
|
471 |
decoded_audio = base64.b64decode(audio_data)
|
472 |
|
473 |
-
# DEBUG: Log audio chunk size
|
474 |
-
log_debug(f"🎤 Audio chunk received: {len(decoded_audio)} bytes", session_id=session.session.session_id)
|
475 |
-
|
476 |
# Check silence
|
477 |
silence_duration = session.silence_detector.update(decoded_audio)
|
478 |
|
479 |
-
# DEBUG: Log silence detection
|
480 |
-
if silence_duration > 0:
|
481 |
-
log_debug(f"🔇 Silence detected: {silence_duration}ms", session_id=session.session.session_id)
|
482 |
-
|
483 |
# Stream to STT if available
|
484 |
if session.stt_manager and session.state == ConversationState.LISTENING:
|
485 |
# Ensure streaming is active
|
@@ -496,18 +489,25 @@ async def handle_audio_chunk(websocket: WebSocket, session: RealtimeSession, mes
|
|
496 |
return
|
497 |
|
498 |
try:
|
499 |
-
|
500 |
-
|
501 |
-
# Her 10. chunk'ta bir test mesajı gönder
|
502 |
if not hasattr(session, 'chunk_counter'):
|
503 |
session.chunk_counter = 0
|
504 |
session.chunk_counter += 1
|
505 |
|
506 |
-
if session.chunk_counter % 10 == 0:
|
|
|
|
|
507 |
log_info(f"📊 Sent {session.chunk_counter} chunks to STT so far...", session_id=session.session.session_id)
|
508 |
|
|
|
|
|
|
|
509 |
async for result in session.stt_manager.stream_audio(decoded_audio):
|
510 |
-
|
|
|
|
|
|
|
|
|
511 |
|
512 |
# Send transcription updates
|
513 |
await websocket.send_json({
|
@@ -519,7 +519,7 @@ async def handle_audio_chunk(websocket: WebSocket, session: RealtimeSession, mes
|
|
519 |
|
520 |
if result.is_final:
|
521 |
session.current_transcription = result.text
|
522 |
-
log_info(f"
|
523 |
|
524 |
# Final transcription geldiğinde hemen işle
|
525 |
if session.current_transcription:
|
@@ -537,6 +537,10 @@ async def handle_audio_chunk(websocket: WebSocket, session: RealtimeSession, mes
|
|
537 |
# STT'den final result geldiğinde audio buffer'ı ve transcription'ı resetle
|
538 |
await session.reset_for_new_utterance()
|
539 |
return # Bu audio chunk için işlem tamamlandı
|
|
|
|
|
|
|
|
|
540 |
|
541 |
except Exception as e:
|
542 |
log_error(f"❌ STT streaming error", error=str(e), traceback=traceback.format_exc(), session_id=session.session.session_id)
|
|
|
470 |
# Decode for processing
|
471 |
decoded_audio = base64.b64decode(audio_data)
|
472 |
|
|
|
|
|
|
|
473 |
# Check silence
|
474 |
silence_duration = session.silence_detector.update(decoded_audio)
|
475 |
|
|
|
|
|
|
|
|
|
476 |
# Stream to STT if available
|
477 |
if session.stt_manager and session.state == ConversationState.LISTENING:
|
478 |
# Ensure streaming is active
|
|
|
489 |
return
|
490 |
|
491 |
try:
|
492 |
+
# Chunk counter - sadece önemli milestone'larda logla
|
|
|
|
|
493 |
if not hasattr(session, 'chunk_counter'):
|
494 |
session.chunk_counter = 0
|
495 |
session.chunk_counter += 1
|
496 |
|
497 |
+
if session.chunk_counter == 1:
|
498 |
+
log_info(f"🎤 Started streaming audio to STT", session_id=session.session.session_id)
|
499 |
+
elif session.chunk_counter % 100 == 0:
|
500 |
log_info(f"📊 Sent {session.chunk_counter} chunks to STT so far...", session_id=session.session.session_id)
|
501 |
|
502 |
+
# STT'ye gönder ve sonuçları bekle
|
503 |
+
result_received = False
|
504 |
+
|
505 |
async for result in session.stt_manager.stream_audio(decoded_audio):
|
506 |
+
result_received = True
|
507 |
+
|
508 |
+
# Sadece anlamlı sonuçları logla
|
509 |
+
if result.text.strip(): # Boş olmayan text varsa
|
510 |
+
log_info(f"🎤 STT: '{result.text}' (final: {result.is_final})", session_id=session.session.session_id)
|
511 |
|
512 |
# Send transcription updates
|
513 |
await websocket.send_json({
|
|
|
519 |
|
520 |
if result.is_final:
|
521 |
session.current_transcription = result.text
|
522 |
+
log_info(f"✅ FINAL TRANSCRIPTION: '{result.text}'", session_id=session.session.session_id)
|
523 |
|
524 |
# Final transcription geldiğinde hemen işle
|
525 |
if session.current_transcription:
|
|
|
537 |
# STT'den final result geldiğinde audio buffer'ı ve transcription'ı resetle
|
538 |
await session.reset_for_new_utterance()
|
539 |
return # Bu audio chunk için işlem tamamlandı
|
540 |
+
|
541 |
+
# Her 200 chunk'ta bir result gelmiyorsa uyar
|
542 |
+
if not result_received and session.chunk_counter % 200 == 0:
|
543 |
+
log_warning(f"⚠️ No STT results after {session.chunk_counter} chunks", session_id=session.session.session_id)
|
544 |
|
545 |
except Exception as e:
|
546 |
log_error(f"❌ STT streaming error", error=str(e), traceback=traceback.format_exc(), session_id=session.session.session_id)
|