Spaces:
Building
Building
Update websocket_handler.py
Browse files- websocket_handler.py +36 -27
websocket_handler.py
CHANGED
@@ -313,25 +313,35 @@ class RealtimeSession:
|
|
313 |
self.last_stt_stop_time = datetime.now()
|
314 |
|
315 |
async def restart_stt_if_needed(self):
|
316 |
-
"""Restart STT if
|
317 |
try:
|
318 |
-
#
|
319 |
-
if not self.
|
320 |
-
|
321 |
-
|
322 |
-
|
323 |
-
# Yeni session başlat (initialize_stt zaten stop_stt_streaming'i çağırıyor)
|
324 |
-
stt_initialized = await self.initialize_stt()
|
325 |
-
if stt_initialized:
|
326 |
-
log_info(f"✅ STT stream restarted successfully", session_id=self.session.session_id)
|
327 |
-
return True
|
328 |
-
else:
|
329 |
-
log_error(f"❌ Failed to restart STT stream", session_id=self.session.session_id)
|
330 |
return False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
331 |
return True
|
|
|
332 |
except Exception as e:
|
333 |
-
log_error(f"❌
|
334 |
-
|
335 |
return False
|
336 |
|
337 |
async def change_state(self, new_state: ConversationState):
|
@@ -513,12 +523,8 @@ async def handle_audio_chunk(websocket: WebSocket, session: RealtimeSession, mes
|
|
513 |
|
514 |
if session.chunk_counter == 1:
|
515 |
log_info(f"🎤 Started streaming audio to STT", session_id=session.session.session_id)
|
516 |
-
# İlk chunk'
|
517 |
-
|
518 |
-
if decoded_audio[:4] == b'\x1a\x45\xdf\xa3':
|
519 |
-
log_info(f"✅ Valid WEBM header detected", session_id=session.session.session_id)
|
520 |
-
else:
|
521 |
-
log_warning(f"⚠️ Unknown audio format, first 4 bytes: {decoded_audio[:4].hex()}", session_id=session.session.session_id)
|
522 |
elif session.chunk_counter % 100 == 0:
|
523 |
log_info(f"📊 Sent {session.chunk_counter} chunks to STT so far...", session_id=session.session.session_id)
|
524 |
|
@@ -545,11 +551,14 @@ async def handle_audio_chunk(websocket: WebSocket, session: RealtimeSession, mes
|
|
545 |
|
546 |
# State'i değiştir
|
547 |
await session.change_state(ConversationState.PROCESSING_STT)
|
548 |
-
|
549 |
-
|
550 |
-
|
551 |
-
|
552 |
-
|
|
|
|
|
|
|
553 |
|
554 |
# Process user input
|
555 |
await process_user_input(websocket, session)
|
@@ -577,7 +586,7 @@ async def handle_audio_chunk(websocket: WebSocket, session: RealtimeSession, mes
|
|
577 |
"error_type": "audio_error",
|
578 |
"message": f"Audio processing error: {str(e)}"
|
579 |
})
|
580 |
-
|
581 |
# ========================= MAIN HANDLER =========================
|
582 |
async def websocket_endpoint(websocket: WebSocket, session_id: str):
|
583 |
"""Main WebSocket endpoint for real-time conversation"""
|
|
|
313 |
self.last_stt_stop_time = datetime.now()
|
314 |
|
315 |
async def restart_stt_if_needed(self):
    """Start (or restart) STT streaming for this session.

    When no STT manager is set, ``create_stt_manager()`` is awaited first.
    Streaming is then started through ``stt_manager.start_streaming()``
    with a fixed option set, and ``is_streaming`` is set to True.

    Returns:
        True when ``start_streaming()`` completed.
        False when there is still no STT manager after the create attempt
        (an error is logged; ``is_streaming`` is left untouched), or when
        any step raises (the error and traceback are logged and
        ``is_streaming`` is set to False).
    """
    try:
        # No STT manager yet: build one from scratch.
        if not self.stt_manager:
            await self.create_stt_manager()
        if not self.stt_manager:
            log_error("❌ Failed to create STT manager", session_id=self.session.session_id)
            return False

        # NOTE(review): the provider settings are looked up but never read
        # below. The lookup is kept because it may raise; confirm whether
        # the options should come from these settings.
        _provider_settings = ConfigProvider.get().global_config.stt_provider.settings

        # Start streaming.
        streaming_options = dict(
            language=self.get_stt_language(),
            interim_results=True,
            single_utterance=False,  # False for continuous listening
            enable_punctuation=True,
            sample_rate=16000,
            encoding='LINEAR16',  # use LINEAR16 instead of WEBM_OPUS
        )
        await self.stt_manager.start_streaming(streaming_options)
        self.is_streaming = True

        log_info("✅ STT streaming started successfully with clean state", session_id=self.session.session_id)
        return True

    except Exception as exc:
        log_error(
            "❌ Failed to restart STT",
            error=str(exc),
            traceback=traceback.format_exc(),
            session_id=self.session.session_id,
        )
        self.is_streaming = False
        return False
|
346 |
|
347 |
async def change_state(self, new_state: ConversationState):
|
|
|
523 |
|
524 |
if session.chunk_counter == 1:
|
525 |
log_info(f"🎤 Started streaming audio to STT", session_id=session.session.session_id)
|
526 |
+
# İlk chunk log'u - format kontrolü kaldırıldı
|
527 |
+
log_info(f"📤 First chunk - size: {len(decoded_audio)} bytes", session_id=session.session.session_id)
|
|
|
|
|
|
|
|
|
528 |
elif session.chunk_counter % 100 == 0:
|
529 |
log_info(f"📊 Sent {session.chunk_counter} chunks to STT so far...", session_id=session.session.session_id)
|
530 |
|
|
|
551 |
|
552 |
# State'i değiştir
|
553 |
await session.change_state(ConversationState.PROCESSING_STT)
|
554 |
+
|
555 |
+
# State change mesajı gönder
|
556 |
+
if session.is_websocket_active:
|
557 |
+
await websocket.send_json({
|
558 |
+
"type": "state_change",
|
559 |
+
"from": "listening",
|
560 |
+
"to": "processing_stt"
|
561 |
+
})
|
562 |
|
563 |
# Process user input
|
564 |
await process_user_input(websocket, session)
|
|
|
586 |
"error_type": "audio_error",
|
587 |
"message": f"Audio processing error: {str(e)}"
|
588 |
})
|
589 |
+
|
590 |
# ========================= MAIN HANDLER =========================
|
591 |
async def websocket_endpoint(websocket: WebSocket, session_id: str):
|
592 |
"""Main WebSocket endpoint for real-time conversation"""
|