ciyidogan committed on
Commit
9ac3f81
·
verified ·
1 Parent(s): fd6c79a

Update websocket_handler.py

Browse files
Files changed (1) hide show
  1. websocket_handler.py +36 -27
websocket_handler.py CHANGED
@@ -313,25 +313,35 @@ class RealtimeSession:
313
  self.last_stt_stop_time = datetime.now()
314
 
315
  async def restart_stt_if_needed(self):
316
- """Restart STT if it's not active"""
317
  try:
318
- # Sadece LISTENING state'inde ve WebSocket aktifse restart yap
319
- if not self.is_streaming and self.is_websocket_active and self.state == ConversationState.LISTENING:
320
- log_info(f"🔄 Restarting STT stream (session #{self.stt_session_count} -> #{self.stt_session_count + 1})",
321
- session_id=self.session.session_id)
322
-
323
- # Yeni session başlat (initialize_stt zaten stop_stt_streaming'i çağırıyor)
324
- stt_initialized = await self.initialize_stt()
325
- if stt_initialized:
326
- log_info(f"✅ STT stream restarted successfully", session_id=self.session.session_id)
327
- return True
328
- else:
329
- log_error(f"❌ Failed to restart STT stream", session_id=self.session.session_id)
330
  return False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
331
  return True
 
332
  except Exception as e:
333
- log_error(f"❌ Error restarting STT", error=str(e), traceback=traceback.format_exc(),
334
- session_id=self.session.session_id)
335
  return False
336
 
337
  async def change_state(self, new_state: ConversationState):
@@ -513,12 +523,8 @@ async def handle_audio_chunk(websocket: WebSocket, session: RealtimeSession, mes
513
 
514
  if session.chunk_counter == 1:
515
  log_info(f"🎤 Started streaming audio to STT", session_id=session.session.session_id)
516
- # İlk chunk'ta format kontrolü yap
517
- if len(decoded_audio) >= 4:
518
- if decoded_audio[:4] == b'\x1a\x45\xdf\xa3':
519
- log_info(f"✅ Valid WEBM header detected", session_id=session.session.session_id)
520
- else:
521
- log_warning(f"⚠️ Unknown audio format, first 4 bytes: {decoded_audio[:4].hex()}", session_id=session.session.session_id)
522
  elif session.chunk_counter % 100 == 0:
523
  log_info(f"📊 Sent {session.chunk_counter} chunks to STT so far...", session_id=session.session.session_id)
524
 
@@ -545,11 +551,14 @@ async def handle_audio_chunk(websocket: WebSocket, session: RealtimeSession, mes
545
 
546
  # State'i değiştir
547
  await session.change_state(ConversationState.PROCESSING_STT)
548
- await websocket.send_json({
549
- "type": "state_change",
550
- "from": "listening",
551
- "to": "processing_stt"
552
- })
 
 
 
553
 
554
  # Process user input
555
  await process_user_input(websocket, session)
@@ -577,7 +586,7 @@ async def handle_audio_chunk(websocket: WebSocket, session: RealtimeSession, mes
577
  "error_type": "audio_error",
578
  "message": f"Audio processing error: {str(e)}"
579
  })
580
-
581
  # ========================= MAIN HANDLER =========================
582
  async def websocket_endpoint(websocket: WebSocket, session_id: str):
583
  """Main WebSocket endpoint for real-time conversation"""
 
313
  self.last_stt_stop_time = datetime.now()
314
 
315
  async def restart_stt_if_needed(self):
316
+ """Restart STT streaming if needed"""
317
  try:
318
+ # STT yoksa baştan oluştur
319
+ if not self.stt_manager:
320
+ await self.create_stt_manager()
321
+ if not self.stt_manager:
322
+ log_error(f"❌ Failed to create STT manager", session_id=self.session.session_id)
 
 
 
 
 
 
 
323
  return False
324
+
325
+ # Streaming başlat
326
+ config = ConfigProvider.get().global_config.stt_provider.settings
327
+ stt_config = {
328
+ 'language': self.get_stt_language(),
329
+ 'interim_results': True,
330
+ 'single_utterance': False, # Continuous listening için False
331
+ 'enable_punctuation': True,
332
+ 'sample_rate': 16000,
333
+ 'encoding': 'LINEAR16' # WEBM_OPUS yerine LINEAR16 kullan
334
+ }
335
+
336
+ await self.stt_manager.start_streaming(stt_config)
337
+ self.is_streaming = True
338
+
339
+ log_info(f"✅ STT streaming started successfully with clean state", session_id=self.session.session_id)
340
  return True
341
+
342
  except Exception as e:
343
+ log_error(f"❌ Failed to restart STT", error=str(e), traceback=traceback.format_exc(), session_id=self.session.session_id)
344
+ self.is_streaming = False
345
  return False
346
 
347
  async def change_state(self, new_state: ConversationState):
 
523
 
524
  if session.chunk_counter == 1:
525
  log_info(f"🎤 Started streaming audio to STT", session_id=session.session.session_id)
526
+ # İlk chunk log'u - format kontrolü kaldırıldı
527
+ log_info(f"📤 First chunk - size: {len(decoded_audio)} bytes", session_id=session.session.session_id)
 
 
 
 
528
  elif session.chunk_counter % 100 == 0:
529
  log_info(f"📊 Sent {session.chunk_counter} chunks to STT so far...", session_id=session.session.session_id)
530
 
 
551
 
552
  # State'i değiştir
553
  await session.change_state(ConversationState.PROCESSING_STT)
554
+
555
+ # State change mesajı gönder
556
+ if session.is_websocket_active:
557
+ await websocket.send_json({
558
+ "type": "state_change",
559
+ "from": "listening",
560
+ "to": "processing_stt"
561
+ })
562
 
563
  # Process user input
564
  await process_user_input(websocket, session)
 
586
  "error_type": "audio_error",
587
  "message": f"Audio processing error: {str(e)}"
588
  })
589
+
590
  # ========================= MAIN HANDLER =========================
591
  async def websocket_endpoint(websocket: WebSocket, session_id: str):
592
  """Main WebSocket endpoint for real-time conversation"""