ciyidogan committed on
Commit
d707306
·
verified ·
1 Parent(s): 374ac0f

Update websocket_handler.py

Browse files
Files changed (1) hide show
  1. websocket_handler.py +56 -20
websocket_handler.py CHANGED
@@ -176,23 +176,41 @@ class RealtimeSession:
176
  self.silence_threshold_ms = silence_threshold
177
 
178
  async def initialize_stt(self):
179
- """Initialize STT provider"""
180
- try:
181
- self.stt_manager = STTFactory.create_provider()
182
- if self.stt_manager:
183
- config = ConfigProvider.get().global_config.stt_provider.settings
184
- await self.stt_manager.start_streaming({
185
- "language": config.get("language", "tr-TR"),
186
- "interim_results": config.get("interim_results", True),
187
- "single_utterance": False,
188
- "enable_punctuation": config.get("enable_punctuation", True),
189
- "sample_rate": 16000,
190
- "encoding": "WEBM_OPUS"
191
- })
192
- log_info("STT manager initialized", session_id=self.session.session_id)
193
- return True
194
- except Exception as e:
195
- log_error(f"Failed to initialize STT", error=str(e), session_id=self.session.session_id)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
196
  return False
197
 
198
  async def change_state(self, new_state: ConversationState):
@@ -252,11 +270,19 @@ async def websocket_endpoint(websocket: WebSocket, session_id: str):
252
  # Initialize STT
253
  stt_initialized = await realtime_session.initialize_stt()
254
  if not stt_initialized:
 
255
  await websocket.send_json({
256
  "type": "error",
257
- "message": "STT initialization failed"
 
258
  })
259
-
 
 
 
 
 
 
260
  try:
261
  while True:
262
  # Receive message
@@ -297,7 +323,17 @@ async def handle_audio_chunk(websocket: WebSocket, session: RealtimeSession, mes
297
  audio_data = message.get("data")
298
  if not audio_data:
299
  return
300
-
 
 
 
 
 
 
 
 
 
 
301
  # Check for barge-in during TTS/audio playback
302
  if session.state in [ConversationState.PLAYING_AUDIO, ConversationState.PROCESSING_TTS]:
303
  await session.handle_barge_in()
 
176
  self.silence_threshold_ms = silence_threshold
177
 
178
  async def initialize_stt(self):
179
+ """Initialize STT provider"""
180
+ try:
181
+ self.stt_manager = STTFactory.create_provider()
182
+ if self.stt_manager and self.stt_manager.supports_realtime():
183
+ config = ConfigProvider.get().global_config.stt_provider.settings
184
+
185
+ # STTConfig nesnesi oluştur, dict değil!
186
+ from stt_interface import STTConfig
187
+ stt_config = STTConfig(
188
+ language=config.get("language", "tr-TR"),
189
+ interim_results=config.get("interim_results", True),
190
+ single_utterance=False,
191
+ enable_punctuation=config.get("enable_punctuation", True),
192
+ sample_rate=16000,
193
+ encoding="WEBM_OPUS",
194
+ model=config.get("model", "latest_long"),
195
+ use_enhanced=config.get("use_enhanced", True),
196
+ # Voice Activity Detection
197
+ vad_enabled=True,
198
+ speech_timeout_ms=config.get("speech_timeout_ms", 2000),
199
+ # Noise reduction
200
+ noise_reduction_enabled=True,
201
+ noise_reduction_level=config.get("noise_reduction_level", 2)
202
+ )
203
+
204
+ await self.stt_manager.start_streaming(stt_config)
205
+ log_info("STT manager initialized", session_id=self.session.session_id)
206
+ return True
207
+ else:
208
+ log_warning("STT provider does not support realtime", session_id=self.session.session_id)
209
+ return False
210
+ except Exception as e:
211
+ log_error(f"Failed to initialize STT", error=str(e), session_id=self.session.session_id)
212
+ # STT başarısız oldu, manager'ı None yap
213
+ self.stt_manager = None
214
  return False
215
 
216
  async def change_state(self, new_state: ConversationState):
 
270
  # Initialize STT
271
  stt_initialized = await realtime_session.initialize_stt()
272
  if not stt_initialized:
273
+ # STT başarısız oldu, kullanıcıya bildir ve bağlantıyı kapat
274
  await websocket.send_json({
275
  "type": "error",
276
+ "message": "Speech-to-Text service initialization failed. Please check your configuration.",
277
+ "error_type": "stt_init_failed"
278
  })
279
+
280
+ # Cleanup ve close
281
+ await realtime_session.cleanup()
282
+ await websocket.close()
283
+ return
284
+
285
+ # STT başarılı, devam et
286
  try:
287
  while True:
288
  # Receive message
 
323
  audio_data = message.get("data")
324
  if not audio_data:
325
  return
326
+
327
+ # STT manager kontrolü
328
+ if not session.stt_manager:
329
+ log_warning("No STT manager available, ignoring audio chunk", session_id=session.session.session_id)
330
+ await websocket.send_json({
331
+ "type": "error",
332
+ "message": "Speech recognition not available",
333
+ "error_type": "stt_unavailable"
334
+ })
335
+ return
336
+
337
  # Check for barge-in during TTS/audio playback
338
  if session.state in [ConversationState.PLAYING_AUDIO, ConversationState.PROCESSING_TTS]:
339
  await session.handle_barge_in()