Spaces:
Running
Running
Update websocket_handler.py
Browse files- websocket_handler.py +56 -20
websocket_handler.py
CHANGED
@@ -176,23 +176,41 @@ class RealtimeSession:
|
|
176 |
self.silence_threshold_ms = silence_threshold
|
177 |
|
178 |
async def initialize_stt(self):
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
-
|
190 |
-
|
191 |
-
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
196 |
return False
|
197 |
|
198 |
async def change_state(self, new_state: ConversationState):
|
@@ -252,11 +270,19 @@ async def websocket_endpoint(websocket: WebSocket, session_id: str):
|
|
252 |
# Initialize STT
|
253 |
stt_initialized = await realtime_session.initialize_stt()
|
254 |
if not stt_initialized:
|
|
|
255 |
await websocket.send_json({
|
256 |
"type": "error",
|
257 |
-
"message": "
|
|
|
258 |
})
|
259 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
260 |
try:
|
261 |
while True:
|
262 |
# Receive message
|
@@ -297,7 +323,17 @@ async def handle_audio_chunk(websocket: WebSocket, session: RealtimeSession, mes
|
|
297 |
audio_data = message.get("data")
|
298 |
if not audio_data:
|
299 |
return
|
300 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
301 |
# Check for barge-in during TTS/audio playback
|
302 |
if session.state in [ConversationState.PLAYING_AUDIO, ConversationState.PROCESSING_TTS]:
|
303 |
await session.handle_barge_in()
|
|
|
176 |
self.silence_threshold_ms = silence_threshold
|
177 |
|
178 |
async def initialize_stt(self):
    """Create the STT provider and start a realtime streaming session.

    Builds an ``STTConfig`` from the global STT provider settings and
    passes it to the provider's ``start_streaming``.

    Returns:
        bool: True once streaming has been started. False when the factory
        yields no provider, the provider does not support realtime, or any
        step raises. On every False path ``self.stt_manager`` is reset to
        None, so code that checks the manager for truthiness treats STT as
        unavailable.
    """
    try:
        self.stt_manager = STTFactory.create_provider()

        # Guard clause: no provider, or one that cannot stream in realtime.
        if not (self.stt_manager and self.stt_manager.supports_realtime()):
            log_warning("STT provider does not support realtime", session_id=self.session.session_id)
            # Drop the unusable provider, matching the exception path below,
            # so a truthiness check on stt_manager does not report a
            # streaming session that was never started.
            self.stt_manager = None
            return False

        config = ConfigProvider.get().global_config.stt_provider.settings

        # start_streaming is given an STTConfig object, not a plain dict.
        from stt_interface import STTConfig
        stt_config = STTConfig(
            language=config.get("language", "tr-TR"),
            interim_results=config.get("interim_results", True),
            single_utterance=False,
            enable_punctuation=config.get("enable_punctuation", True),
            sample_rate=16000,
            encoding="WEBM_OPUS",
            model=config.get("model", "latest_long"),
            use_enhanced=config.get("use_enhanced", True),
            # Voice Activity Detection
            vad_enabled=True,
            speech_timeout_ms=config.get("speech_timeout_ms", 2000),
            # Noise reduction
            noise_reduction_enabled=True,
            noise_reduction_level=config.get("noise_reduction_level", 2),
        )

        await self.stt_manager.start_streaming(stt_config)
        log_info("STT manager initialized", session_id=self.session.session_id)
        return True
    except Exception as e:
        # Boundary handler: any failure here is reported to the caller as
        # False instead of propagating.
        log_error("Failed to initialize STT", error=str(e), session_id=self.session.session_id)
        # STT failed; clear the manager so it is not used afterwards.
        self.stt_manager = None
        return False
|
215 |
|
216 |
async def change_state(self, new_state: ConversationState):
|
|
|
270 |
# Initialize STT
|
271 |
stt_initialized = await realtime_session.initialize_stt()
|
272 |
if not stt_initialized:
|
273 |
+
# STT failed: notify the user and close the connection
|
274 |
await websocket.send_json({
|
275 |
"type": "error",
|
276 |
+
"message": "Speech-to-Text service initialization failed. Please check your configuration.",
|
277 |
+
"error_type": "stt_init_failed"
|
278 |
})
|
279 |
+
|
280 |
+
# Clean up and close
|
281 |
+
await realtime_session.cleanup()
|
282 |
+
await websocket.close()
|
283 |
+
return
|
284 |
+
|
285 |
+
# STT succeeded; continue
|
286 |
try:
|
287 |
while True:
|
288 |
# Receive message
|
|
|
323 |
audio_data = message.get("data")
|
324 |
if not audio_data:
|
325 |
return
|
326 |
+
|
327 |
+
# Check that an STT manager is available
|
328 |
+
if not session.stt_manager:
|
329 |
+
log_warning("No STT manager available, ignoring audio chunk", session_id=session.session.session_id)
|
330 |
+
await websocket.send_json({
|
331 |
+
"type": "error",
|
332 |
+
"message": "Speech recognition not available",
|
333 |
+
"error_type": "stt_unavailable"
|
334 |
+
})
|
335 |
+
return
|
336 |
+
|
337 |
# Check for barge-in during TTS/audio playback
|
338 |
if session.state in [ConversationState.PLAYING_AUDIO, ConversationState.PROCESSING_TTS]:
|
339 |
await session.handle_barge_in()
|