Spaces:
Building
Building
Update stt/stt_deepgram.py
Browse files - stt/stt_deepgram.py +22 -3
stt/stt_deepgram.py
CHANGED
@@ -44,6 +44,9 @@ class DeepgramSTT(STTInterface):
|
|
44 |
# Final result tracking
|
45 |
self.final_result_received = False
|
46 |
self.stop_event = threading.Event()
|
|
|
|
|
|
|
47 |
|
48 |
log_info(f"✅ Deepgram STT initialized (SDK version)")
|
49 |
|
@@ -279,6 +282,10 @@ class DeepgramSTT(STTInterface):
|
|
279 |
raise RuntimeError("Streaming not started. Call start_streaming() first.")
|
280 |
|
281 |
try:
|
|
|
|
|
|
|
|
|
282 |
# İlk birkaç chunk için audio formatını analiz et
|
283 |
if self.total_chunks < 3:
|
284 |
if len(audio_chunk) >= 4:
|
@@ -288,9 +295,18 @@ class DeepgramSTT(STTInterface):
|
|
288 |
log_info(f"🔊 Audio format check - Chunk #{self.total_chunks}: First sample={first_sample}, Size={len(audio_chunk)} bytes")
|
289 |
except:
|
290 |
log_warning("⚠️ Could not parse as Linear16")
|
291 |
-
|
292 |
-
|
293 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
294 |
|
295 |
self.total_chunks += 1
|
296 |
self.total_audio_bytes += len(audio_chunk)
|
@@ -379,6 +395,9 @@ class DeepgramSTT(STTInterface):
|
|
379 |
self.total_chunks = 0
|
380 |
self.session_id += 1
|
381 |
self.final_result_received = False
|
|
|
|
|
|
|
382 |
|
383 |
log_debug(f"🔄 Session data reset. New session ID: {self.session_id}")
|
384 |
|
|
|
44 |
# Final result tracking
|
45 |
self.final_result_received = False
|
46 |
self.stop_event = threading.Event()
|
47 |
+
|
48 |
+
# ✅ Initial buffer for better VAD context
|
49 |
+
self.initial_buffer = []
|
50 |
|
51 |
log_info(f"✅ Deepgram STT initialized (SDK version)")
|
52 |
|
|
|
282 |
raise RuntimeError("Streaming not started. Call start_streaming() first.")
|
283 |
|
284 |
try:
|
285 |
+
# ✅ İlk birkaç chunk'ı biriktirip gönder (daha iyi context)
|
286 |
+
if not hasattr(self, 'initial_buffer'):
|
287 |
+
self.initial_buffer = []
|
288 |
+
|
289 |
# İlk birkaç chunk için audio formatını analiz et
|
290 |
if self.total_chunks < 3:
|
291 |
if len(audio_chunk) >= 4:
|
|
|
295 |
log_info(f"🔊 Audio format check - Chunk #{self.total_chunks}: First sample={first_sample}, Size={len(audio_chunk)} bytes")
|
296 |
except:
|
297 |
log_warning("⚠️ Could not parse as Linear16")
|
298 |
+
|
299 |
+
self.initial_buffer.append(audio_chunk)
|
300 |
+
|
301 |
+
# 3. chunk'ta hepsini birden gönder
|
302 |
+
if self.total_chunks == 2:
|
303 |
+
combined_audio = b''.join(self.initial_buffer)
|
304 |
+
self.live_connection.send(combined_audio)
|
305 |
+
self.initial_buffer = []
|
306 |
+
log_info(f"🎯 Sent initial audio buffer: {len(combined_audio)} bytes")
|
307 |
+
else:
|
308 |
+
# Send audio to Deepgram (final result gelse bile gönder, Deepgram kendi handle edecek)
|
309 |
+
self.live_connection.send(audio_chunk)
|
310 |
|
311 |
self.total_chunks += 1
|
312 |
self.total_audio_bytes += len(audio_chunk)
|
|
|
395 |
self.total_chunks = 0
|
396 |
self.session_id += 1
|
397 |
self.final_result_received = False
|
398 |
+
|
399 |
+
# ✅ Clear initial buffer
|
400 |
+
self.initial_buffer = []
|
401 |
|
402 |
log_debug(f"🔄 Session data reset. New session ID: {self.session_id}")
|
403 |
|