ciyidogan committed on
Commit
4e4dedc
·
verified ·
1 Parent(s): 01c853f

Update stt/stt_deepgram.py

Browse files
Files changed (1) hide show
  1. stt/stt_deepgram.py +22 -3
stt/stt_deepgram.py CHANGED
@@ -44,6 +44,9 @@ class DeepgramSTT(STTInterface):
44
  # Final result tracking
45
  self.final_result_received = False
46
  self.stop_event = threading.Event()
 
 
 
47
 
48
  log_info(f"✅ Deepgram STT initialized (SDK version)")
49
 
@@ -279,6 +282,10 @@ class DeepgramSTT(STTInterface):
279
  raise RuntimeError("Streaming not started. Call start_streaming() first.")
280
 
281
  try:
 
 
 
 
282
  # İlk birkaç chunk için audio formatını analiz et
283
  if self.total_chunks < 3:
284
  if len(audio_chunk) >= 4:
@@ -288,9 +295,18 @@ class DeepgramSTT(STTInterface):
288
  log_info(f"🔊 Audio format check - Chunk #{self.total_chunks}: First sample={first_sample}, Size={len(audio_chunk)} bytes")
289
  except:
290
  log_warning("⚠️ Could not parse as Linear16")
291
-
292
- # Send audio to Deepgram (final result gelse bile gönder, Deepgram kendi handle edecek)
293
- self.live_connection.send(audio_chunk)
 
 
 
 
 
 
 
 
 
294
 
295
  self.total_chunks += 1
296
  self.total_audio_bytes += len(audio_chunk)
@@ -379,6 +395,9 @@ class DeepgramSTT(STTInterface):
379
  self.total_chunks = 0
380
  self.session_id += 1
381
  self.final_result_received = False
 
 
 
382
 
383
  log_debug(f"🔄 Session data reset. New session ID: {self.session_id}")
384
 
 
44
  # Final result tracking
45
  self.final_result_received = False
46
  self.stop_event = threading.Event()
47
+
48
+ # ✅ Initial buffer for better VAD context
49
+ self.initial_buffer = []
50
 
51
  log_info(f"✅ Deepgram STT initialized (SDK version)")
52
 
 
282
  raise RuntimeError("Streaming not started. Call start_streaming() first.")
283
 
284
  try:
285
+ # ✅ İlk birkaç chunk'ı biriktirip gönder (daha iyi context)
286
+ if not hasattr(self, 'initial_buffer'):
287
+ self.initial_buffer = []
288
+
289
  # İlk birkaç chunk için audio formatını analiz et
290
  if self.total_chunks < 3:
291
  if len(audio_chunk) >= 4:
 
295
  log_info(f"🔊 Audio format check - Chunk #{self.total_chunks}: First sample={first_sample}, Size={len(audio_chunk)} bytes")
296
  except:
297
  log_warning("⚠️ Could not parse as Linear16")
298
+
299
+ self.initial_buffer.append(audio_chunk)
300
+
301
+ # 3. chunk'ta hepsini birden gönder
302
+ if self.total_chunks == 2:
303
+ combined_audio = b''.join(self.initial_buffer)
304
+ self.live_connection.send(combined_audio)
305
+ self.initial_buffer = []
306
+ log_info(f"🎯 Sent initial audio buffer: {len(combined_audio)} bytes")
307
+ else:
308
+ # Send audio to Deepgram (final result gelse bile gönder, Deepgram kendi handle edecek)
309
+ self.live_connection.send(audio_chunk)
310
 
311
  self.total_chunks += 1
312
  self.total_audio_bytes += len(audio_chunk)
 
395
  self.total_chunks = 0
396
  self.session_id += 1
397
  self.final_result_received = False
398
+
399
+ # ✅ Clear initial buffer
400
+ self.initial_buffer = []
401
 
402
  log_debug(f"🔄 Session data reset. New session ID: {self.session_id}")
403