Spaces:

UcsTurkey
/

flare

Building

App Files Community

ciyidogan committed 8 days ago

Commit

4e4dedc

verified ·

1 Parent(s): 01c853f

Update stt/stt_deepgram.py

Browse files

Files changed (1) hide show

stt/stt_deepgram.py +22 -3

stt/stt_deepgram.py CHANGED Viewed

@@ -44,6 +44,9 @@ class DeepgramSTT(STTInterface):
         # Final result tracking
         self.final_result_received = False
         self.stop_event = threading.Event()
         log_info(f"✅ Deepgram STT initialized (SDK version)")
@@ -279,6 +282,10 @@ class DeepgramSTT(STTInterface):
             raise RuntimeError("Streaming not started. Call start_streaming() first.")
         try:
             # İlk birkaç chunk için audio formatını analiz et
             if self.total_chunks < 3:
                 if len(audio_chunk) >= 4:
@@ -288,9 +295,18 @@ class DeepgramSTT(STTInterface):
                         log_info(f"🔊 Audio format check - Chunk #{self.total_chunks}: First sample={first_sample}, Size={len(audio_chunk)} bytes")
                     except:
                         log_warning("⚠️ Could not parse as Linear16")
-            # Send audio to Deepgram (final result gelse bile gönder, Deepgram kendi handle edecek)
-            self.live_connection.send(audio_chunk)
             self.total_chunks += 1
             self.total_audio_bytes += len(audio_chunk)
@@ -379,6 +395,9 @@ class DeepgramSTT(STTInterface):
         self.total_chunks = 0
         self.session_id += 1
         self.final_result_received = False
         log_debug(f"🔄 Session data reset. New session ID: {self.session_id}")

         # Final result tracking
         self.final_result_received = False
         self.stop_event = threading.Event()
+        # ✅ Initial buffer for better VAD context
+        self.initial_buffer = []
         log_info(f"✅ Deepgram STT initialized (SDK version)")
             raise RuntimeError("Streaming not started. Call start_streaming() first.")
         try:
+            # ✅ İlk birkaç chunk'ı biriktirip gönder (daha iyi context)
+            if not hasattr(self, 'initial_buffer'):
+                self.initial_buffer = []
             # İlk birkaç chunk için audio formatını analiz et
             if self.total_chunks < 3:
                 if len(audio_chunk) >= 4:
                         log_info(f"🔊 Audio format check - Chunk #{self.total_chunks}: First sample={first_sample}, Size={len(audio_chunk)} bytes")
                     except:
                         log_warning("⚠️ Could not parse as Linear16")
+                self.initial_buffer.append(audio_chunk)
+                # 3. chunk'ta hepsini birden gönder
+                if self.total_chunks == 2:
+                    combined_audio = b''.join(self.initial_buffer)
+                    self.live_connection.send(combined_audio)
+                    self.initial_buffer = []
+                    log_info(f"🎯 Sent initial audio buffer: {len(combined_audio)} bytes")
+            else:
+                # Send audio to Deepgram (final result gelse bile gönder, Deepgram kendi handle edecek)
+                self.live_connection.send(audio_chunk)
             self.total_chunks += 1
             self.total_audio_bytes += len(audio_chunk)
         self.total_chunks = 0
         self.session_id += 1
         self.final_result_received = False
+        # ✅ Clear initial buffer
+        self.initial_buffer = []
         log_debug(f"🔄 Session data reset. New session ID: {self.session_id}")