Spaces:
Running
Running
Update stt/stt_deepgram.py
Browse files - stt/stt_deepgram.py +29 -0
stt/stt_deepgram.py
CHANGED
@@ -178,7 +178,19 @@ class DeepgramSTT(STTInterface):
|
|
178 |
if not result:
|
179 |
log_warning("⚠️ No result in transcript event")
|
180 |
return
|
|
|
|
|
|
|
|
|
|
|
181 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
182 |
# Access properties directly from the result object
|
183 |
is_final = result.is_final if hasattr(result, 'is_final') else False
|
184 |
|
@@ -257,6 +269,23 @@ class DeepgramSTT(STTInterface):
|
|
257 |
return
|
258 |
|
259 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
260 |
# Send audio to Deepgram
|
261 |
self.live_connection.send(audio_chunk)
|
262 |
|
|
|
178 |
if not result:
|
179 |
log_warning("⚠️ No result in transcript event")
|
180 |
return
|
181 |
+
|
182 |
+
# ✅ Inspect the result object in detail for debugging
|
183 |
+
if self.total_chunks < 5: # İlk birkaç event için
|
184 |
+
log_debug(f"🔍 Result object type: {type(result)}")
|
185 |
+
log_debug(f"🔍 Result dir: {[attr for attr in dir(result) if not attr.startswith('_')]}")
|
186 |
|
187 |
+
# Log all of the result's properties
|
188 |
+
try:
|
189 |
+
if hasattr(result, '__dict__'):
|
190 |
+
log_debug(f"🔍 Result dict: {result.__dict__}")
|
191 |
+
except:
|
192 |
+
pass
|
193 |
+
|
194 |
# Access properties directly from the result object
|
195 |
is_final = result.is_final if hasattr(result, 'is_final') else False
|
196 |
|
|
|
269 |
return
|
270 |
|
271 |
try:
|
272 |
+
# ✅ Analyze the audio format for the first few chunks
|
273 |
+
if self.total_chunks < 3:
|
274 |
+
# Linear16 format check
|
275 |
+
if len(audio_chunk) >= 4:
|
276 |
+
# Read the first 2 bytes as an int16
|
277 |
+
import struct
|
278 |
+
try:
|
279 |
+
# If this is Linear16, we should be able to read the first sample
|
280 |
+
first_sample = struct.unpack('<h', audio_chunk[:2])[0]
|
281 |
+
log_info(f"🔊 Audio format check - Chunk #{self.total_chunks}: First sample={first_sample}, Size={len(audio_chunk)} bytes")
|
282 |
+
|
283 |
+
# If this is WebM/Opus, the magic bytes will be different
|
284 |
+
if audio_chunk[:4] == b'webm' or audio_chunk[:4] == b'\x1a\x45\xdf\xa3':
|
285 |
+
log_error("❌ WebM format detected instead of Linear16!")
|
286 |
+
except:
|
287 |
+
log_warning("⚠️ Could not parse as Linear16")
|
288 |
+
|
289 |
# Send audio to Deepgram
|
290 |
self.live_connection.send(audio_chunk)
|
291 |
|