Spaces:
Building
Building
Update stt_google.py
Browse files- stt_google.py +36 -52
stt_google.py
CHANGED
@@ -202,7 +202,6 @@ class GoogleCloudSTT(STTInterface):
|
|
202 |
"""Generate streaming requests"""
|
203 |
chunk_count = 0
|
204 |
total_bytes = 0
|
205 |
-
first_chunk_processed = False
|
206 |
|
207 |
while not self.stop_event.is_set():
|
208 |
try:
|
@@ -214,22 +213,9 @@ class GoogleCloudSTT(STTInterface):
|
|
214 |
chunk_count += 1
|
215 |
total_bytes += len(chunk)
|
216 |
|
217 |
-
# İlk chunk'
|
218 |
if chunk_count == 1:
|
219 |
-
log_info(f"📤 First chunk - size: {len(chunk)} bytes")
|
220 |
-
# Audio header kontrolü (WEBM magic bytes)
|
221 |
-
if len(chunk) >= 4:
|
222 |
-
if chunk[:4] == b'\x1a\x45\xdf\xa3':
|
223 |
-
log_info("✅ Valid WEBM header detected")
|
224 |
-
first_chunk_processed = True
|
225 |
-
else:
|
226 |
-
log_error(f"❌ Invalid audio format, first 4 bytes: {chunk[:4].hex()}")
|
227 |
-
# Hatalı format, stream'i durdur
|
228 |
-
break
|
229 |
-
|
230 |
-
# İlk chunk geçerliyse devam et
|
231 |
-
if chunk_count == 1 and not first_chunk_processed:
|
232 |
-
break
|
233 |
|
234 |
# Her 100 chunk'ta durum raporu
|
235 |
if chunk_count % 100 == 0:
|
@@ -281,51 +267,49 @@ class GoogleCloudSTT(STTInterface):
|
|
281 |
# Response içeriğini kontrol et
|
282 |
if not response.results:
|
283 |
empty_response_count += 1
|
284 |
-
if empty_response_count ==
|
285 |
-
log_debug("
|
286 |
continue
|
287 |
-
|
288 |
-
|
289 |
-
|
290 |
-
|
291 |
-
|
292 |
-
log_debug(f"📋 Result {i} has no alternatives")
|
293 |
-
continue
|
294 |
|
295 |
-
|
296 |
-
|
297 |
-
|
298 |
-
|
299 |
-
|
300 |
-
|
301 |
-
transcription = TranscriptionResult(
|
302 |
text=alternative.transcript,
|
303 |
is_final=result.is_final,
|
304 |
-
confidence=alternative.confidence if hasattr(alternative, 'confidence')
|
305 |
-
timestamp=datetime.now().timestamp()
|
306 |
)
|
307 |
|
308 |
-
#
|
309 |
-
self._put_result(transcription)
|
310 |
-
|
311 |
-
# SADECE final result'ları logla
|
312 |
if result.is_final:
|
313 |
-
log_info(f"🎯 GOOGLE STT FINAL: '{
|
314 |
-
|
315 |
-
|
316 |
-
|
317 |
-
|
318 |
-
|
319 |
-
|
320 |
-
|
|
|
|
|
|
|
321 |
log_info(f"📊 Google STT stream ended. Total responses: {response_count}, Empty: {empty_response_count}")
|
322 |
-
|
323 |
-
except Exception as e:
|
324 |
-
error_msg = str(e)
|
325 |
|
326 |
-
|
327 |
-
|
328 |
-
|
|
|
|
|
|
|
|
|
|
|
329 |
elif "Bad language code" in error_msg:
|
330 |
log_error(f"❌ Invalid language code in STT config. Check locale settings.")
|
331 |
elif "invalid_argument" in error_msg:
|
|
|
202 |
"""Generate streaming requests"""
|
203 |
chunk_count = 0
|
204 |
total_bytes = 0
|
|
|
205 |
|
206 |
while not self.stop_event.is_set():
|
207 |
try:
|
|
|
213 |
chunk_count += 1
|
214 |
total_bytes += len(chunk)
|
215 |
|
216 |
+
# İlk chunk log'u
|
217 |
if chunk_count == 1:
|
218 |
+
log_info(f"📤 First audio chunk to Google STT - size: {len(chunk)} bytes")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
219 |
|
220 |
# Her 100 chunk'ta durum raporu
|
221 |
if chunk_count % 100 == 0:
|
|
|
267 |
# Response içeriğini kontrol et
|
268 |
if not response.results:
|
269 |
empty_response_count += 1
|
270 |
+
if empty_response_count % 10 == 0:
|
271 |
+
log_debug(f"Empty responses received: {empty_response_count}")
|
272 |
continue
|
273 |
+
|
274 |
+
# Her result'ı işle
|
275 |
+
for result_index, result in enumerate(response.results):
|
276 |
+
if result.alternatives:
|
277 |
+
alternative = result.alternatives[0]
|
|
|
|
|
278 |
|
279 |
+
# Sonucu logla (sadece debug modda interim)
|
280 |
+
if result.is_final or self.debug_mode:
|
281 |
+
log_debug(f"📋 Result {result_index}: is_final={result.is_final}, alternatives={len(result.alternatives)}")
|
282 |
+
|
283 |
+
# TranscriptionResult oluştur ve queue'ya ekle
|
284 |
+
transcription_result = TranscriptionResult(
|
|
|
285 |
text=alternative.transcript,
|
286 |
is_final=result.is_final,
|
287 |
+
confidence=alternative.confidence if hasattr(alternative, 'confidence') else 0.0
|
|
|
288 |
)
|
289 |
|
290 |
+
# Final result'ları her zaman logla
|
|
|
|
|
|
|
291 |
if result.is_final:
|
292 |
+
log_info(f"🎯 GOOGLE STT FINAL: '{transcription_result.text}'")
|
293 |
+
|
294 |
+
# Queue'ya ekle
|
295 |
+
self._put_result(transcription_result)
|
296 |
+
|
297 |
+
# Eğer final result ise stream'i durdur
|
298 |
+
if result.is_final and self.single_utterance:
|
299 |
+
log_info("🏁 Final result received with single_utterance=True, stopping stream")
|
300 |
+
self.stop_event.set()
|
301 |
+
break
|
302 |
+
|
303 |
log_info(f"📊 Google STT stream ended. Total responses: {response_count}, Empty: {empty_response_count}")
|
|
|
|
|
|
|
304 |
|
305 |
+
except grpc.RpcError as e:
|
306 |
+
error_details = e.details() if hasattr(e, 'details') else str(e)
|
307 |
+
error_code = e.code() if hasattr(e, 'code') else None
|
308 |
+
error_msg = f"gRPC error - Code: {error_code}, Details: {error_details}"
|
309 |
+
|
310 |
+
# Spesifik hata durumlarını kontrol et
|
311 |
+
if "Audio Timeout Error" in error_details or "stream duration" in error_details:
|
312 |
+
log_info(f"⏱️ Google STT stream timeout after long duration. This is expected for long sessions.")
|
313 |
elif "Bad language code" in error_msg:
|
314 |
log_error(f"❌ Invalid language code in STT config. Check locale settings.")
|
315 |
elif "invalid_argument" in error_msg:
|