Spaces:
Building
Building
Update stt/stt_google.py
Browse files- stt/stt_google.py +38 -86
stt/stt_google.py
CHANGED
@@ -347,10 +347,7 @@ class GoogleCloudSTT(STTInterface):
|
|
347 |
"""Generate streaming requests"""
|
348 |
chunk_count = 0
|
349 |
total_bytes = 0
|
350 |
-
|
351 |
-
last_chunk_time = time.time()
|
352 |
-
silence_timeout = 2.0 # 2 saniye sessizlik timeout
|
353 |
-
|
354 |
while not self.stop_event.is_set():
|
355 |
try:
|
356 |
chunk = self.audio_queue.get(timeout=0.1)
|
@@ -360,37 +357,23 @@ class GoogleCloudSTT(STTInterface):
|
|
360 |
|
361 |
chunk_count += 1
|
362 |
total_bytes += len(chunk)
|
363 |
-
last_chunk_time = time.time() # Update last chunk time
|
364 |
|
365 |
# İlk chunk'ta audio format kontrolü
|
366 |
if chunk_count == 1:
|
367 |
log_info(f"📤 First chunk - size: {len(chunk)} bytes")
|
368 |
-
|
369 |
-
|
370 |
-
|
371 |
-
|
372 |
-
|
373 |
-
else:
|
374 |
-
log_error(f"❌ Invalid audio format, first 4 bytes: {chunk[:4].hex()}")
|
375 |
-
# Hatalı format, stream'i durdur
|
376 |
-
break
|
377 |
-
|
378 |
-
# İlk chunk geçerliyse devam et
|
379 |
-
if chunk_count == 1 and not first_chunk_processed:
|
380 |
-
break
|
381 |
|
382 |
-
# Her
|
383 |
-
if chunk_count %
|
384 |
-
|
385 |
-
log_info(f"📤 Progress: {chunk_count} chunks, {total_bytes/1024:.1f}KB total, avg {avg_chunk_size:.0f} bytes/chunk")
|
386 |
|
387 |
yield speech.StreamingRecognizeRequest(audio_content=chunk)
|
388 |
|
389 |
except queue.Empty:
|
390 |
-
# Check for silence timeout
|
391 |
-
if time.time() - last_chunk_time > silence_timeout:
|
392 |
-
log_info(f"🔇 Silence timeout reached ({silence_timeout}s), ending stream")
|
393 |
-
break
|
394 |
continue
|
395 |
except Exception as e:
|
396 |
log_error(f"❌ Error in request generator: {e}")
|
@@ -398,108 +381,77 @@ class GoogleCloudSTT(STTInterface):
|
|
398 |
|
399 |
# Create streaming client
|
400 |
requests = request_generator()
|
401 |
-
|
402 |
log_info("🎤 Creating Google STT streaming client...")
|
403 |
|
404 |
try:
|
405 |
responses = self.client.streaming_recognize(
|
406 |
self.streaming_config,
|
407 |
requests,
|
408 |
-
timeout=300
|
409 |
)
|
410 |
|
411 |
log_info("✅ Google STT streaming client created")
|
412 |
|
413 |
-
# Response timeout kontrolü
|
414 |
-
last_response_time = time.time()
|
415 |
-
RESPONSE_TIMEOUT = 30 # 30 saniye içinde response gelmezse
|
416 |
-
|
417 |
-
# Process responses
|
418 |
-
response_count = 0
|
419 |
-
empty_response_count = 0
|
420 |
-
|
421 |
for response in responses:
|
422 |
-
last_response_time = time.time()
|
423 |
-
response_count += 1
|
424 |
-
|
425 |
-
# Response type'ı logla
|
426 |
-
if response_count == 1:
|
427 |
-
log_info(f"📨 First response received from Google STT")
|
428 |
-
|
429 |
if self.stop_event.is_set():
|
430 |
-
log_info("🛑 Stop event detected
|
431 |
break
|
432 |
|
433 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
434 |
if not response.results:
|
435 |
-
empty_response_count += 1
|
436 |
-
if empty_response_count == 1:
|
437 |
-
log_debug("📭 Received empty response (no results)")
|
438 |
continue
|
439 |
|
440 |
-
for
|
441 |
-
log_debug(f"📋 Result {i}: is_final={result.is_final}, alternatives={len(result.alternatives)}")
|
442 |
-
|
443 |
if not result.alternatives:
|
444 |
-
log_debug(f"📋 Result {i} has no alternatives")
|
445 |
continue
|
446 |
|
447 |
-
# İlk alternatifi al
|
448 |
alternative = result.alternatives[0]
|
449 |
-
|
450 |
-
# Sadece anlamlı text'leri işle
|
451 |
if alternative.transcript.strip():
|
452 |
# Create transcription result
|
453 |
transcription = TranscriptionResult(
|
454 |
text=alternative.transcript,
|
455 |
is_final=result.is_final,
|
456 |
-
confidence=
|
457 |
timestamp=datetime.now().timestamp()
|
458 |
)
|
459 |
|
460 |
# Put result in queue
|
461 |
self._put_result(transcription)
|
462 |
|
463 |
-
# SADECE final result'ları logla
|
464 |
if result.is_final:
|
465 |
-
log_info(f"🎯
|
466 |
|
467 |
-
#
|
468 |
if self.streaming_config.single_utterance:
|
469 |
-
log_info("
|
470 |
-
#
|
471 |
-
|
472 |
-
|
473 |
-
return
|
474 |
-
else:
|
475 |
-
log_debug(f"📋 Result {i} has empty transcript")
|
476 |
-
continue
|
477 |
|
478 |
-
|
479 |
-
log_error(f"❌ No response from Google STT for {RESPONSE_TIMEOUT} seconds")
|
480 |
-
|
481 |
-
log_info(f"📊 Google STT stream ended. Total responses: {response_count}, Empty: {empty_response_count}")
|
482 |
|
483 |
except Exception as e:
|
484 |
error_msg = str(e)
|
485 |
-
|
486 |
-
#
|
487 |
-
if "
|
488 |
-
|
489 |
-
elif "
|
490 |
-
|
491 |
-
elif "invalid_argument" in error_msg:
|
492 |
-
log_error(f"❌ Invalid STT configuration. Check encoding and sample rate.")
|
493 |
-
elif "Deadline Exceeded" in error_msg:
|
494 |
-
log_error(f"❌ Google STT response timeout - possibly network issue or slow connection")
|
495 |
-
elif "503" in error_msg or "Service Unavailable" in error_msg:
|
496 |
-
log_error(f"❌ Google STT service temporarily unavailable. Will retry...")
|
497 |
else:
|
498 |
-
log_error(f"❌ Google STT
|
499 |
|
500 |
except Exception as e:
|
501 |
-
log_error(f"❌ Fatal error in STT stream
|
502 |
finally:
|
503 |
log_info("🎤 Google STT stream thread ended")
|
504 |
-
# Thread bittiğinde streaming flag'ini kapat
|
505 |
self.is_streaming = False
|
|
|
347 |
"""Generate streaming requests"""
|
348 |
chunk_count = 0
|
349 |
total_bytes = 0
|
350 |
+
|
|
|
|
|
|
|
351 |
while not self.stop_event.is_set():
|
352 |
try:
|
353 |
chunk = self.audio_queue.get(timeout=0.1)
|
|
|
357 |
|
358 |
chunk_count += 1
|
359 |
total_bytes += len(chunk)
|
|
|
360 |
|
361 |
# İlk chunk'ta audio format kontrolü
|
362 |
if chunk_count == 1:
|
363 |
log_info(f"📤 First chunk - size: {len(chunk)} bytes")
|
364 |
+
if len(chunk) >= 4 and chunk[:4] == b'\x1a\x45\xdf\xa3':
|
365 |
+
log_info("✅ Valid WEBM header detected")
|
366 |
+
else:
|
367 |
+
log_error(f"❌ Invalid audio format")
|
368 |
+
break
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
369 |
|
370 |
+
# Her 50 chunk'ta durum raporu
|
371 |
+
if chunk_count % 50 == 0:
|
372 |
+
log_info(f"📤 Progress: {chunk_count} chunks, {total_bytes/1024:.1f}KB total")
|
|
|
373 |
|
374 |
yield speech.StreamingRecognizeRequest(audio_content=chunk)
|
375 |
|
376 |
except queue.Empty:
|
|
|
|
|
|
|
|
|
377 |
continue
|
378 |
except Exception as e:
|
379 |
log_error(f"❌ Error in request generator: {e}")
|
|
|
381 |
|
382 |
# Create streaming client
|
383 |
requests = request_generator()
|
|
|
384 |
log_info("🎤 Creating Google STT streaming client...")
|
385 |
|
386 |
try:
|
387 |
responses = self.client.streaming_recognize(
|
388 |
self.streaming_config,
|
389 |
requests,
|
390 |
+
timeout=300
|
391 |
)
|
392 |
|
393 |
log_info("✅ Google STT streaming client created")
|
394 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
395 |
for response in responses:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
396 |
if self.stop_event.is_set():
|
397 |
+
log_info("🛑 Stop event detected")
|
398 |
break
|
399 |
|
400 |
+
# Check for speech events (VAD)
|
401 |
+
if hasattr(response, 'speech_event_type'):
|
402 |
+
event_type = response.speech_event_type
|
403 |
+
if event_type == speech.StreamingRecognizeResponse.SpeechEventType.END_OF_SINGLE_UTTERANCE:
|
404 |
+
log_info("🏁 Google STT: End of single utterance detected")
|
405 |
+
# Google otomatik olarak stream'i kapatacak
|
406 |
+
break
|
407 |
+
|
408 |
+
# Process results
|
409 |
if not response.results:
|
|
|
|
|
|
|
410 |
continue
|
411 |
|
412 |
+
for result in response.results:
|
|
|
|
|
413 |
if not result.alternatives:
|
|
|
414 |
continue
|
415 |
|
|
|
416 |
alternative = result.alternatives[0]
|
417 |
+
|
|
|
418 |
if alternative.transcript.strip():
|
419 |
# Create transcription result
|
420 |
transcription = TranscriptionResult(
|
421 |
text=alternative.transcript,
|
422 |
is_final=result.is_final,
|
423 |
+
confidence=getattr(alternative, 'confidence', 0.0),
|
424 |
timestamp=datetime.now().timestamp()
|
425 |
)
|
426 |
|
427 |
# Put result in queue
|
428 |
self._put_result(transcription)
|
429 |
|
|
|
430 |
if result.is_final:
|
431 |
+
log_info(f"🎯 FINAL TRANSCRIPT: '{alternative.transcript}'")
|
432 |
|
433 |
+
# Single utterance modunda Google STT otomatik kapanır
|
434 |
if self.streaming_config.single_utterance:
|
435 |
+
log_info("✅ Single utterance mode - Google STT will close stream")
|
436 |
+
# Loop otomatik sonlanacak
|
437 |
+
else:
|
438 |
+
log_debug(f"📝 Interim: '{alternative.transcript}'")
|
|
|
|
|
|
|
|
|
439 |
|
440 |
+
log_info("📊 Google STT stream ended normally")
|
|
|
|
|
|
|
441 |
|
442 |
except Exception as e:
|
443 |
error_msg = str(e)
|
444 |
+
|
445 |
+
# Google STT'nin normal kapanma durumları
|
446 |
+
if "iterating requests" in error_msg:
|
447 |
+
log_info("✅ Google STT stream closed normally (end of utterance)")
|
448 |
+
elif "Exceeded maximum allowed stream duration" in error_msg:
|
449 |
+
log_warning("⚠️ Stream duration limit (5 min)")
|
|
|
|
|
|
|
|
|
|
|
|
|
450 |
else:
|
451 |
+
log_error(f"❌ Google STT error: {error_msg}")
|
452 |
|
453 |
except Exception as e:
|
454 |
+
log_error(f"❌ Fatal error in STT stream", error=str(e))
|
455 |
finally:
|
456 |
log_info("🎤 Google STT stream thread ended")
|
|
|
457 |
self.is_streaming = False
|