Spaces:
Building
Building
Update stt/stt_google.py
Browse files- stt/stt_google.py +44 -32
stt/stt_google.py
CHANGED
@@ -267,22 +267,22 @@ class GoogleCloudSTT(STTInterface):
|
|
267 |
await self.stop_streaming()
|
268 |
# Temizlik için bekle
|
269 |
await asyncio.sleep(0.5)
|
270 |
-
|
271 |
# Session verilerini resetle ve ID'yi artır
|
272 |
self._reset_session_data()
|
273 |
-
|
274 |
log_info(f"🎤 Starting Google STT streaming session #{self.session_id} with config: {config}")
|
275 |
-
|
276 |
# Fresh queue'lar oluştur
|
277 |
self._create_fresh_queues()
|
278 |
-
|
279 |
# Stop event'i temizle
|
280 |
self.stop_event.clear()
|
281 |
-
|
282 |
# Yeni client oluştur (TEK SEFER)
|
283 |
self.client = speech.SpeechClient()
|
284 |
log_info("✅ Created new Google Speech client")
|
285 |
-
|
286 |
# Convert dict to STTConfig if needed
|
287 |
if isinstance(config, dict):
|
288 |
stt_config = STTConfig(
|
@@ -290,29 +290,33 @@ class GoogleCloudSTT(STTInterface):
|
|
290 |
sample_rate=config.get("sample_rate", 16000),
|
291 |
encoding=config.get("encoding", "WEBM_OPUS"),
|
292 |
enable_punctuation=config.get("enable_punctuation", True),
|
293 |
-
interim_results=config.get("interim_results",
|
294 |
-
single_utterance=config.get("single_utterance",
|
295 |
)
|
296 |
else:
|
297 |
stt_config = config
|
298 |
-
|
299 |
recognition_config = speech.RecognitionConfig(
|
300 |
encoding=self._get_encoding(stt_config.encoding),
|
301 |
sample_rate_hertz=stt_config.sample_rate,
|
302 |
language_code=stt_config.language,
|
303 |
enable_automatic_punctuation=stt_config.enable_punctuation,
|
304 |
model="latest_long",
|
305 |
-
use_enhanced=True
|
|
|
|
|
|
|
306 |
)
|
307 |
-
|
308 |
self.streaming_config = speech.StreamingRecognitionConfig(
|
309 |
config=recognition_config,
|
310 |
interim_results=stt_config.interim_results,
|
311 |
single_utterance=stt_config.single_utterance
|
|
|
312 |
)
|
313 |
-
|
314 |
self.is_streaming = True
|
315 |
-
|
316 |
# Start streaming thread with unique name
|
317 |
self.stream_thread = threading.Thread(
|
318 |
target=self._run_stream,
|
@@ -320,9 +324,9 @@ class GoogleCloudSTT(STTInterface):
|
|
320 |
)
|
321 |
self.stream_thread.daemon = True # Daemon thread olarak işaretle
|
322 |
self.stream_thread.start()
|
323 |
-
|
324 |
log_info(f"✅ Google STT streaming session #{self.session_id} started successfully")
|
325 |
-
|
326 |
except Exception as e:
|
327 |
log_error(f"❌ Failed to start Google STT streaming", error=str(e))
|
328 |
self.is_streaming = False
|
@@ -364,8 +368,9 @@ class GoogleCloudSTT(STTInterface):
|
|
364 |
if len(chunk) >= 4 and chunk[:4] == b'\x1a\x45\xdf\xa3':
|
365 |
log_info("✅ Valid WEBM header detected")
|
366 |
else:
|
367 |
-
log_error(f"❌ Invalid audio format")
|
368 |
-
|
|
|
369 |
|
370 |
# Her 50 chunk'ta durum raporu
|
371 |
if chunk_count % 50 == 0:
|
@@ -379,6 +384,8 @@ class GoogleCloudSTT(STTInterface):
|
|
379 |
log_error(f"❌ Error in request generator: {e}")
|
380 |
break
|
381 |
|
|
|
|
|
382 |
# Create streaming client
|
383 |
requests = request_generator()
|
384 |
log_info("🎤 Creating Google STT streaming client...")
|
@@ -390,26 +397,30 @@ class GoogleCloudSTT(STTInterface):
|
|
390 |
timeout=300
|
391 |
)
|
392 |
|
393 |
-
log_info("✅ Google STT streaming client created")
|
|
|
|
|
|
|
|
|
394 |
|
395 |
for response in responses:
|
|
|
|
|
|
|
|
|
|
|
396 |
if self.stop_event.is_set():
|
397 |
log_info("🛑 Stop event detected")
|
398 |
break
|
399 |
|
400 |
-
# Check for speech events (VAD)
|
401 |
-
if hasattr(response, 'speech_event_type'):
|
402 |
-
event_type = response.speech_event_type
|
403 |
-
if event_type == speech.StreamingRecognizeResponse.SpeechEventType.END_OF_SINGLE_UTTERANCE:
|
404 |
-
log_info("🏁 Google STT: End of single utterance detected")
|
405 |
-
# Google otomatik olarak stream'i kapatacak
|
406 |
-
break
|
407 |
-
|
408 |
# Process results
|
409 |
if not response.results:
|
|
|
410 |
continue
|
411 |
|
412 |
for result in response.results:
|
|
|
|
|
413 |
if not result.alternatives:
|
414 |
continue
|
415 |
|
@@ -432,26 +443,27 @@ class GoogleCloudSTT(STTInterface):
|
|
432 |
|
433 |
# Single utterance modunda Google STT otomatik kapanır
|
434 |
if self.streaming_config.single_utterance:
|
435 |
-
log_info("✅ Single utterance mode -
|
436 |
-
#
|
|
|
437 |
else:
|
438 |
log_debug(f"📝 Interim: '{alternative.transcript}'")
|
439 |
|
440 |
-
log_info("📊 Google STT stream ended
|
441 |
|
442 |
except Exception as e:
|
443 |
error_msg = str(e)
|
444 |
|
445 |
-
#
|
446 |
if "iterating requests" in error_msg:
|
447 |
-
log_info("✅
|
448 |
elif "Exceeded maximum allowed stream duration" in error_msg:
|
449 |
log_warning("⚠️ Stream duration limit (5 min)")
|
450 |
else:
|
451 |
log_error(f"❌ Google STT error: {error_msg}")
|
452 |
|
453 |
except Exception as e:
|
454 |
-
log_error(f"❌ Fatal error in STT stream", error=str(e))
|
455 |
finally:
|
456 |
log_info("🎤 Google STT stream thread ended")
|
457 |
self.is_streaming = False
|
|
|
267 |
await self.stop_streaming()
|
268 |
# Temizlik için bekle
|
269 |
await asyncio.sleep(0.5)
|
270 |
+
|
271 |
# Session verilerini resetle ve ID'yi artır
|
272 |
self._reset_session_data()
|
273 |
+
|
274 |
log_info(f"🎤 Starting Google STT streaming session #{self.session_id} with config: {config}")
|
275 |
+
|
276 |
# Fresh queue'lar oluştur
|
277 |
self._create_fresh_queues()
|
278 |
+
|
279 |
# Stop event'i temizle
|
280 |
self.stop_event.clear()
|
281 |
+
|
282 |
# Yeni client oluştur (TEK SEFER)
|
283 |
self.client = speech.SpeechClient()
|
284 |
log_info("✅ Created new Google Speech client")
|
285 |
+
|
286 |
# Convert dict to STTConfig if needed
|
287 |
if isinstance(config, dict):
|
288 |
stt_config = STTConfig(
|
|
|
290 |
sample_rate=config.get("sample_rate", 16000),
|
291 |
encoding=config.get("encoding", "WEBM_OPUS"),
|
292 |
enable_punctuation=config.get("enable_punctuation", True),
|
293 |
+
interim_results=config.get("interim_results", False),
|
294 |
+
single_utterance=config.get("single_utterance", True)
|
295 |
)
|
296 |
else:
|
297 |
stt_config = config
|
298 |
+
|
299 |
recognition_config = speech.RecognitionConfig(
|
300 |
encoding=self._get_encoding(stt_config.encoding),
|
301 |
sample_rate_hertz=stt_config.sample_rate,
|
302 |
language_code=stt_config.language,
|
303 |
enable_automatic_punctuation=stt_config.enable_punctuation,
|
304 |
model="latest_long",
|
305 |
+
use_enhanced=True,
|
306 |
+
# Bu parametreleri kaldırıyoruz - v1 API'de yok
|
307 |
+
# enable_voice_activity_events=True,
|
308 |
+
# audio_channel_count=1
|
309 |
)
|
310 |
+
|
311 |
self.streaming_config = speech.StreamingRecognitionConfig(
|
312 |
config=recognition_config,
|
313 |
interim_results=stt_config.interim_results,
|
314 |
single_utterance=stt_config.single_utterance
|
315 |
+
# enable_voice_activity_events kaldırıldı
|
316 |
)
|
317 |
+
|
318 |
self.is_streaming = True
|
319 |
+
|
320 |
# Start streaming thread with unique name
|
321 |
self.stream_thread = threading.Thread(
|
322 |
target=self._run_stream,
|
|
|
324 |
)
|
325 |
self.stream_thread.daemon = True # Daemon thread olarak işaretle
|
326 |
self.stream_thread.start()
|
327 |
+
|
328 |
log_info(f"✅ Google STT streaming session #{self.session_id} started successfully")
|
329 |
+
|
330 |
except Exception as e:
|
331 |
log_error(f"❌ Failed to start Google STT streaming", error=str(e))
|
332 |
self.is_streaming = False
|
|
|
368 |
if len(chunk) >= 4 and chunk[:4] == b'\x1a\x45\xdf\xa3':
|
369 |
log_info("✅ Valid WEBM header detected")
|
370 |
else:
|
371 |
+
log_error(f"❌ Invalid audio format, first 4 bytes: {chunk[:4].hex()}")
|
372 |
+
# Format hatalıysa devam et, Google STT düzeltebilir
|
373 |
+
# break
|
374 |
|
375 |
# Her 50 chunk'ta durum raporu
|
376 |
if chunk_count % 50 == 0:
|
|
|
384 |
log_error(f"❌ Error in request generator: {e}")
|
385 |
break
|
386 |
|
387 |
+
log_info(f"📊 Request generator finished. Total chunks: {chunk_count}, Total bytes: {total_bytes}")
|
388 |
+
|
389 |
# Create streaming client
|
390 |
requests = request_generator()
|
391 |
log_info("🎤 Creating Google STT streaming client...")
|
|
|
397 |
timeout=300
|
398 |
)
|
399 |
|
400 |
+
log_info("✅ Google STT streaming client created, waiting for responses...")
|
401 |
+
|
402 |
+
# Process responses
|
403 |
+
response_count = 0
|
404 |
+
result_count = 0
|
405 |
|
406 |
for response in responses:
|
407 |
+
response_count += 1
|
408 |
+
|
409 |
+
if response_count == 1:
|
410 |
+
log_info(f"📨 First response received from Google STT")
|
411 |
+
|
412 |
if self.stop_event.is_set():
|
413 |
log_info("🛑 Stop event detected")
|
414 |
break
|
415 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
416 |
# Process results
|
417 |
if not response.results:
|
418 |
+
log_debug(f"📭 Response #{response_count} has no results")
|
419 |
continue
|
420 |
|
421 |
for result in response.results:
|
422 |
+
result_count += 1
|
423 |
+
|
424 |
if not result.alternatives:
|
425 |
continue
|
426 |
|
|
|
443 |
|
444 |
# Single utterance modunda Google STT otomatik kapanır
|
445 |
if self.streaming_config.single_utterance:
|
446 |
+
log_info("✅ Single utterance mode - Stream will end")
|
447 |
+
# Google stream'i kapatacak, biz de çıkalım
|
448 |
+
return
|
449 |
else:
|
450 |
log_debug(f"📝 Interim: '{alternative.transcript}'")
|
451 |
|
452 |
+
log_info(f"📊 Google STT stream ended. Responses: {response_count}, Results: {result_count}")
|
453 |
|
454 |
except Exception as e:
|
455 |
error_msg = str(e)
|
456 |
|
457 |
+
# Beklenen hatalar
|
458 |
if "iterating requests" in error_msg:
|
459 |
+
log_info("✅ Stream ended normally")
|
460 |
elif "Exceeded maximum allowed stream duration" in error_msg:
|
461 |
log_warning("⚠️ Stream duration limit (5 min)")
|
462 |
else:
|
463 |
log_error(f"❌ Google STT error: {error_msg}")
|
464 |
|
465 |
except Exception as e:
|
466 |
+
log_error(f"❌ Fatal error in STT stream", error=str(e), traceback=traceback.format_exc())
|
467 |
finally:
|
468 |
log_info("🎤 Google STT stream thread ended")
|
469 |
self.is_streaming = False
|