ciyidogan committed on
Commit
7022f3d
·
verified ·
1 Parent(s): 5c3c2cf

Update stt/stt_google.py

Browse files
Files changed (1) hide show
  1. stt/stt_google.py +38 -86
stt/stt_google.py CHANGED
@@ -347,10 +347,7 @@ class GoogleCloudSTT(STTInterface):
347
  """Generate streaming requests"""
348
  chunk_count = 0
349
  total_bytes = 0
350
- first_chunk_processed = False
351
- last_chunk_time = time.time()
352
- silence_timeout = 2.0 # 2 saniye sessizlik timeout
353
-
354
  while not self.stop_event.is_set():
355
  try:
356
  chunk = self.audio_queue.get(timeout=0.1)
@@ -360,37 +357,23 @@ class GoogleCloudSTT(STTInterface):
360
 
361
  chunk_count += 1
362
  total_bytes += len(chunk)
363
- last_chunk_time = time.time() # Update last chunk time
364
 
365
  # İlk chunk'ta audio format kontrolü
366
  if chunk_count == 1:
367
  log_info(f"📤 First chunk - size: {len(chunk)} bytes")
368
- # Audio header kontrolü (WEBM magic bytes)
369
- if len(chunk) >= 4:
370
- if chunk[:4] == b'\x1a\x45\xdf\xa3':
371
- log_info(" Valid WEBM header detected")
372
- first_chunk_processed = True
373
- else:
374
- log_error(f"❌ Invalid audio format, first 4 bytes: {chunk[:4].hex()}")
375
- # Hatalı format, stream'i durdur
376
- break
377
-
378
- # İlk chunk geçerliyse devam et
379
- if chunk_count == 1 and not first_chunk_processed:
380
- break
381
 
382
- # Her 100 chunk'ta durum raporu
383
- if chunk_count % 100 == 0:
384
- avg_chunk_size = total_bytes / chunk_count
385
- log_info(f"📤 Progress: {chunk_count} chunks, {total_bytes/1024:.1f}KB total, avg {avg_chunk_size:.0f} bytes/chunk")
386
 
387
  yield speech.StreamingRecognizeRequest(audio_content=chunk)
388
 
389
  except queue.Empty:
390
- # Check for silence timeout
391
- if time.time() - last_chunk_time > silence_timeout:
392
- log_info(f"🔇 Silence timeout reached ({silence_timeout}s), ending stream")
393
- break
394
  continue
395
  except Exception as e:
396
  log_error(f"❌ Error in request generator: {e}")
@@ -398,108 +381,77 @@ class GoogleCloudSTT(STTInterface):
398
 
399
  # Create streaming client
400
  requests = request_generator()
401
-
402
  log_info("🎤 Creating Google STT streaming client...")
403
 
404
  try:
405
  responses = self.client.streaming_recognize(
406
  self.streaming_config,
407
  requests,
408
- timeout=300 # 5 dakika timeout
409
  )
410
 
411
  log_info("✅ Google STT streaming client created")
412
 
413
- # Response timeout kontrolü
414
- last_response_time = time.time()
415
- RESPONSE_TIMEOUT = 30 # 30 saniye içinde response gelmezse
416
-
417
- # Process responses
418
- response_count = 0
419
- empty_response_count = 0
420
-
421
  for response in responses:
422
- last_response_time = time.time()
423
- response_count += 1
424
-
425
- # Response type'ı logla
426
- if response_count == 1:
427
- log_info(f"📨 First response received from Google STT")
428
-
429
  if self.stop_event.is_set():
430
- log_info("🛑 Stop event detected, breaking response loop")
431
  break
432
 
433
- # Response içeriğini kontrol et
 
 
 
 
 
 
 
 
434
  if not response.results:
435
- empty_response_count += 1
436
- if empty_response_count == 1:
437
- log_debug("📭 Received empty response (no results)")
438
  continue
439
 
440
- for i, result in enumerate(response.results):
441
- log_debug(f"📋 Result {i}: is_final={result.is_final}, alternatives={len(result.alternatives)}")
442
-
443
  if not result.alternatives:
444
- log_debug(f"📋 Result {i} has no alternatives")
445
  continue
446
 
447
- # İlk alternatifi al
448
  alternative = result.alternatives[0]
449
-
450
- # Sadece anlamlı text'leri işle
451
  if alternative.transcript.strip():
452
  # Create transcription result
453
  transcription = TranscriptionResult(
454
  text=alternative.transcript,
455
  is_final=result.is_final,
456
- confidence=alternative.confidence if hasattr(alternative, 'confidence') and alternative.confidence else 0.0,
457
  timestamp=datetime.now().timestamp()
458
  )
459
 
460
  # Put result in queue
461
  self._put_result(transcription)
462
 
463
- # SADECE final result'ları logla
464
  if result.is_final:
465
- log_info(f"🎯 GOOGLE STT FINAL: '{alternative.transcript}'")
466
 
467
- # Single utterance modunda stream otomatik kapanacak
468
  if self.streaming_config.single_utterance:
469
- log_info("🏁 Single utterance completed - Stream will auto-close")
470
- # Google STT single utterance modda otomatik kapatır
471
- # Ama biz de clean bir şekilde çıkalım
472
- self.is_streaming = False
473
- return
474
- else:
475
- log_debug(f"📋 Result {i} has empty transcript")
476
- continue
477
 
478
- if time.time() - last_response_time > RESPONSE_TIMEOUT:
479
- log_error(f"❌ No response from Google STT for {RESPONSE_TIMEOUT} seconds")
480
-
481
- log_info(f"📊 Google STT stream ended. Total responses: {response_count}, Empty: {empty_response_count}")
482
 
483
  except Exception as e:
484
  error_msg = str(e)
485
-
486
- # Detaylı hata mesajları
487
- if "Exceeded maximum allowed stream duration" in error_msg:
488
- log_warning("⚠️ Stream duration limit exceeded (5 minutes). This is expected for long sessions.")
489
- elif "Bad language code" in error_msg:
490
- log_error(f" Invalid language code in STT config. Check locale settings.")
491
- elif "invalid_argument" in error_msg:
492
- log_error(f"❌ Invalid STT configuration. Check encoding and sample rate.")
493
- elif "Deadline Exceeded" in error_msg:
494
- log_error(f"❌ Google STT response timeout - possibly network issue or slow connection")
495
- elif "503" in error_msg or "Service Unavailable" in error_msg:
496
- log_error(f"❌ Google STT service temporarily unavailable. Will retry...")
497
  else:
498
- log_error(f"❌ Google STT stream error: {error_msg}")
499
 
500
  except Exception as e:
501
- log_error(f"❌ Fatal error in STT stream thread", error=str(e), traceback=traceback.format_exc())
502
  finally:
503
  log_info("🎤 Google STT stream thread ended")
504
- # Thread bittiğinde streaming flag'ini kapat
505
  self.is_streaming = False
 
347
  """Generate streaming requests"""
348
  chunk_count = 0
349
  total_bytes = 0
350
+
 
 
 
351
  while not self.stop_event.is_set():
352
  try:
353
  chunk = self.audio_queue.get(timeout=0.1)
 
357
 
358
  chunk_count += 1
359
  total_bytes += len(chunk)
 
360
 
361
  # İlk chunk'ta audio format kontrolü
362
  if chunk_count == 1:
363
  log_info(f"📤 First chunk - size: {len(chunk)} bytes")
364
+ if len(chunk) >= 4 and chunk[:4] == b'\x1a\x45\xdf\xa3':
365
+ log_info("✅ Valid WEBM header detected")
366
+ else:
367
+ log_error(f" Invalid audio format")
368
+ break
 
 
 
 
 
 
 
 
369
 
370
+ # Her 50 chunk'ta durum raporu
371
+ if chunk_count % 50 == 0:
372
+ log_info(f"📤 Progress: {chunk_count} chunks, {total_bytes/1024:.1f}KB total")
 
373
 
374
  yield speech.StreamingRecognizeRequest(audio_content=chunk)
375
 
376
  except queue.Empty:
 
 
 
 
377
  continue
378
  except Exception as e:
379
  log_error(f"❌ Error in request generator: {e}")
 
381
 
382
  # Create streaming client
383
  requests = request_generator()
 
384
  log_info("🎤 Creating Google STT streaming client...")
385
 
386
  try:
387
  responses = self.client.streaming_recognize(
388
  self.streaming_config,
389
  requests,
390
+ timeout=300
391
  )
392
 
393
  log_info("✅ Google STT streaming client created")
394
 
 
 
 
 
 
 
 
 
395
  for response in responses:
 
 
 
 
 
 
 
396
  if self.stop_event.is_set():
397
+ log_info("🛑 Stop event detected")
398
  break
399
 
400
+ # Check for speech events (VAD)
401
+ if hasattr(response, 'speech_event_type'):
402
+ event_type = response.speech_event_type
403
+ if event_type == speech.StreamingRecognizeResponse.SpeechEventType.END_OF_SINGLE_UTTERANCE:
404
+ log_info("🏁 Google STT: End of single utterance detected")
405
+ # Google otomatik olarak stream'i kapatacak
406
+ break
407
+
408
+ # Process results
409
  if not response.results:
 
 
 
410
  continue
411
 
412
+ for result in response.results:
 
 
413
  if not result.alternatives:
 
414
  continue
415
 
 
416
  alternative = result.alternatives[0]
417
+
 
418
  if alternative.transcript.strip():
419
  # Create transcription result
420
  transcription = TranscriptionResult(
421
  text=alternative.transcript,
422
  is_final=result.is_final,
423
+ confidence=getattr(alternative, 'confidence', 0.0),
424
  timestamp=datetime.now().timestamp()
425
  )
426
 
427
  # Put result in queue
428
  self._put_result(transcription)
429
 
 
430
  if result.is_final:
431
+ log_info(f"🎯 FINAL TRANSCRIPT: '{alternative.transcript}'")
432
 
433
+ # Single utterance modunda Google STT otomatik kapanır
434
  if self.streaming_config.single_utterance:
435
+ log_info(" Single utterance mode - Google STT will close stream")
436
+ # Loop otomatik sonlanacak
437
+ else:
438
+ log_debug(f"📝 Interim: '{alternative.transcript}'")
 
 
 
 
439
 
440
+ log_info("📊 Google STT stream ended normally")
 
 
 
441
 
442
  except Exception as e:
443
  error_msg = str(e)
444
+
445
+ # Google STT'nin normal kapanma durumları
446
+ if "iterating requests" in error_msg:
447
+ log_info(" Google STT stream closed normally (end of utterance)")
448
+ elif "Exceeded maximum allowed stream duration" in error_msg:
449
+ log_warning("⚠️ Stream duration limit (5 min)")
 
 
 
 
 
 
450
  else:
451
+ log_error(f"❌ Google STT error: {error_msg}")
452
 
453
  except Exception as e:
454
+ log_error(f"❌ Fatal error in STT stream", error=str(e))
455
  finally:
456
  log_info("🎤 Google STT stream thread ended")
 
457
  self.is_streaming = False