ciyidogan committed on
Commit
f327841
·
verified ·
1 Parent(s): f3b26f3

Update stt/stt_google.py

Browse files
Files changed (1) hide show
  1. stt/stt_google.py +44 -32
stt/stt_google.py CHANGED
@@ -267,22 +267,22 @@ class GoogleCloudSTT(STTInterface):
267
  await self.stop_streaming()
268
  # Temizlik için bekle
269
  await asyncio.sleep(0.5)
270
-
271
  # Session verilerini resetle ve ID'yi artır
272
  self._reset_session_data()
273
-
274
  log_info(f"🎤 Starting Google STT streaming session #{self.session_id} with config: {config}")
275
-
276
  # Fresh queue'lar oluştur
277
  self._create_fresh_queues()
278
-
279
  # Stop event'i temizle
280
  self.stop_event.clear()
281
-
282
  # Yeni client oluştur (TEK SEFER)
283
  self.client = speech.SpeechClient()
284
  log_info("✅ Created new Google Speech client")
285
-
286
  # Convert dict to STTConfig if needed
287
  if isinstance(config, dict):
288
  stt_config = STTConfig(
@@ -290,29 +290,33 @@ class GoogleCloudSTT(STTInterface):
290
  sample_rate=config.get("sample_rate", 16000),
291
  encoding=config.get("encoding", "WEBM_OPUS"),
292
  enable_punctuation=config.get("enable_punctuation", True),
293
- interim_results=config.get("interim_results", True),
294
- single_utterance=config.get("single_utterance", False)
295
  )
296
  else:
297
  stt_config = config
298
-
299
  recognition_config = speech.RecognitionConfig(
300
  encoding=self._get_encoding(stt_config.encoding),
301
  sample_rate_hertz=stt_config.sample_rate,
302
  language_code=stt_config.language,
303
  enable_automatic_punctuation=stt_config.enable_punctuation,
304
  model="latest_long",
305
- use_enhanced=True
 
 
 
306
  )
307
-
308
  self.streaming_config = speech.StreamingRecognitionConfig(
309
  config=recognition_config,
310
  interim_results=stt_config.interim_results,
311
  single_utterance=stt_config.single_utterance
 
312
  )
313
-
314
  self.is_streaming = True
315
-
316
  # Start streaming thread with unique name
317
  self.stream_thread = threading.Thread(
318
  target=self._run_stream,
@@ -320,9 +324,9 @@ class GoogleCloudSTT(STTInterface):
320
  )
321
  self.stream_thread.daemon = True # Daemon thread olarak işaretle
322
  self.stream_thread.start()
323
-
324
  log_info(f"✅ Google STT streaming session #{self.session_id} started successfully")
325
-
326
  except Exception as e:
327
  log_error(f"❌ Failed to start Google STT streaming", error=str(e))
328
  self.is_streaming = False
@@ -364,8 +368,9 @@ class GoogleCloudSTT(STTInterface):
364
  if len(chunk) >= 4 and chunk[:4] == b'\x1a\x45\xdf\xa3':
365
  log_info("✅ Valid WEBM header detected")
366
  else:
367
- log_error(f"❌ Invalid audio format")
368
- break
 
369
 
370
  # Her 50 chunk'ta durum raporu
371
  if chunk_count % 50 == 0:
@@ -379,6 +384,8 @@ class GoogleCloudSTT(STTInterface):
379
  log_error(f"❌ Error in request generator: {e}")
380
  break
381
 
 
 
382
  # Create streaming client
383
  requests = request_generator()
384
  log_info("🎤 Creating Google STT streaming client...")
@@ -390,26 +397,30 @@ class GoogleCloudSTT(STTInterface):
390
  timeout=300
391
  )
392
 
393
- log_info("✅ Google STT streaming client created")
 
 
 
 
394
 
395
  for response in responses:
 
 
 
 
 
396
  if self.stop_event.is_set():
397
  log_info("🛑 Stop event detected")
398
  break
399
 
400
- # Check for speech events (VAD)
401
- if hasattr(response, 'speech_event_type'):
402
- event_type = response.speech_event_type
403
- if event_type == speech.StreamingRecognizeResponse.SpeechEventType.END_OF_SINGLE_UTTERANCE:
404
- log_info("🏁 Google STT: End of single utterance detected")
405
- # Google otomatik olarak stream'i kapatacak
406
- break
407
-
408
  # Process results
409
  if not response.results:
 
410
  continue
411
 
412
  for result in response.results:
 
 
413
  if not result.alternatives:
414
  continue
415
 
@@ -432,26 +443,27 @@ class GoogleCloudSTT(STTInterface):
432
 
433
  # Single utterance modunda Google STT otomatik kapanır
434
  if self.streaming_config.single_utterance:
435
- log_info("✅ Single utterance mode - Google STT will close stream")
436
- # Loop otomatik sonlanacak
 
437
  else:
438
  log_debug(f"📝 Interim: '{alternative.transcript}'")
439
 
440
- log_info("📊 Google STT stream ended normally")
441
 
442
  except Exception as e:
443
  error_msg = str(e)
444
 
445
- # Google STT'nin normal kapanma durumları
446
  if "iterating requests" in error_msg:
447
- log_info("✅ Google STT stream closed normally (end of utterance)")
448
  elif "Exceeded maximum allowed stream duration" in error_msg:
449
  log_warning("⚠️ Stream duration limit (5 min)")
450
  else:
451
  log_error(f"❌ Google STT error: {error_msg}")
452
 
453
  except Exception as e:
454
- log_error(f"❌ Fatal error in STT stream", error=str(e))
455
  finally:
456
  log_info("🎤 Google STT stream thread ended")
457
  self.is_streaming = False
 
267
  await self.stop_streaming()
268
  # Temizlik için bekle
269
  await asyncio.sleep(0.5)
270
+
271
  # Session verilerini resetle ve ID'yi artır
272
  self._reset_session_data()
273
+
274
  log_info(f"🎤 Starting Google STT streaming session #{self.session_id} with config: {config}")
275
+
276
  # Fresh queue'lar oluştur
277
  self._create_fresh_queues()
278
+
279
  # Stop event'i temizle
280
  self.stop_event.clear()
281
+
282
  # Yeni client oluştur (TEK SEFER)
283
  self.client = speech.SpeechClient()
284
  log_info("✅ Created new Google Speech client")
285
+
286
  # Convert dict to STTConfig if needed
287
  if isinstance(config, dict):
288
  stt_config = STTConfig(
 
290
  sample_rate=config.get("sample_rate", 16000),
291
  encoding=config.get("encoding", "WEBM_OPUS"),
292
  enable_punctuation=config.get("enable_punctuation", True),
293
+ interim_results=config.get("interim_results", False),
294
+ single_utterance=config.get("single_utterance", True)
295
  )
296
  else:
297
  stt_config = config
298
+
299
  recognition_config = speech.RecognitionConfig(
300
  encoding=self._get_encoding(stt_config.encoding),
301
  sample_rate_hertz=stt_config.sample_rate,
302
  language_code=stt_config.language,
303
  enable_automatic_punctuation=stt_config.enable_punctuation,
304
  model="latest_long",
305
+ use_enhanced=True,
306
+ # Bu parametreleri kaldırıyoruz - v1 API'de yok
307
+ # enable_voice_activity_events=True,
308
+ # audio_channel_count=1
309
  )
310
+
311
  self.streaming_config = speech.StreamingRecognitionConfig(
312
  config=recognition_config,
313
  interim_results=stt_config.interim_results,
314
  single_utterance=stt_config.single_utterance
315
+ # enable_voice_activity_events kaldırıldı
316
  )
317
+
318
  self.is_streaming = True
319
+
320
  # Start streaming thread with unique name
321
  self.stream_thread = threading.Thread(
322
  target=self._run_stream,
 
324
  )
325
  self.stream_thread.daemon = True # Daemon thread olarak işaretle
326
  self.stream_thread.start()
327
+
328
  log_info(f"✅ Google STT streaming session #{self.session_id} started successfully")
329
+
330
  except Exception as e:
331
  log_error(f"❌ Failed to start Google STT streaming", error=str(e))
332
  self.is_streaming = False
 
368
  if len(chunk) >= 4 and chunk[:4] == b'\x1a\x45\xdf\xa3':
369
  log_info("✅ Valid WEBM header detected")
370
  else:
371
+ log_error(f"❌ Invalid audio format, first 4 bytes: {chunk[:4].hex()}")
372
+ # Format hatalıysa devam et, Google STT düzeltebilir
373
+ # break
374
 
375
  # Her 50 chunk'ta durum raporu
376
  if chunk_count % 50 == 0:
 
384
  log_error(f"❌ Error in request generator: {e}")
385
  break
386
 
387
+ log_info(f"📊 Request generator finished. Total chunks: {chunk_count}, Total bytes: {total_bytes}")
388
+
389
  # Create streaming client
390
  requests = request_generator()
391
  log_info("🎤 Creating Google STT streaming client...")
 
397
  timeout=300
398
  )
399
 
400
+ log_info("✅ Google STT streaming client created, waiting for responses...")
401
+
402
+ # Process responses
403
+ response_count = 0
404
+ result_count = 0
405
 
406
  for response in responses:
407
+ response_count += 1
408
+
409
+ if response_count == 1:
410
+ log_info(f"📨 First response received from Google STT")
411
+
412
  if self.stop_event.is_set():
413
  log_info("🛑 Stop event detected")
414
  break
415
 
 
 
 
 
 
 
 
 
416
  # Process results
417
  if not response.results:
418
+ log_debug(f"📭 Response #{response_count} has no results")
419
  continue
420
 
421
  for result in response.results:
422
+ result_count += 1
423
+
424
  if not result.alternatives:
425
  continue
426
 
 
443
 
444
  # Single utterance modunda Google STT otomatik kapanır
445
  if self.streaming_config.single_utterance:
446
+ log_info("✅ Single utterance mode - Stream will end")
447
+ # Google stream'i kapatacak, biz de çıkalım
448
+ return
449
  else:
450
  log_debug(f"📝 Interim: '{alternative.transcript}'")
451
 
452
+ log_info(f"📊 Google STT stream ended. Responses: {response_count}, Results: {result_count}")
453
 
454
  except Exception as e:
455
  error_msg = str(e)
456
 
457
+ # Beklenen hatalar
458
  if "iterating requests" in error_msg:
459
+ log_info("✅ Stream ended normally")
460
  elif "Exceeded maximum allowed stream duration" in error_msg:
461
  log_warning("⚠️ Stream duration limit (5 min)")
462
  else:
463
  log_error(f"❌ Google STT error: {error_msg}")
464
 
465
  except Exception as e:
466
+ log_error(f"❌ Fatal error in STT stream", error=str(e), traceback=traceback.format_exc())
467
  finally:
468
  log_info("🎤 Google STT stream thread ended")
469
  self.is_streaming = False