ciyidogan commited on
Commit
0f4dc74
·
verified ·
1 Parent(s): 2340c2e

Update stt/stt_google.py

Browse files
Files changed (1) hide show
  1. stt/stt_google.py +41 -184
stt/stt_google.py CHANGED
@@ -196,24 +196,24 @@ class GoogleCloudSTT(STTInterface):
196
  def _run_stream(self):
197
  """Run the streaming recognition in a separate thread"""
198
  try:
199
- log_info("🎤 Google STT stream thread started")
200
-
201
  def request_generator():
202
  """Generate streaming requests"""
203
  chunk_count = 0
204
  total_bytes = 0
205
  first_chunk_processed = False
206
-
207
  while not self.stop_event.is_set():
208
  try:
209
  chunk = self.audio_queue.get(timeout=0.1)
210
  if chunk is None:
211
  log_info("📛 Poison pill received, stopping request generator")
212
  break
213
-
214
  chunk_count += 1
215
  total_bytes += len(chunk)
216
-
217
  # İlk chunk'ta audio format kontrolü
218
  if chunk_count == 1:
219
  log_info(f"📤 First chunk - size: {len(chunk)} bytes")
@@ -226,75 +226,75 @@ class GoogleCloudSTT(STTInterface):
226
  log_error(f"❌ Invalid audio format, first 4 bytes: {chunk[:4].hex()}")
227
  # Hatalı format, stream'i durdur
228
  break
229
-
230
  # İlk chunk geçerliyse devam et
231
  if chunk_count == 1 and not first_chunk_processed:
232
  break
233
-
234
  # Her 100 chunk'ta durum raporu
235
  if chunk_count % 100 == 0:
236
  avg_chunk_size = total_bytes / chunk_count
237
  log_info(f"📤 Progress: {chunk_count} chunks, {total_bytes/1024:.1f}KB total, avg {avg_chunk_size:.0f} bytes/chunk")
238
-
239
  yield speech.StreamingRecognizeRequest(audio_content=chunk)
240
-
241
  except queue.Empty:
242
  continue
243
  except Exception as e:
244
  log_error(f"❌ Error in request generator: {e}")
245
  break
246
-
247
  # Create streaming client
248
  requests = request_generator()
249
-
250
  log_info("🎤 Creating Google STT streaming client...")
251
-
252
  try:
253
  responses = self.client.streaming_recognize(
254
  self.streaming_config,
255
  requests,
256
  timeout=300 # 5 dakika timeout
257
  )
258
-
259
  log_info("✅ Google STT streaming client created")
260
-
261
  # Response timeout kontrolü
262
  last_response_time = time.time()
263
  RESPONSE_TIMEOUT = 30 # 30 saniye içinde response gelmezse
264
-
265
  # Process responses
266
  response_count = 0
267
  empty_response_count = 0
268
-
269
  for response in responses:
270
  last_response_time = time.time()
271
  response_count += 1
272
-
273
  # Response type'ı logla
274
  if response_count == 1:
275
  log_info(f"📨 First response received from Google STT")
276
-
277
  if self.stop_event.is_set():
278
  log_info("🛑 Stop event detected, breaking response loop")
279
  break
280
-
281
  # Response içeriğini kontrol et
282
  if not response.results:
283
  empty_response_count += 1
284
  if empty_response_count == 1:
285
  log_debug("📭 Received empty response (no results)")
286
  continue
287
-
288
  for i, result in enumerate(response.results):
289
  log_debug(f"📋 Result {i}: is_final={result.is_final}, alternatives={len(result.alternatives)}")
290
-
291
  if not result.alternatives:
292
  log_debug(f"📋 Result {i} has no alternatives")
293
  continue
294
-
295
  # İlk alternatifi al
296
  alternative = result.alternatives[0]
297
-
298
  # Sadece anlamlı text'leri işle
299
  if alternative.transcript.strip():
300
  # Create transcription result
@@ -304,25 +304,33 @@ class GoogleCloudSTT(STTInterface):
304
  confidence=alternative.confidence if hasattr(alternative, 'confidence') and alternative.confidence else 0.0,
305
  timestamp=datetime.now().timestamp()
306
  )
307
-
308
  # Put result in queue
309
  self._put_result(transcription)
310
-
311
  # SADECE final result'ları logla
312
  if result.is_final:
313
  log_info(f"🎯 GOOGLE STT FINAL: '{alternative.transcript}'")
 
 
 
 
 
 
 
 
314
  else:
315
  log_debug(f"📋 Result {i} has empty transcript")
316
  continue
317
-
318
- if time.time() - last_response_time > RESPONSE_TIMEOUT:
319
- log_error(f"❌ No response from Google STT for {RESPONSE_TIMEOUT} seconds")
320
-
321
  log_info(f"📊 Google STT stream ended. Total responses: {response_count}, Empty: {empty_response_count}")
322
-
323
  except Exception as e:
324
  error_msg = str(e)
325
-
326
  # Detaylı hata mesajları
327
  if "Exceeded maximum allowed stream duration" in error_msg:
328
  log_warning("⚠️ Stream duration limit exceeded (5 minutes). This is expected for long sessions.")
@@ -336,161 +344,10 @@ class GoogleCloudSTT(STTInterface):
336
  log_error(f"❌ Google STT service temporarily unavailable. Will retry...")
337
  else:
338
  log_error(f"❌ Google STT stream error: {error_msg}")
339
-
340
  except Exception as e:
341
  log_error(f"❌ Fatal error in STT stream thread", error=str(e), traceback=traceback.format_exc())
342
  finally:
343
  log_info("🎤 Google STT stream thread ended")
344
  # Thread bittiğinde streaming flag'ini kapat
345
- self.is_streaming = False
346
-
347
- async def stream_audio(self, audio_chunk: bytes) -> AsyncIterator[TranscriptionResult]:
348
- """Stream audio chunk and get transcription results"""
349
- if not self.is_streaming:
350
- # Daha detaylı hata mesajı
351
- log_error(f"❌ STT not streaming - is_streaming: {self.is_streaming}, thread alive: {self.stream_thread and self.stream_thread.is_alive() if hasattr(self, 'stream_thread') else 'No thread'}")
352
- raise RuntimeError("Streaming not started. Call start_streaming() first.")
353
-
354
- try:
355
- # Put audio in queue for streaming thread
356
- self.audio_queue.put(audio_chunk)
357
-
358
- # Check for any results in queue
359
- while True:
360
- try:
361
- # Non-blocking get from normal queue
362
- result = self.responses_queue.get_nowait()
363
- yield result
364
- except queue.Empty:
365
- # No more results in queue
366
- break
367
-
368
- except Exception as e:
369
- log_error(f"❌ Google STT streaming error", error=str(e))
370
- # Stream'i tekrar başlatmayı tetikle
371
- self.is_streaming = False
372
- raise
373
-
374
- async def stop_streaming(self) -> Optional[TranscriptionResult]:
375
- """Stop streaming and clean up all resources"""
376
- if not self.is_streaming and not self.stream_thread:
377
- log_debug("Already stopped, nothing to do")
378
- return None
379
-
380
- try:
381
- log_info(f"🛑 Stopping Google STT streaming session #{self.session_id}")
382
-
383
- # Flag'i hemen kapat
384
- self.is_streaming = False
385
- self.stop_event.set()
386
-
387
- # Send poison pill to stop request generator
388
- if self.audio_queue:
389
- try:
390
- self.audio_queue.put(None)
391
- except:
392
- pass
393
-
394
- # Thread'i durdur
395
- if self.stream_thread and self.stream_thread.is_alive():
396
- log_info("⏳ Waiting for stream thread to finish...")
397
- self.stream_thread.join(timeout=5.0) # 5 saniye bekle
398
-
399
- if self.stream_thread.is_alive():
400
- log_warning("⚠️ STT thread did not stop gracefully after 5s")
401
- # Thread'i zorla sonlandıramayız Python'da, ama daemon olduğu için
402
- # ana program kapanınca otomatik kapanacak
403
- else:
404
- log_info("✅ Stream thread finished")
405
-
406
- # Final result'ı al
407
- final_result = None
408
- if self.responses_queue:
409
- while not self.responses_queue.empty():
410
- try:
411
- result = self.responses_queue.get_nowait()
412
- if result.is_final:
413
- final_result = result
414
- except:
415
- pass
416
-
417
- # Client'ı kapat
418
- if self.client:
419
- try:
420
- # Transport'u kapat
421
- if hasattr(self.client, 'transport') and hasattr(self.client.transport, 'close'):
422
- self.client.transport.close()
423
- log_debug("✅ Client transport closed")
424
-
425
- # gRPC channel'ı kapat
426
- if hasattr(self.client, '_transport') and hasattr(self.client._transport, '_grpc_channel'):
427
- self.client._transport._grpc_channel.close()
428
- log_debug("✅ gRPC channel closed")
429
- except Exception as e:
430
- log_warning(f"⚠️ Error closing Google client: {e}")
431
- finally:
432
- self.client = None
433
-
434
- # Queue'ları None yap (yeniden kullanım için fresh queue gerekecek)
435
- self.audio_queue = None
436
- self.responses_queue = None
437
-
438
- # Diğer değişkenleri resetle
439
- self.stream_thread = None
440
- self.streaming_config = None
441
- self.stop_event.clear()
442
-
443
- log_info(f"✅ Google STT streaming session #{self.session_id} stopped and cleaned")
444
- return final_result
445
-
446
- except Exception as e:
447
- log_error(f"❌ Error during stop_streaming", error=str(e))
448
- # Force cleanup on error
449
- self.is_streaming = False
450
- self.stream_thread = None
451
- self.client = None
452
- self.streaming_config = None
453
- self.stop_event.clear()
454
- self.audio_queue = None
455
- self.responses_queue = None
456
- return None
457
-
458
- def supports_realtime(self) -> bool:
459
- """Google Cloud STT supports real-time streaming"""
460
- return True
461
-
462
- def get_supported_languages(self) -> List[str]:
463
- """Get list of supported language codes"""
464
- return [
465
- "tr-TR", # Turkish
466
- "en-US", # English (US)
467
- "en-GB", # English (UK)
468
- "de-DE", # German
469
- "fr-FR", # French
470
- "es-ES", # Spanish
471
- "it-IT", # Italian
472
- "pt-BR", # Portuguese (Brazil)
473
- "ru-RU", # Russian
474
- "ja-JP", # Japanese
475
- "ko-KR", # Korean
476
- "zh-CN", # Chinese (Simplified)
477
- "ar-SA", # Arabic
478
- ]
479
-
480
- def get_provider_name(self) -> str:
481
- """Get provider name"""
482
- return "google"
483
-
484
- def _get_encoding(self, encoding_str: str):
485
- """Convert encoding string to Google Speech enum"""
486
- if not GOOGLE_SPEECH_AVAILABLE:
487
- return None
488
-
489
- encoding_map = {
490
- "WEBM_OPUS": speech.RecognitionConfig.AudioEncoding.WEBM_OPUS,
491
- "LINEAR16": speech.RecognitionConfig.AudioEncoding.LINEAR16,
492
- "FLAC": speech.RecognitionConfig.AudioEncoding.FLAC,
493
- "MP3": speech.RecognitionConfig.AudioEncoding.MP3,
494
- "OGG_OPUS": speech.RecognitionConfig.AudioEncoding.OGG_OPUS,
495
- }
496
- return encoding_map.get(encoding_str, speech.RecognitionConfig.AudioEncoding.WEBM_OPUS)
 
196
  def _run_stream(self):
197
  """Run the streaming recognition in a separate thread"""
198
  try:
199
+ log_info(f"🎤 Google STT stream thread started - Single utterance mode: {self.streaming_config.single_utterance}")
200
+
201
  def request_generator():
202
  """Generate streaming requests"""
203
  chunk_count = 0
204
  total_bytes = 0
205
  first_chunk_processed = False
206
+
207
  while not self.stop_event.is_set():
208
  try:
209
  chunk = self.audio_queue.get(timeout=0.1)
210
  if chunk is None:
211
  log_info("📛 Poison pill received, stopping request generator")
212
  break
213
+
214
  chunk_count += 1
215
  total_bytes += len(chunk)
216
+
217
  # İlk chunk'ta audio format kontrolü
218
  if chunk_count == 1:
219
  log_info(f"📤 First chunk - size: {len(chunk)} bytes")
 
226
  log_error(f"❌ Invalid audio format, first 4 bytes: {chunk[:4].hex()}")
227
  # Hatalı format, stream'i durdur
228
  break
229
+
230
  # İlk chunk geçerliyse devam et
231
  if chunk_count == 1 and not first_chunk_processed:
232
  break
233
+
234
  # Her 100 chunk'ta durum raporu
235
  if chunk_count % 100 == 0:
236
  avg_chunk_size = total_bytes / chunk_count
237
  log_info(f"📤 Progress: {chunk_count} chunks, {total_bytes/1024:.1f}KB total, avg {avg_chunk_size:.0f} bytes/chunk")
238
+
239
  yield speech.StreamingRecognizeRequest(audio_content=chunk)
240
+
241
  except queue.Empty:
242
  continue
243
  except Exception as e:
244
  log_error(f"❌ Error in request generator: {e}")
245
  break
246
+
247
  # Create streaming client
248
  requests = request_generator()
249
+
250
  log_info("🎤 Creating Google STT streaming client...")
251
+
252
  try:
253
  responses = self.client.streaming_recognize(
254
  self.streaming_config,
255
  requests,
256
  timeout=300 # 5 dakika timeout
257
  )
258
+
259
  log_info("✅ Google STT streaming client created")
260
+
261
  # Response timeout kontrolü
262
  last_response_time = time.time()
263
  RESPONSE_TIMEOUT = 30 # 30 saniye içinde response gelmezse
264
+
265
  # Process responses
266
  response_count = 0
267
  empty_response_count = 0
268
+
269
  for response in responses:
270
  last_response_time = time.time()
271
  response_count += 1
272
+
273
  # Response type'ı logla
274
  if response_count == 1:
275
  log_info(f"📨 First response received from Google STT")
276
+
277
  if self.stop_event.is_set():
278
  log_info("🛑 Stop event detected, breaking response loop")
279
  break
280
+
281
  # Response içeriğini kontrol et
282
  if not response.results:
283
  empty_response_count += 1
284
  if empty_response_count == 1:
285
  log_debug("📭 Received empty response (no results)")
286
  continue
287
+
288
  for i, result in enumerate(response.results):
289
  log_debug(f"📋 Result {i}: is_final={result.is_final}, alternatives={len(result.alternatives)}")
290
+
291
  if not result.alternatives:
292
  log_debug(f"📋 Result {i} has no alternatives")
293
  continue
294
+
295
  # İlk alternatifi al
296
  alternative = result.alternatives[0]
297
+
298
  # Sadece anlamlı text'leri işle
299
  if alternative.transcript.strip():
300
  # Create transcription result
 
304
  confidence=alternative.confidence if hasattr(alternative, 'confidence') and alternative.confidence else 0.0,
305
  timestamp=datetime.now().timestamp()
306
  )
307
+
308
  # Put result in queue
309
  self._put_result(transcription)
310
+
311
  # SADECE final result'ları logla
312
  if result.is_final:
313
  log_info(f"🎯 GOOGLE STT FINAL: '{alternative.transcript}'")
314
+
315
+ # ✅ Single utterance modunda stream otomatik kapanacak
316
+ if self.streaming_config.single_utterance:
317
+ log_info("🏁 Single utterance completed - Stream will auto-close")
318
+ # Google STT single utterance modda otomatik kapatır
319
+ # Ama biz de clean bir şekilde çıkalım
320
+ self.is_streaming = False
321
+ return
322
  else:
323
  log_debug(f"📋 Result {i} has empty transcript")
324
  continue
325
+
326
+ if time.time() - last_response_time > RESPONSE_TIMEOUT:
327
+ log_error(f"❌ No response from Google STT for {RESPONSE_TIMEOUT} seconds")
328
+
329
  log_info(f"📊 Google STT stream ended. Total responses: {response_count}, Empty: {empty_response_count}")
330
+
331
  except Exception as e:
332
  error_msg = str(e)
333
+
334
  # Detaylı hata mesajları
335
  if "Exceeded maximum allowed stream duration" in error_msg:
336
  log_warning("⚠️ Stream duration limit exceeded (5 minutes). This is expected for long sessions.")
 
344
  log_error(f"❌ Google STT service temporarily unavailable. Will retry...")
345
  else:
346
  log_error(f"❌ Google STT stream error: {error_msg}")
347
+
348
  except Exception as e:
349
  log_error(f"❌ Fatal error in STT stream thread", error=str(e), traceback=traceback.format_exc())
350
  finally:
351
  log_info("🎤 Google STT stream thread ended")
352
  # Thread bittiğinde streaming flag'ini kapat
353
+ self.is_streaming = False