ciyidogan committed on
Commit
57b160d
·
verified ·
1 Parent(s): 1a37688

Update stt/stt_google.py

Browse files
Files changed (1) hide show
  1. stt/stt_google.py +111 -145
stt/stt_google.py CHANGED
@@ -79,29 +79,66 @@ class GoogleCloudSTT(STTInterface):
79
  }
80
  return encoding_map.get(encoding_str, speech.RecognitionConfig.AudioEncoding.WEBM_OPUS)
81
 
82
- async def stream_audio(self, audio_chunk: bytes) -> AsyncIterator[TranscriptionResult]:
83
- """Stream audio chunk and get transcription results"""
84
  if not self.is_streaming:
85
- log_error(f"❌ STT not streaming - is_streaming: {self.is_streaming}")
86
- raise RuntimeError("Streaming not started. Call start_streaming() first.")
87
-
88
  try:
89
- # Put audio in queue for streaming thread
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
  self.audio_queue.put(audio_chunk)
91
-
92
- # Check for any results in queue
93
- while True:
 
 
 
 
 
 
 
 
 
94
  try:
95
- # Non-blocking get from queue
96
  result = self.responses_queue.get_nowait()
 
97
  yield result
98
  except queue.Empty:
99
- # No more results in queue
 
 
 
100
  break
101
-
102
  except Exception as e:
103
- log_error(f"❌ Google STT streaming error", error=str(e))
104
- self.is_streaming = False
105
  raise
106
 
107
  async def stop_streaming(self) -> Optional[TranscriptionResult]:
@@ -343,142 +380,71 @@ class GoogleCloudSTT(STTInterface):
343
  log_error(f"❌ Error queuing result: {e}")
344
 
345
  def _run_stream(self):
346
- """Run the streaming recognition in a separate thread"""
347
  try:
348
- log_info(f"🎤 Google STT stream thread started - Single utterance mode: {self.streaming_config.single_utterance}")
349
-
350
- def request_generator():
351
- """Generate streaming requests"""
352
- chunk_count = 0
353
- total_bytes = 0
354
-
355
- while not self.stop_event.is_set():
356
- try:
357
- chunk = self.audio_queue.get(timeout=0.1)
358
- if chunk is None:
359
- log_info("📛 Poison pill received, stopping request generator")
360
- break
361
-
362
- chunk_count += 1
363
- total_bytes += len(chunk)
364
-
365
- # İlk chunk'ta audio format kontrolü
366
- if chunk_count == 1:
367
- log_info(f"📤 First chunk - size: {len(chunk)} bytes")
368
- if len(chunk) >= 4 and chunk[:4] == b'\x1a\x45\xdf\xa3':
369
- log_info("✅ Valid WEBM header detected")
370
-
371
- # Her 50 chunk'ta durum raporu
372
- if chunk_count % 50 == 0:
373
- log_info(f"📤 Progress: {chunk_count} chunks, {total_bytes/1024:.1f}KB total")
374
-
375
- yield speech.StreamingRecognizeRequest(audio_content=chunk)
376
-
377
- except queue.Empty:
378
- continue
379
- except Exception as e:
380
- log_error(f"❌ Error in request generator: {e}")
381
- break
382
-
383
- log_info(f"📊 Request generator finished. Total chunks: {chunk_count}, Total bytes: {total_bytes}")
384
-
385
  # Create streaming client
386
- requests = request_generator()
387
  log_info("🎤 Creating Google STT streaming client...")
388
-
389
- try:
390
- # Start streaming
391
- responses = self.client.streaming_recognize(
392
- self.streaming_config,
393
- requests,
394
- timeout=300
395
- )
396
-
397
- log_info("✅ Google STT streaming client created, waiting for responses...")
398
-
399
- # Process responses
400
- response_count = 0
401
- result_count = 0
402
- last_log_time = time.time()
403
-
404
- # Response iterator'ı başlat
405
- try:
406
- for response in responses:
407
- response_count += 1
 
 
 
 
 
 
 
 
 
 
 
 
408
 
409
- # İlk response'u logla
410
- if response_count == 1:
411
- log_info(f"📨 First response received from Google STT")
412
-
413
- # Her 5 saniyede bir durum logu
414
- if time.time() - last_log_time > 5:
415
- log_info(f"📊 Still listening... Responses: {response_count}, Results: {result_count}")
416
- last_log_time = time.time()
417
-
418
- if self.stop_event.is_set():
419
- log_info("🛑 Stop event detected")
420
- break
421
-
422
- # Process results
423
- if not response.results:
424
- log_debug(f"📭 Response #{response_count} has no results")
425
- continue
426
-
427
- for result in response.results:
428
- result_count += 1
429
-
430
- if not result.alternatives:
431
- continue
432
-
433
- alternative = result.alternatives[0]
434
-
435
- # Log all transcripts, even empty ones
436
- log_debug(f"📝 Transcript: '{alternative.transcript}' (is_final: {result.is_final})")
437
-
438
- if alternative.transcript.strip():
439
- # Create transcription result
440
- transcription = TranscriptionResult(
441
- text=alternative.transcript,
442
- is_final=result.is_final,
443
- confidence=getattr(alternative, 'confidence', 0.0),
444
- timestamp=datetime.now().timestamp()
445
- )
446
-
447
- # Put result in queue
448
- self._put_result(transcription)
449
-
450
- if result.is_final:
451
- log_info(f"🎯 FINAL TRANSCRIPT: '{alternative.transcript}'")
452
-
453
- # Single utterance modunda Google STT otomatik kapanır
454
- if self.streaming_config.single_utterance:
455
- log_info("✅ Single utterance completed - Stream ending")
456
- return
457
- else:
458
- log_debug(f"📝 Interim: '{alternative.transcript}'")
459
-
460
- except StopIteration:
461
- log_info("✅ Google STT stream ended (StopIteration)")
462
- except Exception as e:
463
- log_error(f"❌ Error processing responses: {e}")
464
-
465
- log_info(f"📊 Google STT stream ended. Responses: {response_count}, Results: {result_count}")
466
-
467
- except Exception as e:
468
- error_msg = str(e)
469
 
470
- # Beklenen hatalar
471
- if "iterating requests" in error_msg:
472
- log_info("✅ Stream ended normally")
473
- elif "Exceeded maximum allowed stream duration" in error_msg:
474
- log_warning("⚠️ Stream duration limit (5 min)")
475
- elif "InvalidArgument" in error_msg:
476
- log_error(f"❌ Invalid STT configuration: {error_msg}")
477
- else:
478
- log_error(f"❌ Google STT error: {error_msg}")
479
-
480
  except Exception as e:
481
- log_error(f"❌ Fatal error in STT stream", error=str(e), traceback=traceback.format_exc())
 
 
 
482
  finally:
483
  log_info("🎤 Google STT stream thread ended")
484
- self.is_streaming = False
 
 
79
  }
80
  return encoding_map.get(encoding_str, speech.RecognitionConfig.AudioEncoding.WEBM_OPUS)
81
 
82
+ async def stream_audio(self, audio_chunk: bytes) -> AsyncGenerator[TranscriptionResult, None]:
83
+ """Stream audio chunk and get results"""
84
  if not self.is_streaming:
85
+ raise Exception("Streaming not started")
86
+
 
87
  try:
88
+ # Audio validation and logging
89
+ chunk_size = len(audio_chunk)
90
+
91
+ # Log first chunk details
92
+ if self.chunk_count == 0:
93
+ log_info(f"📤 First chunk - size: {chunk_size} bytes")
94
+ # Check for WEBM header
95
+ if audio_chunk.startswith(b'\x1a\x45\xdf\xa3'):
96
+ log_info("✅ Valid WEBM header detected")
97
+ else:
98
+ hex_preview = audio_chunk[:20].hex()
99
+ log_warning(f"⚠️ Unexpected audio format. First 20 bytes: {hex_preview}")
100
+
101
+ # Try to measure audio level (if it's raw PCM)
102
+ try:
103
+ import numpy as np
104
+ # This might fail for WEBM, but let's try
105
+ audio_array = np.frombuffer(audio_chunk[-1000:], dtype=np.int16) # Last 1000 bytes
106
+ if len(audio_array) > 0:
107
+ rms = np.sqrt(np.mean(audio_array.astype(float) ** 2))
108
+ db = 20 * np.log10(max(rms, 1) / 32768.0)
109
+ if self.chunk_count % 50 == 0:
110
+ log_info(f"🔊 Audio level estimate: {db:.1f} dB")
111
+ except:
112
+ # Expected for WEBM format
113
+ pass
114
+
115
+ # Put chunk in queue
116
  self.audio_queue.put(audio_chunk)
117
+ self.chunk_count += 1
118
+ self.total_bytes += chunk_size
119
+
120
+ # Log progress
121
+ if self.chunk_count % 50 == 0:
122
+ log_info(f"📤 Progress: {self.chunk_count} chunks, {self.total_bytes/1024:.1f}KB total")
123
+
124
+ # Check for responses with timeout
125
+ timeout = 0.1 # 100ms timeout for checking responses
126
+ end_time = time.time() + timeout
127
+
128
+ while time.time() < end_time:
129
  try:
 
130
  result = self.responses_queue.get_nowait()
131
+ log_info(f"🎯 Got result from queue: is_final={result.is_final}, text='{result.text[:30]}...'")
132
  yield result
133
  except queue.Empty:
134
+ # No results yet, continue
135
+ await asyncio.sleep(0.01)
136
+ except Exception as e:
137
+ log_error(f"Error getting result from queue: {e}")
138
  break
139
+
140
  except Exception as e:
141
+ log_error(f"❌ Error in stream_audio: {e}")
 
142
  raise
143
 
144
  async def stop_streaming(self) -> Optional[TranscriptionResult]:
 
380
  log_error(f"❌ Error queuing result: {e}")
381
 
382
  def _run_stream(self):
383
+ """Run the streaming recognition loop in a separate thread"""
384
  try:
385
+ log_info("🎤 Google STT stream thread started - Single utterance mode: {}".format(self.single_utterance))
386
+
387
+ # Create request generator
388
+ requests = self._request_generator()
389
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
390
  # Create streaming client
 
391
  log_info("🎤 Creating Google STT streaming client...")
392
+ responses = self.client.streaming_recognize(self.streaming_config, requests)
393
+
394
+ # Track if we've received any response
395
+ first_response_time = None
396
+ response_count = 0
397
+
398
+ # Process responses with detailed logging
399
+ for response in responses:
400
+ response_count += 1
401
+
402
+ if first_response_time is None:
403
+ first_response_time = time.time()
404
+ elapsed = first_response_time - self.stream_start_time
405
+ log_info(f"🎉 FIRST RESPONSE from Google STT after {elapsed:.2f}s")
406
+
407
+ # Log every response, even if empty
408
+ log_info(f"📨 Google STT Response #{response_count}: has_results={len(response.results) > 0}")
409
+
410
+ if not response.results:
411
+ log_info("📭 Empty response from Google STT (no results)")
412
+ continue
413
+
414
+ # Log all results in detail
415
+ for result_idx, result in enumerate(response.results):
416
+ log_info(f"📝 Result #{result_idx}: is_final={result.is_final}, "
417
+ f"alternatives={len(result.alternatives)}, "
418
+ f"stability={getattr(result, 'stability', 'N/A')}")
419
+
420
+ if result.alternatives:
421
+ best_alternative = result.alternatives[0]
422
+ log_info(f"🗣️ Transcript: '{best_alternative.transcript}' "
423
+ f"(confidence: {best_alternative.confidence:.3f})")
424
 
425
+ # Put result in queue
426
+ result_obj = TranscriptionResult(
427
+ text=best_alternative.transcript,
428
+ is_final=result.is_final,
429
+ confidence=best_alternative.confidence,
430
+ timestamp=datetime.utcnow()
431
+ )
432
+
433
+ self.responses_queue.put(result_obj)
434
+ log_info(f"✅ Result queued: is_final={result.is_final}, text='{best_alternative.transcript[:50]}...'")
435
+
436
+ # Log if we exit without any responses
437
+ if response_count == 0:
438
+ log_error("❌ Google STT stream ended without ANY responses!")
439
+ else:
440
+ log_info(f"✅ Google STT stream ended normally after {response_count} responses")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
441
 
 
 
 
 
 
 
 
 
 
 
442
  except Exception as e:
443
+ log_error(f"❌ Google STT error: {e}")
444
+ if hasattr(e, 'details'):
445
+ log_error(f"Error details: {e.details}")
446
+ self.error_message = str(e)
447
  finally:
448
  log_info("🎤 Google STT stream thread ended")
449
+ with self.lock:
450
+ self.is_streaming = False