ciyidogan committed on
Commit
cc4dbca
·
verified ·
1 Parent(s): 40f81e2

Update stt/stt_google.py

Browse files
Files changed (1) hide show
  1. stt/stt_google.py +66 -154
stt/stt_google.py CHANGED
@@ -4,7 +4,7 @@ Google Cloud Speech-to-Text Implementation
4
  import os
5
  import asyncio
6
  from typing import AsyncIterator, AsyncGenerator, Optional, List, Any
7
- import numpy as np # Audio level check iΓ§in
8
  from datetime import datetime
9
  import sys
10
  import queue
@@ -43,16 +43,15 @@ class GoogleCloudSTT(STTInterface):
43
  self.session_id = 0
44
  self.stream_start_time = None
45
 
46
- # βœ… Eksik attribute'larΔ± ekleyelim
47
- self.lock = threading.Lock() # Thread lock
48
- self.single_utterance = False # Default value
49
- self.chunk_count = 0 # Audio chunk counter
50
- self.total_bytes = 0 # Total bytes received
51
- self.stop_event = threading.Event() # βœ… Stop event for thread coordination
52
 
53
  # Set Google credentials
54
  if credentials_path:
55
- # ConfigProvider'dan gelen path'i kullan
56
  if os.path.exists(credentials_path):
57
  os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = credentials_path
58
  log_info(f"βœ… Google credentials set from: {credentials_path}")
@@ -63,14 +62,12 @@ class GoogleCloudSTT(STTInterface):
63
  # Fallback to environment variable
64
  creds_path = os.environ.get("GOOGLE_APPLICATION_CREDENTIALS")
65
  if not creds_path:
66
- # Try default location
67
  creds_path = "./credentials/google-service-account.json"
68
  if os.path.exists(creds_path):
69
  os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = creds_path
70
  log_info(f"βœ… Google credentials set from default: {creds_path}")
71
  else:
72
  raise ValueError("Google credentials not found. Please provide credentials_path")
73
-
74
 
75
  # Test credentials
76
  try:
@@ -95,19 +92,20 @@ class GoogleCloudSTT(STTInterface):
95
  }
96
  return encoding_map.get(encoding_str, speech.RecognitionConfig.AudioEncoding.WEBM_OPUS)
97
 
 
 
 
98
  async def stream_audio(self, audio_chunk: bytes) -> AsyncGenerator[TranscriptionResult, None]:
99
  """Stream audio chunk and get results"""
100
  if not self.is_streaming:
101
  raise Exception("Streaming not started")
102
 
103
  try:
104
- # Audio validation and logging
105
  chunk_size = len(audio_chunk)
106
 
107
  # Log first chunk details
108
  if self.chunk_count == 0:
109
  log_info(f"πŸ“€ First chunk - size: {chunk_size} bytes")
110
- # Check for WEBM header
111
  if audio_chunk.startswith(b'\x1a\x45\xdf\xa3'):
112
  log_info("βœ… Valid WEBM header detected")
113
  else:
@@ -116,16 +114,14 @@ class GoogleCloudSTT(STTInterface):
116
 
117
  # Try to measure audio level (if it's raw PCM)
118
  try:
119
- import numpy as np
120
- # This might fail for WEBM, but let's try
121
- audio_array = np.frombuffer(audio_chunk[-1000:], dtype=np.int16) # Last 1000 bytes
122
- if len(audio_array) > 0:
123
- rms = np.sqrt(np.mean(audio_array.astype(float) ** 2))
124
- db = 20 * np.log10(max(rms, 1) / 32768.0)
125
- if self.chunk_count % 50 == 0:
126
- log_info(f"πŸ”Š Audio level estimate: {db:.1f} dB")
127
  except:
128
- # Expected for WEBM format
129
  pass
130
 
131
  # Put chunk in queue
@@ -137,8 +133,8 @@ class GoogleCloudSTT(STTInterface):
137
  if self.chunk_count % 50 == 0:
138
  log_info(f"πŸ“€ Progress: {self.chunk_count} chunks, {self.total_bytes/1024:.1f}KB total")
139
 
140
- # Check for responses with timeout
141
- timeout = 0.1 # 100ms timeout for checking responses
142
  end_time = time.time() + timeout
143
 
144
  while time.time() < end_time:
@@ -147,7 +143,6 @@ class GoogleCloudSTT(STTInterface):
147
  log_info(f"🎯 Got result from queue: is_final={result.is_final}, text='{result.text[:30]}...'")
148
  yield result
149
  except queue.Empty:
150
- # No results yet, continue
151
  await asyncio.sleep(0.01)
152
  except Exception as e:
153
  log_error(f"Error getting result from queue: {e}")
@@ -166,18 +161,19 @@ class GoogleCloudSTT(STTInterface):
166
  try:
167
  log_info(f"πŸ›‘ Stopping Google STT streaming session #{self.session_id}")
168
 
169
- # Flag'i hemen kapat
170
  self.is_streaming = False
 
171
  self.stop_event.set()
172
 
173
- # Send poison pill to stop request generator
174
  if self.audio_queue:
175
  try:
176
  self.audio_queue.put(None)
177
  except:
178
  pass
179
 
180
- # Thread'i durdur
181
  if self.stream_thread and self.stream_thread.is_alive():
182
  log_info("⏳ Waiting for stream thread to finish...")
183
  self.stream_thread.join(timeout=5.0)
@@ -187,18 +183,18 @@ class GoogleCloudSTT(STTInterface):
187
  else:
188
  log_info("βœ… Stream thread finished")
189
 
190
- # Final result'Δ± al - βœ… BURADA DÜZELTME
191
  final_result = None
192
  if self.responses_queue:
193
  while not self.responses_queue.empty():
194
  try:
195
- result = self.responses_queue.get_nowait() # βœ… await değil, get_nowait()
196
  if result.is_final:
197
  final_result = result
198
- except queue.Empty: # βœ… queue.Empty kullan
199
  break
200
 
201
- # Client'Δ± kapat
202
  if self.client:
203
  try:
204
  if hasattr(self.client, 'transport') and hasattr(self.client.transport, 'close'):
@@ -213,11 +209,9 @@ class GoogleCloudSTT(STTInterface):
213
  finally:
214
  self.client = None
215
 
216
- # Queue'larΔ± None yap
217
  self.audio_queue = None
218
  self.responses_queue = None
219
-
220
- # Diğer değişkenleri resetle
221
  self.stream_thread = None
222
  self.streaming_config = None
223
  self.stop_event.clear()
@@ -227,7 +221,6 @@ class GoogleCloudSTT(STTInterface):
227
 
228
  except Exception as e:
229
  log_error(f"❌ Error during stop_streaming", error=str(e))
230
- # Force cleanup on error
231
  self.is_streaming = False
232
  self.stream_thread = None
233
  self.client = None
@@ -244,19 +237,8 @@ class GoogleCloudSTT(STTInterface):
244
  def get_supported_languages(self) -> List[str]:
245
  """Get list of supported language codes"""
246
  return [
247
- "tr-TR", # Turkish
248
- "en-US", # English (US)
249
- "en-GB", # English (UK)
250
- "de-DE", # German
251
- "fr-FR", # French
252
- "es-ES", # Spanish
253
- "it-IT", # Italian
254
- "pt-BR", # Portuguese (Brazil)
255
- "ru-RU", # Russian
256
- "ja-JP", # Japanese
257
- "ko-KR", # Korean
258
- "zh-CN", # Chinese (Simplified)
259
- "ar-SA", # Arabic
260
  ]
261
 
262
  def get_provider_name(self) -> str:
@@ -283,21 +265,18 @@ class GoogleCloudSTT(STTInterface):
283
  self.error_message = None
284
  self.session_id += 1
285
  self.stream_start_time = time.time()
286
-
287
- # βœ… Counter'larΔ± sΔ±fΔ±rla
288
  self.chunk_count = 0
289
  self.total_bytes = 0
290
 
291
  log_info(f"πŸ”„ Google STT session data reset. New session ID: {self.session_id}")
292
 
293
- # Create fresh queues to be extra safe
294
  self.audio_queue = queue.Queue()
295
  self.responses_queue = queue.Queue()
296
  log_debug("βœ… Created fresh queues")
297
 
298
  def _create_fresh_queues(self):
299
  """Create fresh queue instances"""
300
- # Eski queue'larΔ± temizle
301
  if self.audio_queue:
302
  while not self.audio_queue.empty():
303
  try:
@@ -312,35 +291,27 @@ class GoogleCloudSTT(STTInterface):
312
  except:
313
  pass
314
 
315
- # Yeni queue'lar oluştur
316
- self.audio_queue = queue.Queue(maxsize=1000) # Max size ekle
317
  self.responses_queue = queue.Queue(maxsize=100)
318
  log_debug("βœ… Created fresh queues")
319
 
320
  def _request_generator(self):
321
  """Generate requests for the streaming recognize API"""
322
- # First request must contain only the config
323
  yield speech.StreamingRecognizeRequest(streaming_config=self.streaming_config)
324
 
325
- # Subsequent requests should contain audio chunks
326
  while not self.should_stop:
327
  try:
328
- # Get audio chunk from queue with timeout
329
  audio_chunk = self.audio_queue.get(timeout=0.1)
330
 
331
  if audio_chunk is None:
332
- # Poison pill received
333
  log_info("πŸ“› Poison pill received, stopping request generator")
334
  break
335
 
336
- # Send audio chunk
337
  yield speech.StreamingRecognizeRequest(audio_content=audio_chunk)
338
 
339
- self.chunk_count += 1
340
- self.total_bytes += len(audio_chunk)
341
-
342
  except queue.Empty:
343
- # No audio available, continue waiting
344
  continue
345
  except Exception as e:
346
  log_error(f"Error in request generator: {e}")
@@ -348,30 +319,27 @@ class GoogleCloudSTT(STTInterface):
348
 
349
  log_info(f"πŸ“Š Request generator finished. Total chunks: {self.chunk_count}, Total bytes: {self.total_bytes}")
350
 
351
- async def start_streaming(self, config: dict) -> None:
352
  """Initialize streaming session with clean state"""
353
  try:
354
- # Γ–nce mevcut stream'i temizle
355
  if self.is_streaming or self.stream_thread:
356
  log_warning("⚠️ Previous stream still active, stopping it first")
357
  await self.stop_streaming()
358
- # Temizlik iΓ§in bekle
359
  await asyncio.sleep(0.5)
360
 
361
- # Session verilerini resetle ve ID'yi artΔ±r
362
  self._reset_session()
363
-
364
  self.single_utterance = config.single_utterance
365
 
366
  log_info(f"🎀 Starting Google STT streaming session #{self.session_id} with config: {config}")
367
 
368
- # Fresh queue'lar oluştur
369
  self._create_fresh_queues()
370
-
371
- # Stop event'i temizle
372
  self.stop_event.clear()
 
373
 
374
- # Yeni client oluştur (TEK SEFER)
375
  self.client = speech.SpeechClient()
376
  log_info("βœ… Created new Google Speech client")
377
 
@@ -384,20 +352,19 @@ class GoogleCloudSTT(STTInterface):
384
  model=config.model,
385
  use_enhanced=config.use_enhanced,
386
  max_alternatives=1,
387
- # Metadata for better recognition
388
  metadata=speech.RecognitionMetadata(
389
  interaction_type=speech.RecognitionMetadata.InteractionType.VOICE_SEARCH,
390
  microphone_distance=speech.RecognitionMetadata.MicrophoneDistance.NEARFIELD,
391
  recording_device_type=speech.RecognitionMetadata.RecordingDeviceType.PC,
392
  )
393
- ) # βœ… Parantez burada kapanmalΔ±
394
 
395
  # Create streaming config with VAD
396
  self.streaming_config = speech.StreamingRecognitionConfig(
397
  config=recognition_config,
398
- interim_results=config.interim_results, # βœ… Bu zaten True
399
- single_utterance=config.single_utterance, # βœ… Bu False
400
- enable_voice_activity_events=True # βœ… VAD events'leri aΓ§Δ±yoruz
401
  )
402
 
403
  log_info(f"πŸ“‹ Streaming config created: interim_results={config.interim_results}, "
@@ -406,12 +373,12 @@ class GoogleCloudSTT(STTInterface):
406
 
407
  self.is_streaming = True
408
 
409
- # Start streaming thread with unique name
410
  self.stream_thread = threading.Thread(
411
  target=self._run_stream,
412
  name=f"GoogleSTT-Session-{self.session_id}"
413
  )
414
- self.stream_thread.daemon = True # Daemon thread olarak işaretle
415
  self.stream_thread.start()
416
 
417
  log_info(f"βœ… Google STT streaming session #{self.session_id} started successfully")
@@ -420,55 +387,33 @@ class GoogleCloudSTT(STTInterface):
420
  log_error(f"❌ Failed to start Google STT streaming", error=str(e))
421
  self.is_streaming = False
422
  self.client = None
423
- self._create_fresh_queues() # Hata durumunda da queue'larΔ± temizle
424
  raise
425
 
426
- def _put_result(self, result: TranscriptionResult):
427
- """Helper to put result in queue"""
428
- try:
429
- self.responses_queue.put(result)
430
- # Debug log'u kaldΔ±rdΔ±k
431
- except Exception as e:
432
- log_error(f"❌ Error queuing result: {e}")
433
-
434
  def _run_stream(self):
435
  """Run the streaming recognition loop in a separate thread"""
436
  try:
437
- log_info("🎀 Google STT stream thread started - Single utterance mode: {}".format(self.single_utterance))
438
 
439
  # Create request generator
440
  requests = self._request_generator()
441
 
442
  # Create streaming client
443
  log_info("🎀 Creating Google STT streaming client...")
444
-
445
- # Set a timeout for the streaming call
446
- import grpc
447
- timeout = 300 # 5 minutes max for the stream
448
-
449
- # Create streaming client with timeout
450
- responses = self.client.streaming_recognize(
451
- self.streaming_config,
452
- requests,
453
- timeout=timeout
454
- )
455
-
456
- # Set initial response timeout
457
- initial_response_timeout = 30 # 30 seconds to get first response
458
- stream_start = time.time()
459
- got_first_response = False
460
 
461
- # Track if we've received any response
 
 
 
462
  first_response_time = None
463
  response_count = 0
464
 
465
- # Process responses with detailed logging
466
  for response in responses:
467
- if not got_first_response:
468
- got_first_response = True
469
- elapsed = time.time() - stream_start
470
- log_info(f"βœ… Got first response from Google after {elapsed:.2f}s")
471
-
472
  response_count += 1
473
 
474
  if first_response_time is None:
@@ -476,47 +421,14 @@ class GoogleCloudSTT(STTInterface):
476
  elapsed = first_response_time - self.stream_start_time
477
  log_info(f"πŸŽ‰ FIRST RESPONSE from Google STT after {elapsed:.2f}s")
478
 
479
- # Log every response, even if empty
480
- log_info(f"πŸ“¨ Google STT Response #{response_count}: has_results={len(response.results) > 0}")
481
-
482
- if not response.results:
483
- log_info("πŸ“­ Empty response from Google STT (no results)")
484
- continue
485
-
486
- # Log all results in detail
487
- for result_idx, result in enumerate(response.results):
488
- log_info(f"πŸ“ Result #{result_idx}: is_final={result.is_final}, "
489
- f"alternatives={len(result.alternatives)}, "
490
- f"stability={getattr(result, 'stability', 'N/A')}")
491
 
492
- if result.alternatives:
493
- best_alternative = result.alternatives[0]
494
- log_info(f"πŸ—£οΈ Transcript: '{best_alternative.transcript}' "
495
- f"(confidence: {best_alternative.confidence:.3f})")
496
-
497
- # Put result in queue
498
- result_obj = TranscriptionResult(
499
- text=best_alternative.transcript,
500
- is_final=result.is_final,
501
- confidence=best_alternative.confidence,
502
- timestamp=datetime.utcnow()
503
- )
504
-
505
- self.responses_queue.put(result_obj)
506
- log_info(f"βœ… Result queued: is_final={result.is_final}, text='{best_alternative.transcript[:50]}...'")
507
-
508
- # Log if we exit without any responses
509
- if response_count == 0:
510
- log_error("❌ Google STT stream ended without ANY responses!")
511
- else:
512
- log_info(f"βœ… Google STT stream ended normally after {response_count} responses")
513
 
514
- except Exception as e:
515
- log_error(f"❌ Google STT error: {e}")
516
- if hasattr(e, 'details'):
517
- log_error(f"Error details: {e.details}")
518
- self.error_message = str(e)
519
- finally:
520
- log_info("🎀 Google STT stream thread ended")
521
- with self.lock:
522
- self.is_streaming = False
 
4
  import os
5
  import asyncio
6
  from typing import AsyncIterator, AsyncGenerator, Optional, List, Any
7
+ import numpy as np
8
  from datetime import datetime
9
  import sys
10
  import queue
 
43
  self.session_id = 0
44
  self.stream_start_time = None
45
 
46
+ # Additional attributes
47
+ self.lock = threading.Lock()
48
+ self.single_utterance = False
49
+ self.chunk_count = 0
50
+ self.total_bytes = 0
51
+ self.stop_event = threading.Event()
52
 
53
  # Set Google credentials
54
  if credentials_path:
 
55
  if os.path.exists(credentials_path):
56
  os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = credentials_path
57
  log_info(f"βœ… Google credentials set from: {credentials_path}")
 
62
  # Fallback to environment variable
63
  creds_path = os.environ.get("GOOGLE_APPLICATION_CREDENTIALS")
64
  if not creds_path:
 
65
  creds_path = "./credentials/google-service-account.json"
66
  if os.path.exists(creds_path):
67
  os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = creds_path
68
  log_info(f"βœ… Google credentials set from default: {creds_path}")
69
  else:
70
  raise ValueError("Google credentials not found. Please provide credentials_path")
 
71
 
72
  # Test credentials
73
  try:
 
92
  }
93
  return encoding_map.get(encoding_str, speech.RecognitionConfig.AudioEncoding.WEBM_OPUS)
94
 
95
+ # Alias for compatibility
96
+ _get_google_encoding = _get_encoding
97
+
98
  async def stream_audio(self, audio_chunk: bytes) -> AsyncGenerator[TranscriptionResult, None]:
99
  """Stream audio chunk and get results"""
100
  if not self.is_streaming:
101
  raise Exception("Streaming not started")
102
 
103
  try:
 
104
  chunk_size = len(audio_chunk)
105
 
106
  # Log first chunk details
107
  if self.chunk_count == 0:
108
  log_info(f"πŸ“€ First chunk - size: {chunk_size} bytes")
 
109
  if audio_chunk.startswith(b'\x1a\x45\xdf\xa3'):
110
  log_info("βœ… Valid WEBM header detected")
111
  else:
 
114
 
115
  # Try to measure audio level (if it's raw PCM)
116
  try:
117
+ if encoding_str == "LINEAR16": # Only for raw PCM
118
+ audio_array = np.frombuffer(audio_chunk, dtype=np.int16)
119
+ if len(audio_array) > 0:
120
+ rms = np.sqrt(np.mean(audio_array.astype(float) ** 2))
121
+ db = 20 * np.log10(max(rms, 1) / 32768.0)
122
+ if self.chunk_count % 50 == 0:
123
+ log_info(f"πŸ”Š Audio level: {db:.1f} dB")
 
124
  except:
 
125
  pass
126
 
127
  # Put chunk in queue
 
133
  if self.chunk_count % 50 == 0:
134
  log_info(f"πŸ“€ Progress: {self.chunk_count} chunks, {self.total_bytes/1024:.1f}KB total")
135
 
136
+ # Check for responses
137
+ timeout = 0.1
138
  end_time = time.time() + timeout
139
 
140
  while time.time() < end_time:
 
143
  log_info(f"🎯 Got result from queue: is_final={result.is_final}, text='{result.text[:30]}...'")
144
  yield result
145
  except queue.Empty:
 
146
  await asyncio.sleep(0.01)
147
  except Exception as e:
148
  log_error(f"Error getting result from queue: {e}")
 
161
  try:
162
  log_info(f"πŸ›‘ Stopping Google STT streaming session #{self.session_id}")
163
 
164
+ # Set flags
165
  self.is_streaming = False
166
+ self.should_stop = True
167
  self.stop_event.set()
168
 
169
+ # Send poison pill
170
  if self.audio_queue:
171
  try:
172
  self.audio_queue.put(None)
173
  except:
174
  pass
175
 
176
+ # Wait for thread
177
  if self.stream_thread and self.stream_thread.is_alive():
178
  log_info("⏳ Waiting for stream thread to finish...")
179
  self.stream_thread.join(timeout=5.0)
 
183
  else:
184
  log_info("βœ… Stream thread finished")
185
 
186
+ # Get final result
187
  final_result = None
188
  if self.responses_queue:
189
  while not self.responses_queue.empty():
190
  try:
191
+ result = self.responses_queue.get_nowait()
192
  if result.is_final:
193
  final_result = result
194
+ except queue.Empty:
195
  break
196
 
197
+ # Close client
198
  if self.client:
199
  try:
200
  if hasattr(self.client, 'transport') and hasattr(self.client.transport, 'close'):
 
209
  finally:
210
  self.client = None
211
 
212
+ # Reset state
213
  self.audio_queue = None
214
  self.responses_queue = None
 
 
215
  self.stream_thread = None
216
  self.streaming_config = None
217
  self.stop_event.clear()
 
221
 
222
  except Exception as e:
223
  log_error(f"❌ Error during stop_streaming", error=str(e))
 
224
  self.is_streaming = False
225
  self.stream_thread = None
226
  self.client = None
 
237
  def get_supported_languages(self) -> List[str]:
238
  """Get list of supported language codes"""
239
  return [
240
+ "tr-TR", "en-US", "en-GB", "de-DE", "fr-FR", "es-ES",
241
+ "it-IT", "pt-BR", "ru-RU", "ja-JP", "ko-KR", "zh-CN", "ar-SA"
 
 
 
 
 
 
 
 
 
 
 
242
  ]
243
 
244
  def get_provider_name(self) -> str:
 
265
  self.error_message = None
266
  self.session_id += 1
267
  self.stream_start_time = time.time()
 
 
268
  self.chunk_count = 0
269
  self.total_bytes = 0
270
 
271
  log_info(f"πŸ”„ Google STT session data reset. New session ID: {self.session_id}")
272
 
273
+ # Create fresh queues
274
  self.audio_queue = queue.Queue()
275
  self.responses_queue = queue.Queue()
276
  log_debug("βœ… Created fresh queues")
277
 
278
  def _create_fresh_queues(self):
279
  """Create fresh queue instances"""
 
280
  if self.audio_queue:
281
  while not self.audio_queue.empty():
282
  try:
 
291
  except:
292
  pass
293
 
294
+ self.audio_queue = queue.Queue(maxsize=1000)
 
295
  self.responses_queue = queue.Queue(maxsize=100)
296
  log_debug("βœ… Created fresh queues")
297
 
298
  def _request_generator(self):
299
  """Generate requests for the streaming recognize API"""
300
+ # First request with config
301
  yield speech.StreamingRecognizeRequest(streaming_config=self.streaming_config)
302
 
303
+ # Audio chunks
304
  while not self.should_stop:
305
  try:
 
306
  audio_chunk = self.audio_queue.get(timeout=0.1)
307
 
308
  if audio_chunk is None:
 
309
  log_info("πŸ“› Poison pill received, stopping request generator")
310
  break
311
 
 
312
  yield speech.StreamingRecognizeRequest(audio_content=audio_chunk)
313
 
 
 
 
314
  except queue.Empty:
 
315
  continue
316
  except Exception as e:
317
  log_error(f"Error in request generator: {e}")
 
319
 
320
  log_info(f"πŸ“Š Request generator finished. Total chunks: {self.chunk_count}, Total bytes: {self.total_bytes}")
321
 
322
+ async def start_streaming(self, config: STTConfig) -> None:
323
  """Initialize streaming session with clean state"""
324
  try:
325
+ # Clean up any existing stream
326
  if self.is_streaming or self.stream_thread:
327
  log_warning("⚠️ Previous stream still active, stopping it first")
328
  await self.stop_streaming()
 
329
  await asyncio.sleep(0.5)
330
 
331
+ # Reset session
332
  self._reset_session()
 
333
  self.single_utterance = config.single_utterance
334
 
335
  log_info(f"🎀 Starting Google STT streaming session #{self.session_id} with config: {config}")
336
 
337
+ # Create fresh queues
338
  self._create_fresh_queues()
 
 
339
  self.stop_event.clear()
340
+ self.should_stop = False
341
 
342
+ # Create new client
343
  self.client = speech.SpeechClient()
344
  log_info("βœ… Created new Google Speech client")
345
 
 
352
  model=config.model,
353
  use_enhanced=config.use_enhanced,
354
  max_alternatives=1,
 
355
  metadata=speech.RecognitionMetadata(
356
  interaction_type=speech.RecognitionMetadata.InteractionType.VOICE_SEARCH,
357
  microphone_distance=speech.RecognitionMetadata.MicrophoneDistance.NEARFIELD,
358
  recording_device_type=speech.RecognitionMetadata.RecordingDeviceType.PC,
359
  )
360
+ )
361
 
362
  # Create streaming config with VAD
363
  self.streaming_config = speech.StreamingRecognitionConfig(
364
  config=recognition_config,
365
+ interim_results=config.interim_results,
366
+ single_utterance=config.single_utterance,
367
+ enable_voice_activity_events=True # βœ… VAD events enabled
368
  )
369
 
370
  log_info(f"πŸ“‹ Streaming config created: interim_results={config.interim_results}, "
 
373
 
374
  self.is_streaming = True
375
 
376
+ # Start streaming thread
377
  self.stream_thread = threading.Thread(
378
  target=self._run_stream,
379
  name=f"GoogleSTT-Session-{self.session_id}"
380
  )
381
+ self.stream_thread.daemon = True
382
  self.stream_thread.start()
383
 
384
  log_info(f"βœ… Google STT streaming session #{self.session_id} started successfully")
 
387
  log_error(f"❌ Failed to start Google STT streaming", error=str(e))
388
  self.is_streaming = False
389
  self.client = None
390
+ self._create_fresh_queues()
391
  raise
392
 
 
 
 
 
 
 
 
 
393
  def _run_stream(self):
394
  """Run the streaming recognition loop in a separate thread"""
395
  try:
396
+ log_info(f"🎀 Google STT stream thread started - Single utterance mode: {self.single_utterance}")
397
 
398
  # Create request generator
399
  requests = self._request_generator()
400
 
401
  # Create streaming client
402
  log_info("🎀 Creating Google STT streaming client...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
403
 
404
+ # Get responses (no timeout parameter!)
405
+ responses = self.client.streaming_recognize(self.streaming_config, requests)
406
+
407
+ # Track responses
408
  first_response_time = None
409
  response_count = 0
410
 
411
+ # Process responses
412
  for response in responses:
413
+ if self.should_stop:
414
+ log_info("πŸ›‘ Stop flag detected, ending stream")
415
+ break
416
+
 
417
  response_count += 1
418
 
419
  if first_response_time is None:
 
421
  elapsed = first_response_time - self.stream_start_time
422
  log_info(f"πŸŽ‰ FIRST RESPONSE from Google STT after {elapsed:.2f}s")
423
 
424
+ # Check for VAD events
425
+ if hasattr(response, 'speech_event_type') and response.speech_event_type:
426
+ event_type = response.speech_event_type
427
+ log_info(f"πŸŽ™οΈ VAD Event: {event_type}")
 
 
 
 
 
 
 
 
428
 
429
+ if event_type == speech.StreamingRecognizeResponse.SpeechEventType.END_OF_SINGLE_UTTERANCE:
430
+ log_info("πŸ”š End of utterance detected by VAD")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
431
 
432
+ # Log response
433
+ has_results = len(response.results) > 0 if hasattr(response, 'results') else False
434
+ log_info(f"πŸ“¨ Google STT Response #{response_count}: has_results={has_results}")