Spaces:
Building
Building
Update stt/stt_google.py
Browse files- stt/stt_google.py +74 -65
stt/stt_google.py
CHANGED
@@ -322,73 +322,81 @@ class GoogleCloudSTT(STTInterface):
|
|
322 |
async def start_streaming(self, config: STTConfig) -> None:
|
323 |
"""Initialize streaming session with clean state"""
|
324 |
try:
|
325 |
-
#
|
326 |
-
|
327 |
-
|
328 |
-
|
329 |
-
|
330 |
-
|
331 |
-
|
332 |
-
|
333 |
-
|
334 |
-
|
335 |
-
|
336 |
-
|
337 |
-
|
338 |
-
|
339 |
-
|
340 |
-
|
|
|
341 |
|
342 |
-
|
343 |
-
|
344 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
345 |
|
346 |
-
|
347 |
-
|
348 |
-
|
349 |
-
|
350 |
-
|
351 |
-
|
352 |
-
|
353 |
-
|
354 |
-
|
355 |
-
|
356 |
-
|
357 |
-
|
358 |
-
|
|
|
359 |
)
|
360 |
-
|
361 |
-
|
362 |
-
|
363 |
-
|
364 |
-
|
365 |
-
|
366 |
-
|
367 |
-
|
368 |
-
|
369 |
-
|
370 |
-
|
371 |
-
self.stop_event.clear()
|
372 |
-
|
373 |
-
# Start streaming thread
|
374 |
-
self.stream_thread = threading.Thread(target=self._run_stream)
|
375 |
-
self.stream_thread.start()
|
376 |
-
|
377 |
-
log_info(f"📋 Streaming config created: interim_results={config.interim_results}, "
|
378 |
-
f"single_utterance={config.single_utterance}, "
|
379 |
-
f"VAD_events=True")
|
380 |
-
|
381 |
-
self.is_streaming = True
|
382 |
|
383 |
-
|
384 |
-
|
385 |
-
|
386 |
-
|
387 |
-
|
388 |
-
|
389 |
-
|
390 |
-
|
391 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
392 |
|
393 |
except Exception as e:
|
394 |
log_error(f"❌ Failed to start Google STT streaming", error=str(e))
|
@@ -400,13 +408,14 @@ class GoogleCloudSTT(STTInterface):
|
|
400 |
def _run_stream(self):
|
401 |
"""Run the streaming recognition loop in a separate thread"""
|
402 |
try:
|
403 |
-
|
|
|
404 |
|
405 |
# Create request generator
|
406 |
requests = self._request_generator()
|
407 |
|
408 |
# Create streaming client
|
409 |
-
log_info("🎤 Creating Google STT streaming client...")
|
410 |
|
411 |
# Get responses (no timeout parameter!)
|
412 |
responses = self.client.streaming_recognize(self.streaming_config, requests)
|
|
|
322 |
async def start_streaming(self, config: STTConfig) -> None:
|
323 |
"""Initialize streaming session with clean state"""
|
324 |
try:
|
325 |
+
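# Use a lock for thread safety. NOTE(review): asyncio.Lock() is created anew on every call here, so it cannot exclude concurrent callers - a lock shared on the instance would be needed.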
|
326 |
+
async with asyncio.Lock():
|
327 |
+
# Clean up any existing stream
|
328 |
+
if self.is_streaming or self.stream_thread:
|
329 |
+
log_warning("⚠️ Previous stream still active, stopping it first")
|
330 |
+
await self.stop_streaming()
|
331 |
+
await asyncio.sleep(0.5)
|
332 |
+
|
333 |
+
# Double-check after cleanup
|
334 |
+
if self.stream_thread and self.stream_thread.is_alive():
|
335 |
+
log_error(f"❌ Stream thread STILL running after cleanup! Thread: {self.stream_thread.name}")
|
336 |
+
raise Exception("Failed to stop previous stream thread")
|
337 |
+
|
338 |
+
# Reset session
|
339 |
+
self._reset_session()
|
340 |
+
self.single_utterance = config.single_utterance
|
341 |
+
self.current_encoding = config.encoding
|
342 |
|
343 |
+
log_info(f"🎤 Starting Google STT streaming session #{self.session_id} with config: {config}")
|
344 |
+
|
345 |
+
# Create fresh queues
|
346 |
+
self._create_fresh_queues()
|
347 |
+
self.stop_event.clear()
|
348 |
+
self.should_stop = False
|
349 |
+
|
350 |
+
# Create new client
|
351 |
+
self.client = speech.SpeechClient()
|
352 |
+
log_info("✅ Created new Google Speech client")
|
353 |
|
354 |
+
# Create recognition config
|
355 |
+
recognition_config = speech.RecognitionConfig(
|
356 |
+
encoding=speech.RecognitionConfig.AudioEncoding.WEBM_OPUS,
|
357 |
+
sample_rate_hertz=16000,
|
358 |
+
language_code="tr-TR",
|
359 |
+
enable_automatic_punctuation=True,
|
360 |
+
model="latest_long",
|
361 |
+
use_enhanced=True,
|
362 |
+
max_alternatives=1,
|
363 |
+
metadata=speech.RecognitionMetadata(
|
364 |
+
interaction_type=speech.RecognitionMetadata.InteractionType.VOICE_SEARCH,
|
365 |
+
microphone_distance=speech.RecognitionMetadata.MicrophoneDistance.NEARFIELD,
|
366 |
+
recording_device_type=speech.RecognitionMetadata.RecordingDeviceType.PC,
|
367 |
+
)
|
368 |
)
|
369 |
+
|
370 |
+
# Create streaming config with VAD
|
371 |
+
self.streaming_config = speech.StreamingRecognitionConfig(
|
372 |
+
config=recognition_config,
|
373 |
+
interim_results=True,
|
374 |
+
single_utterance=False,
|
375 |
+
enable_voice_activity_events=True # ✅ VAD events enabled
|
376 |
+
)
|
377 |
+
|
378 |
+
self.is_streaming = True
|
379 |
+
self.stop_event.clear()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
380 |
|
381 |
+
# Final check before starting the thread
|
382 |
+
if self.stream_thread is not None:
|
383 |
+
log_error("❌ stream_thread should be None at this point!")
|
384 |
+
self.stream_thread = None
|
385 |
+
|
386 |
+
self.is_streaming = True
|
387 |
+
|
388 |
+
# Start streaming thread with unique ID
|
389 |
+
thread_id = f"GoogleSTT-Session-{self.session_id}-{int(time.time()*1000)}"
|
390 |
+
self.stream_thread = threading.Thread(
|
391 |
+
target=self._run_stream,
|
392 |
+
name=thread_id
|
393 |
+
)
|
394 |
+
self.stream_thread.daemon = True
|
395 |
+
|
396 |
+
log_info(f"🚀 Starting thread: {thread_id}")
|
397 |
+
self.stream_thread.start()
|
398 |
+
|
399 |
+
log_info(f"✅ Google STT streaming session #{self.session_id} started successfully")
|
400 |
|
401 |
except Exception as e:
|
402 |
log_error(f"❌ Failed to start Google STT streaming", error=str(e))
|
|
|
408 |
def _run_stream(self):
|
409 |
"""Run the streaming recognition loop in a separate thread"""
|
410 |
try:
|
411 |
+
thread_id = threading.current_thread().ident
|
412 |
+
log_info(f"🎤 Google STT stream thread started - Thread ID: {thread_id}, Session: {self.session_id}")
|
413 |
|
414 |
# Create request generator
|
415 |
requests = self._request_generator()
|
416 |
|
417 |
# Create streaming client
|
418 |
+
log_info(f"🎤 Creating Google STT streaming client... Thread ID: {thread_id}")
|
419 |
|
420 |
# Get responses (no timeout parameter!)
|
421 |
responses = self.client.streaming_recognize(self.streaming_config, requests)
|