Spaces:

UcsTurkey
/

flare

Building

App Files Community

ciyidogan committed 16 days ago

Commit

f327841

verified ·

1 Parent(s): f3b26f3

Update stt/stt_google.py

Browse files

Files changed (1) hide show

stt/stt_google.py +44 -32

stt/stt_google.py CHANGED Viewed

@@ -267,22 +267,22 @@ class GoogleCloudSTT(STTInterface):
                 await self.stop_streaming()
                 # Temizlik için bekle
                 await asyncio.sleep(0.5)
             # Session verilerini resetle ve ID'yi artır
             self._reset_session_data()
             log_info(f"🎤 Starting Google STT streaming session #{self.session_id} with config: {config}")
             # Fresh queue'lar oluştur
             self._create_fresh_queues()
             # Stop event'i temizle
             self.stop_event.clear()
             # Yeni client oluştur (TEK SEFER)
             self.client = speech.SpeechClient()
             log_info("✅ Created new Google Speech client")
             # Convert dict to STTConfig if needed
             if isinstance(config, dict):
                 stt_config = STTConfig(
@@ -290,29 +290,33 @@ class GoogleCloudSTT(STTInterface):
                     sample_rate=config.get("sample_rate", 16000),
                     encoding=config.get("encoding", "WEBM_OPUS"),
                     enable_punctuation=config.get("enable_punctuation", True),
-                    interim_results=config.get("interim_results", True),
-                    single_utterance=config.get("single_utterance", False)
                 )
             else:
                 stt_config = config
             recognition_config = speech.RecognitionConfig(
                 encoding=self._get_encoding(stt_config.encoding),
                 sample_rate_hertz=stt_config.sample_rate,
                 language_code=stt_config.language,
                 enable_automatic_punctuation=stt_config.enable_punctuation,
                 model="latest_long",
-                use_enhanced=True
             )
             self.streaming_config = speech.StreamingRecognitionConfig(
                 config=recognition_config,
                 interim_results=stt_config.interim_results,
                 single_utterance=stt_config.single_utterance
             )
             self.is_streaming = True
             # Start streaming thread with unique name
             self.stream_thread = threading.Thread(
                 target=self._run_stream,
@@ -320,9 +324,9 @@ class GoogleCloudSTT(STTInterface):
             )
             self.stream_thread.daemon = True  # Daemon thread olarak işaretle
             self.stream_thread.start()
             log_info(f"✅ Google STT streaming session #{self.session_id} started successfully")
         except Exception as e:
             log_error(f"❌ Failed to start Google STT streaming", error=str(e))
             self.is_streaming = False
@@ -364,8 +368,9 @@ class GoogleCloudSTT(STTInterface):
                             if len(chunk) >= 4 and chunk[:4] == b'\x1a\x45\xdf\xa3':
                                 log_info("✅ Valid WEBM header detected")
                             else:
-                                log_error(f"❌ Invalid audio format")
-                                break
                         # Her 50 chunk'ta durum raporu
                         if chunk_count % 50 == 0:
@@ -379,6 +384,8 @@ class GoogleCloudSTT(STTInterface):
                         log_error(f"❌ Error in request generator: {e}")
                         break
             # Create streaming client
             requests = request_generator()
             log_info("🎤 Creating Google STT streaming client...")
@@ -390,26 +397,30 @@ class GoogleCloudSTT(STTInterface):
                     timeout=300
                 )
-                log_info("✅ Google STT streaming client created")
                 for response in responses:
                     if self.stop_event.is_set():
                         log_info("🛑 Stop event detected")
                         break
-                    # Check for speech events (VAD)
-                    if hasattr(response, 'speech_event_type'):
-                        event_type = response.speech_event_type
-                        if event_type == speech.StreamingRecognizeResponse.SpeechEventType.END_OF_SINGLE_UTTERANCE:
-                            log_info("🏁 Google STT: End of single utterance detected")
-                            # Google otomatik olarak stream'i kapatacak
-                            break
                     # Process results
                     if not response.results:
                         continue
                     for result in response.results:
                         if not result.alternatives:
                             continue
@@ -432,26 +443,27 @@ class GoogleCloudSTT(STTInterface):
                                 # Single utterance modunda Google STT otomatik kapanır
                                 if self.streaming_config.single_utterance:
-                                    log_info("✅ Single utterance mode - Google STT will close stream")
-                                    # Loop otomatik sonlanacak
                             else:
                                 log_debug(f"📝 Interim: '{alternative.transcript}'")
-                log_info("📊 Google STT stream ended normally")
             except Exception as e:
                 error_msg = str(e)
-                # Google STT'nin normal kapanma durumları
                 if "iterating requests" in error_msg:
-                    log_info("✅ Google STT stream closed normally (end of utterance)")
                 elif "Exceeded maximum allowed stream duration" in error_msg:
                     log_warning("⚠️ Stream duration limit (5 min)")
                 else:
                     log_error(f"❌ Google STT error: {error_msg}")
         except Exception as e:
-            log_error(f"❌ Fatal error in STT stream", error=str(e))
         finally:
             log_info("🎤 Google STT stream thread ended")
             self.is_streaming = False

                 await self.stop_streaming()
                 # Temizlik için bekle
                 await asyncio.sleep(0.5)
             # Session verilerini resetle ve ID'yi artır
             self._reset_session_data()
             log_info(f"🎤 Starting Google STT streaming session #{self.session_id} with config: {config}")
             # Fresh queue'lar oluştur
             self._create_fresh_queues()
             # Stop event'i temizle
             self.stop_event.clear()
             # Yeni client oluştur (TEK SEFER)
             self.client = speech.SpeechClient()
             log_info("✅ Created new Google Speech client")
             # Convert dict to STTConfig if needed
             if isinstance(config, dict):
                 stt_config = STTConfig(
                     sample_rate=config.get("sample_rate", 16000),
                     encoding=config.get("encoding", "WEBM_OPUS"),
                     enable_punctuation=config.get("enable_punctuation", True),
+                    interim_results=config.get("interim_results", False),
+                    single_utterance=config.get("single_utterance", True)
                 )
             else:
                 stt_config = config
             recognition_config = speech.RecognitionConfig(
                 encoding=self._get_encoding(stt_config.encoding),
                 sample_rate_hertz=stt_config.sample_rate,
                 language_code=stt_config.language,
                 enable_automatic_punctuation=stt_config.enable_punctuation,
                 model="latest_long",
+                use_enhanced=True,
+                # Bu parametreleri kaldırıyoruz - v1 API'de yok
+                # enable_voice_activity_events=True,
+                # audio_channel_count=1
             )
             self.streaming_config = speech.StreamingRecognitionConfig(
                 config=recognition_config,
                 interim_results=stt_config.interim_results,
                 single_utterance=stt_config.single_utterance
+                # enable_voice_activity_events kaldırıldı
             )
             self.is_streaming = True
             # Start streaming thread with unique name
             self.stream_thread = threading.Thread(
                 target=self._run_stream,
             )
             self.stream_thread.daemon = True  # Daemon thread olarak işaretle
             self.stream_thread.start()
             log_info(f"✅ Google STT streaming session #{self.session_id} started successfully")
         except Exception as e:
             log_error(f"❌ Failed to start Google STT streaming", error=str(e))
             self.is_streaming = False
                             if len(chunk) >= 4 and chunk[:4] == b'\x1a\x45\xdf\xa3':
                                 log_info("✅ Valid WEBM header detected")
                             else:
+                                log_error(f"❌ Invalid audio format, first 4 bytes: {chunk[:4].hex()}")
+                                # Format hatalıysa devam et, Google STT düzeltebilir
+                                # break
                         # Her 50 chunk'ta durum raporu
                         if chunk_count % 50 == 0:
                         log_error(f"❌ Error in request generator: {e}")
                         break
+                log_info(f"📊 Request generator finished. Total chunks: {chunk_count}, Total bytes: {total_bytes}")
             # Create streaming client
             requests = request_generator()
             log_info("🎤 Creating Google STT streaming client...")
                     timeout=300
                 )
+                log_info("✅ Google STT streaming client created, waiting for responses...")
+                # Process responses
+                response_count = 0
+                result_count = 0
                 for response in responses:
+                    response_count += 1
+                    if response_count == 1:
+                        log_info(f"📨 First response received from Google STT")
                     if self.stop_event.is_set():
                         log_info("🛑 Stop event detected")
                         break
                     # Process results
                     if not response.results:
+                        log_debug(f"📭 Response #{response_count} has no results")
                         continue
                     for result in response.results:
+                        result_count += 1
                         if not result.alternatives:
                             continue
                                 # Single utterance modunda Google STT otomatik kapanır
                                 if self.streaming_config.single_utterance:
+                                    log_info("✅ Single utterance mode - Stream will end")
+                                    # Google stream'i kapatacak, biz de çıkalım
+                                    return
                             else:
                                 log_debug(f"📝 Interim: '{alternative.transcript}'")
+                log_info(f"📊 Google STT stream ended. Responses: {response_count}, Results: {result_count}")
             except Exception as e:
                 error_msg = str(e)
+                # Beklenen hatalar
                 if "iterating requests" in error_msg:
+                    log_info("✅ Stream ended normally")
                 elif "Exceeded maximum allowed stream duration" in error_msg:
                     log_warning("⚠️ Stream duration limit (5 min)")
                 else:
                     log_error(f"❌ Google STT error: {error_msg}")
         except Exception as e:
+            log_error(f"❌ Fatal error in STT stream", error=str(e), traceback=traceback.format_exc())
         finally:
             log_info("🎤 Google STT stream thread ended")
             self.is_streaming = False