Spaces:

UcsTurkey
/

flare

Building

App Files Community

ciyidogan committed on 17 days ago

Commit

30f7ccb

verified ·

1 Parent(s): 855e594

Update stt_google.py

Browse files

Files changed (1) hide show

stt_google.py +105 -32

stt_google.py CHANGED Viewed

@@ -3,14 +3,16 @@ Google Cloud Speech-to-Text Implementation
 """
 import os
 import asyncio
-from typing import AsyncIterator, Optional, List
 from datetime import datetime
 import sys
 from logger import log_info, log_error, log_debug, log_warning
 # Import Google Cloud Speech only if available
 try:
-    from google.cloud import speech_v1p1beta1 as speech
     from google.api_core import exceptions
     GOOGLE_SPEECH_AVAILABLE = True
 except ImportError:
@@ -32,25 +34,26 @@ class GoogleCloudSTT(STTInterface):
         else:
             log_info("⚠️ Google credentials path not found, using default credentials")
-        self.client = speech.SpeechAsyncClient()
         self.streaming_config = None
         self.is_streaming = False
-        self.audio_queue = asyncio.Queue()
-    async def start_streaming(self, config: STTConfig) -> None:
         """Initialize streaming session"""
         try:
-            # Config'in dict mi STTConfig objesi mi olduğunu kontrol et
             if isinstance(config, dict):
-                # Dict ise STTConfig objesine çevir
                 stt_config = STTConfig(
                     language=config.get("language", "tr-TR"),
                     sample_rate=config.get("sample_rate", 16000),
                     encoding=config.get("encoding", "WEBM_OPUS"),
                     enable_punctuation=config.get("enable_punctuation", True),
-                    enable_word_timestamps=config.get("enable_word_timestamps", False),
-                    model=config.get("model", "latest_long"),
-                    use_enhanced=config.get("use_enhanced", True),
                     interim_results=config.get("interim_results", True),
                     single_utterance=config.get("single_utterance", False)
                 )
@@ -62,9 +65,8 @@ class GoogleCloudSTT(STTInterface):
                 sample_rate_hertz=stt_config.sample_rate,
                 language_code=stt_config.language,
                 enable_automatic_punctuation=stt_config.enable_punctuation,
-                enable_word_time_offsets=stt_config.enable_word_timestamps,
-                model=stt_config.model,
-                use_enhanced=stt_config.use_enhanced
             )
             self.streaming_config = speech.StreamingRecognitionConfig(
@@ -74,32 +76,84 @@ class GoogleCloudSTT(STTInterface):
             )
             self.is_streaming = True
-            log_info("✅ Google STT streaming started")
         except Exception as e:
-            log_error("❌ Failed to start Google STT streaming", e)
             raise
     async def stream_audio(self, audio_chunk: bytes) -> AsyncIterator[TranscriptionResult]:
         """Stream audio chunk and get transcription results"""
         if not self.is_streaming:
             raise RuntimeError("Streaming not started. Call start_streaming() first.")
         try:
-            # Add audio to queue
-            await self.audio_queue.put(audio_chunk)
-            # Process with Google STT
-            request = speech.StreamingRecognizeRequest(audio_content=audio_chunk)
-            # This is a simplified version - actual implementation would need
-            # proper streaming handling with Google's API
-            # For now, return empty iterator
-            return
-            yield  # Make it a generator
         except Exception as e:
-            log_error("❌ Google STT streaming error", e)
             raise
     async def stop_streaming(self) -> Optional[TranscriptionResult]:
@@ -108,15 +162,34 @@ class GoogleCloudSTT(STTInterface):
             return None
         try:
             self.is_streaming = False
-            log_info("✅ Google STT streaming stopped")
-            # Return final result if any
-            return None
         except Exception as e:
-            log_error("❌ Failed to stop Google STT streaming", e)
-            raise
     def supports_realtime(self) -> bool:
         """Google Cloud STT supports real-time streaming"""

 """
 import os
 import asyncio
+from typing import AsyncIterator, Optional, List, Any
 from datetime import datetime
 import sys
+import queue
+import threading
 from logger import log_info, log_error, log_debug, log_warning
 # Import Google Cloud Speech only if available
 try:
+    from google.cloud import speech
     from google.api_core import exceptions
     GOOGLE_SPEECH_AVAILABLE = True
 except ImportError:
         else:
             log_info("⚠️ Google credentials path not found, using default credentials")
+        self.client = speech.SpeechClient()
         self.streaming_config = None
         self.is_streaming = False
+        self.audio_queue = queue.Queue()
+        self.responses_queue = asyncio.Queue()
+        self.stream_thread = None
+        self.stop_event = threading.Event()
+    async def start_streaming(self, config: dict) -> None:
         """Initialize streaming session"""
         try:
+            log_info(f"🎤 Starting Google STT streaming with config: {config}")
+            # Convert dict to STTConfig if needed
             if isinstance(config, dict):
                 stt_config = STTConfig(
                     language=config.get("language", "tr-TR"),
                     sample_rate=config.get("sample_rate", 16000),
                     encoding=config.get("encoding", "WEBM_OPUS"),
                     enable_punctuation=config.get("enable_punctuation", True),
                     interim_results=config.get("interim_results", True),
                     single_utterance=config.get("single_utterance", False)
                 )
                 sample_rate_hertz=stt_config.sample_rate,
                 language_code=stt_config.language,
                 enable_automatic_punctuation=stt_config.enable_punctuation,
+                model="latest_long",
+                use_enhanced=True
             )
             self.streaming_config = speech.StreamingRecognitionConfig(
             )
             self.is_streaming = True
+            self.stop_event.clear()
+            # Start streaming thread
+            self.stream_thread = threading.Thread(target=self._run_stream)
+            self.stream_thread.start()
+            log_info("✅ Google STT streaming started successfully")
         except Exception as e:
+            log_error(f"❌ Failed to start Google STT streaming", error=str(e))
+            self.is_streaming = False
             raise
+    def _run_stream(self):
+        """Run the streaming recognition in a separate thread"""
+        try:
+            log_info("🎤 Google STT stream thread started")
+            def request_generator():
+                """Generate streaming requests"""
+                while not self.stop_event.is_set():
+                    try:
+                        # Get audio chunk with timeout
+                        chunk = self.audio_queue.get(timeout=0.1)
+                        if chunk is None:  # Poison pill
+                            break
+                        yield speech.StreamingRecognizeRequest(audio_content=chunk)
+                    except queue.Empty:
+                        continue
+            # Create streaming client
+            requests = request_generator()
+            responses = self.client.streaming_recognize(self.streaming_config, requests)
+            # Process responses
+            for response in responses:
+                if self.stop_event.is_set():
+                    break
+                for result in response.results:
+                    if result.alternatives:
+                        # Put result in async queue
+                        asyncio.run_coroutine_threadsafe(
+                            self.responses_queue.put(TranscriptionResult(
+                                text=result.alternatives[0].transcript,
+                                is_final=result.is_final,
+                                confidence=result.alternatives[0].confidence if result.alternatives[0].confidence else 0.0,
+                                timestamp=datetime.now().timestamp()
+                            )),
+                            asyncio.get_event_loop()
+                        )
+                        log_debug(f"📝 STT result: {result.alternatives[0].transcript}, final: {result.is_final}")
+        except Exception as e:
+            log_error(f"❌ Google STT stream error", error=str(e))
+        finally:
+            log_info("🎤 Google STT stream thread ended")
     async def stream_audio(self, audio_chunk: bytes) -> AsyncIterator[TranscriptionResult]:
         """Stream audio chunk and get transcription results"""
         if not self.is_streaming:
             raise RuntimeError("Streaming not started. Call start_streaming() first.")
         try:
+            # Put audio in queue for streaming thread
+            self.audio_queue.put(audio_chunk)
+            # Check for any results (non-blocking)
+            try:
+                while True:
+                    result = self.responses_queue.get_nowait()
+                    yield result
+            except asyncio.QueueEmpty:
+                pass
         except Exception as e:
+            log_error(f"❌ Google STT streaming error", error=str(e))
             raise
     async def stop_streaming(self) -> Optional[TranscriptionResult]:
             return None
         try:
+            log_info("🛑 Stopping Google STT streaming...")
             self.is_streaming = False
+            self.stop_event.set()
+            # Send poison pill to queue
+            self.audio_queue.put(None)
+            # Wait for thread to finish
+            if self.stream_thread:
+                self.stream_thread.join(timeout=5.0)
+            # Clear queues
+            while not self.audio_queue.empty():
+                self.audio_queue.get_nowait()
+            final_result = None
+            while not self.responses_queue.empty():
+                result = await self.responses_queue.get()
+                if result.is_final:
+                    final_result = result
+            log_info("✅ Google STT streaming stopped")
+            return final_result
         except Exception as e:
+            log_error(f"❌ Failed to stop Google STT streaming", error=str(e))
+            return None
     def supports_realtime(self) -> bool:
         """Google Cloud STT supports real-time streaming"""