ciyidogan committed on
Commit
30f7ccb
·
verified ·
1 Parent(s): 855e594

Update stt_google.py

Browse files
Files changed (1) hide show
  1. stt_google.py +105 -32
stt_google.py CHANGED
@@ -3,14 +3,16 @@ Google Cloud Speech-to-Text Implementation
3
  """
4
  import os
5
  import asyncio
6
- from typing import AsyncIterator, Optional, List
7
  from datetime import datetime
8
  import sys
 
 
9
  from logger import log_info, log_error, log_debug, log_warning
10
 
11
  # Import Google Cloud Speech only if available
12
  try:
13
- from google.cloud import speech_v1p1beta1 as speech
14
  from google.api_core import exceptions
15
  GOOGLE_SPEECH_AVAILABLE = True
16
  except ImportError:
@@ -32,25 +34,26 @@ class GoogleCloudSTT(STTInterface):
32
  else:
33
  log_info("⚠️ Google credentials path not found, using default credentials")
34
 
35
- self.client = speech.SpeechAsyncClient()
36
  self.streaming_config = None
37
  self.is_streaming = False
38
- self.audio_queue = asyncio.Queue()
 
 
 
39
 
40
- async def start_streaming(self, config: STTConfig) -> None:
41
  """Initialize streaming session"""
42
  try:
43
- # Config'in dict mi STTConfig objesi mi olduğunu kontrol et
 
 
44
  if isinstance(config, dict):
45
- # Dict ise STTConfig objesine çevir
46
  stt_config = STTConfig(
47
  language=config.get("language", "tr-TR"),
48
  sample_rate=config.get("sample_rate", 16000),
49
  encoding=config.get("encoding", "WEBM_OPUS"),
50
  enable_punctuation=config.get("enable_punctuation", True),
51
- enable_word_timestamps=config.get("enable_word_timestamps", False),
52
- model=config.get("model", "latest_long"),
53
- use_enhanced=config.get("use_enhanced", True),
54
  interim_results=config.get("interim_results", True),
55
  single_utterance=config.get("single_utterance", False)
56
  )
@@ -62,9 +65,8 @@ class GoogleCloudSTT(STTInterface):
62
  sample_rate_hertz=stt_config.sample_rate,
63
  language_code=stt_config.language,
64
  enable_automatic_punctuation=stt_config.enable_punctuation,
65
- enable_word_time_offsets=stt_config.enable_word_timestamps,
66
- model=stt_config.model,
67
- use_enhanced=stt_config.use_enhanced
68
  )
69
 
70
  self.streaming_config = speech.StreamingRecognitionConfig(
@@ -74,32 +76,84 @@ class GoogleCloudSTT(STTInterface):
74
  )
75
 
76
  self.is_streaming = True
77
- log_info("✅ Google STT streaming started")
 
 
 
 
 
 
78
 
79
  except Exception as e:
80
- log_error("❌ Failed to start Google STT streaming", e)
 
81
  raise
82
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
  async def stream_audio(self, audio_chunk: bytes) -> AsyncIterator[TranscriptionResult]:
84
  """Stream audio chunk and get transcription results"""
85
  if not self.is_streaming:
86
  raise RuntimeError("Streaming not started. Call start_streaming() first.")
87
 
88
  try:
89
- # Add audio to queue
90
- await self.audio_queue.put(audio_chunk)
91
-
92
- # Process with Google STT
93
- request = speech.StreamingRecognizeRequest(audio_content=audio_chunk)
94
-
95
- # This is a simplified version - actual implementation would need
96
- # proper streaming handling with Google's API
97
- # For now, return empty iterator
98
- return
99
- yield # Make it a generator
100
 
 
 
 
 
 
 
 
 
101
  except Exception as e:
102
- log_error("❌ Google STT streaming error", e)
103
  raise
104
 
105
  async def stop_streaming(self) -> Optional[TranscriptionResult]:
@@ -108,15 +162,34 @@ class GoogleCloudSTT(STTInterface):
108
  return None
109
 
110
  try:
 
 
111
  self.is_streaming = False
112
- log_info("✅ Google STT streaming stopped")
113
 
114
- # Return final result if any
115
- return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
 
117
  except Exception as e:
118
- log_error("❌ Failed to stop Google STT streaming", e)
119
- raise
120
 
121
  def supports_realtime(self) -> bool:
122
  """Google Cloud STT supports real-time streaming"""
 
3
  """
4
  import os
5
  import asyncio
6
+ from typing import AsyncIterator, Optional, List, Any
7
  from datetime import datetime
8
  import sys
9
+ import queue
10
+ import threading
11
  from logger import log_info, log_error, log_debug, log_warning
12
 
13
  # Import Google Cloud Speech only if available
14
  try:
15
+ from google.cloud import speech
16
  from google.api_core import exceptions
17
  GOOGLE_SPEECH_AVAILABLE = True
18
  except ImportError:
 
34
  else:
35
  log_info("⚠️ Google credentials path not found, using default credentials")
36
 
37
+ self.client = speech.SpeechClient()
38
  self.streaming_config = None
39
  self.is_streaming = False
40
+ self.audio_queue = queue.Queue()
41
+ self.responses_queue = asyncio.Queue()
42
+ self.stream_thread = None
43
+ self.stop_event = threading.Event()
44
 
45
+ async def start_streaming(self, config: dict) -> None:
46
  """Initialize streaming session"""
47
  try:
48
+ log_info(f"🎤 Starting Google STT streaming with config: {config}")
49
+
50
+ # Convert dict to STTConfig if needed
51
  if isinstance(config, dict):
 
52
  stt_config = STTConfig(
53
  language=config.get("language", "tr-TR"),
54
  sample_rate=config.get("sample_rate", 16000),
55
  encoding=config.get("encoding", "WEBM_OPUS"),
56
  enable_punctuation=config.get("enable_punctuation", True),
 
 
 
57
  interim_results=config.get("interim_results", True),
58
  single_utterance=config.get("single_utterance", False)
59
  )
 
65
  sample_rate_hertz=stt_config.sample_rate,
66
  language_code=stt_config.language,
67
  enable_automatic_punctuation=stt_config.enable_punctuation,
68
+ model="latest_long",
69
+ use_enhanced=True
 
70
  )
71
 
72
  self.streaming_config = speech.StreamingRecognitionConfig(
 
76
  )
77
 
78
  self.is_streaming = True
79
+ self.stop_event.clear()
80
+
81
+ # Start streaming thread
82
+ self.stream_thread = threading.Thread(target=self._run_stream)
83
+ self.stream_thread.start()
84
+
85
+ log_info("✅ Google STT streaming started successfully")
86
 
87
  except Exception as e:
88
+ log_error(f"❌ Failed to start Google STT streaming", error=str(e))
89
+ self.is_streaming = False
90
  raise
91
 
92
+ def _run_stream(self):
93
+ """Run the streaming recognition in a separate thread"""
94
+ try:
95
+ log_info("🎤 Google STT stream thread started")
96
+
97
+ def request_generator():
98
+ """Generate streaming requests"""
99
+ while not self.stop_event.is_set():
100
+ try:
101
+ # Get audio chunk with timeout
102
+ chunk = self.audio_queue.get(timeout=0.1)
103
+ if chunk is None: # Poison pill
104
+ break
105
+ yield speech.StreamingRecognizeRequest(audio_content=chunk)
106
+ except queue.Empty:
107
+ continue
108
+
109
+ # Create streaming client
110
+ requests = request_generator()
111
+ responses = self.client.streaming_recognize(self.streaming_config, requests)
112
+
113
+ # Process responses
114
+ for response in responses:
115
+ if self.stop_event.is_set():
116
+ break
117
+
118
+ for result in response.results:
119
+ if result.alternatives:
120
+ # Put result in async queue
121
+ asyncio.run_coroutine_threadsafe(
122
+ self.responses_queue.put(TranscriptionResult(
123
+ text=result.alternatives[0].transcript,
124
+ is_final=result.is_final,
125
+ confidence=result.alternatives[0].confidence if result.alternatives[0].confidence else 0.0,
126
+ timestamp=datetime.now().timestamp()
127
+ )),
128
+ asyncio.get_event_loop()
129
+ )
130
+
131
+ log_debug(f"📝 STT result: {result.alternatives[0].transcript}, final: {result.is_final}")
132
+
133
+ except Exception as e:
134
+ log_error(f"❌ Google STT stream error", error=str(e))
135
+ finally:
136
+ log_info("🎤 Google STT stream thread ended")
137
+
138
  async def stream_audio(self, audio_chunk: bytes) -> AsyncIterator[TranscriptionResult]:
139
  """Stream audio chunk and get transcription results"""
140
  if not self.is_streaming:
141
  raise RuntimeError("Streaming not started. Call start_streaming() first.")
142
 
143
  try:
144
+ # Put audio in queue for streaming thread
145
+ self.audio_queue.put(audio_chunk)
 
 
 
 
 
 
 
 
 
146
 
147
+ # Check for any results (non-blocking)
148
+ try:
149
+ while True:
150
+ result = self.responses_queue.get_nowait()
151
+ yield result
152
+ except asyncio.QueueEmpty:
153
+ pass
154
+
155
  except Exception as e:
156
+ log_error(f"❌ Google STT streaming error", error=str(e))
157
  raise
158
 
159
  async def stop_streaming(self) -> Optional[TranscriptionResult]:
 
162
  return None
163
 
164
  try:
165
+ log_info("🛑 Stopping Google STT streaming...")
166
+
167
  self.is_streaming = False
168
+ self.stop_event.set()
169
 
170
+ # Send poison pill to queue
171
+ self.audio_queue.put(None)
172
+
173
+ # Wait for thread to finish
174
+ if self.stream_thread:
175
+ self.stream_thread.join(timeout=5.0)
176
+
177
+ # Clear queues
178
+ while not self.audio_queue.empty():
179
+ self.audio_queue.get_nowait()
180
+
181
+ final_result = None
182
+ while not self.responses_queue.empty():
183
+ result = await self.responses_queue.get()
184
+ if result.is_final:
185
+ final_result = result
186
+
187
+ log_info("✅ Google STT streaming stopped")
188
+ return final_result
189
 
190
  except Exception as e:
191
+ log_error(f"❌ Failed to stop Google STT streaming", error=str(e))
192
+ return None
193
 
194
  def supports_realtime(self) -> bool:
195
  """Google Cloud STT supports real-time streaming"""