ciyidogan committed on
Commit
6dce7fc
·
verified ·
1 Parent(s): 16f893b

Delete stt_google.py

Browse files
Files changed (1) hide show
  1. stt_google.py +0 -485
stt_google.py DELETED
@@ -1,485 +0,0 @@
1
- """
2
- Google Cloud Speech-to-Text Implementation
3
- """
4
- import os
5
- import asyncio
6
- from typing import AsyncIterator, Optional, List, Any
7
- from datetime import datetime
8
- import sys
9
- import queue
10
- import threading
11
- import time
12
- import traceback
13
- from logger import log_info, log_error, log_debug, log_warning
14
-
15
- # Import Google Cloud Speech only if available
16
- try:
17
- from google.cloud import speech
18
- from google.api_core import exceptions
19
- GOOGLE_SPEECH_AVAILABLE = True
20
- except ImportError:
21
- GOOGLE_SPEECH_AVAILABLE = False
22
- log_info("⚠️ Google Cloud Speech library not installed")
23
-
24
- from stt_interface import STTInterface, STTConfig, TranscriptionResult
25
-
26
class GoogleCloudSTT(STTInterface):
    """Google Cloud Speech-to-Text implementation.

    Audio chunks passed to ``stream_audio`` are placed on ``audio_queue``. A
    background thread (``_run_stream``) forwards them to the streaming
    recognize call and pushes every ``TranscriptionResult`` onto
    ``responses_queue``, which ``stream_audio`` drains without blocking.
    """

    # Seconds the worker thread waits for room in a full responses queue
    # before dropping a result, so it can never block forever.
    _RESULT_PUT_TIMEOUT = 1.0

    def __init__(self, credentials_path: str):
        """Validate the credentials file and set up idle streaming state.

        Args:
            credentials_path: Path to the Google credentials file. It is
                exported through ``GOOGLE_APPLICATION_CREDENTIALS``.

        Raises:
            ImportError: google-cloud-speech is not installed.
            FileNotFoundError: ``credentials_path`` is empty or missing.
            Exception: Any error raised while creating the test client is
                logged and re-raised unchanged.
        """
        if not GOOGLE_SPEECH_AVAILABLE:
            raise ImportError("google-cloud-speech library not installed. Run: pip install google-cloud-speech")

        if not (credentials_path and os.path.exists(credentials_path)):
            log_error(f"❌ Google credentials path not found: {credentials_path}")
            raise FileNotFoundError(f"Credentials file not found: {credentials_path}")

        os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = credentials_path
        log_info(f"✅ Google credentials set from: {credentials_path}")

        # Build a throwaway client to verify the credentials; the real client
        # is created per session in start_streaming().
        try:
            test_client = speech.SpeechClient()
            log_info("🔐 Testing Google credentials...")
            log_info("✅ Google credentials valid")
            if hasattr(test_client, 'transport') and hasattr(test_client.transport, 'close'):
                test_client.transport.close()
        except Exception as e:
            log_error("❌ Google credentials error", error=str(e))
            raise

        # Per-session resources; all created in start_streaming().
        self.client = None
        self.streaming_config = None
        self.is_streaming = False
        self.audio_queue = None
        self.responses_queue = None
        self.stream_thread = None
        self.stop_event = threading.Event()
        self.credentials_path = credentials_path

        # Flags read by the worker thread. They must exist before the first
        # session, otherwise _run_stream fails with AttributeError.
        self.debug_mode = False
        self.single_utterance = False

        # Session tracking
        self.session_id = 0
        self.total_audio_bytes = 0
        self.total_chunks = 0

    @staticmethod
    def _drain_queue(target_queue) -> None:
        """Discard every item currently in ``target_queue`` (None is a no-op)."""
        if target_queue is None:
            return
        while True:
            try:
                target_queue.get_nowait()
            except queue.Empty:
                return

    def _reset_session_data(self):
        """Empty both queues, zero the counters and advance the session ID."""
        self._drain_queue(self.audio_queue)
        self._drain_queue(self.responses_queue)

        self.total_audio_bytes = 0
        self.total_chunks = 0

        self.session_id += 1

        log_info(f"🔄 Google STT session data reset. New session ID: {self.session_id}")

    def _create_fresh_queues(self):
        """Replace both queues with new bounded instances."""
        # Empty the old queues first so their contents are released.
        self._drain_queue(self.audio_queue)
        self._drain_queue(self.responses_queue)

        self.audio_queue = queue.Queue(maxsize=1000)
        self.responses_queue = queue.Queue(maxsize=100)
        log_debug("✅ Created fresh queues")

    async def start_streaming(self, config: dict) -> None:
        """Initialize a streaming session with clean state.

        Args:
            config: Either a dict with the optional keys ``language``,
                ``sample_rate``, ``encoding``, ``enable_punctuation``,
                ``interim_results`` and ``single_utterance``, or an object
                exposing those values as attributes (e.g. ``STTConfig``).

        Raises:
            Exception: Any setup failure is logged, the streaming state is
                reset, and the error is re-raised.
        """
        try:
            # Stop a previous session before starting a new one.
            if self.is_streaming or self.stream_thread:
                log_warning("⚠️ Previous stream still active, stopping it first")
                await self.stop_streaming()
                # Give the previous session a moment to finish cleaning up.
                await asyncio.sleep(0.5)

            # Resets counters and increments the session ID.
            self._reset_session_data()

            log_info(f"🎤 Starting Google STT streaming session #{self.session_id} with config: {config}")

            self._create_fresh_queues()

            # A new event per session: a thread left over from an earlier
            # session keeps its own (set) event and cannot be revived.
            self.stop_event = threading.Event()

            # One client per session.
            self.client = speech.SpeechClient()
            log_info("✅ Created new Google Speech client")

            # Convert dict to STTConfig if needed
            if isinstance(config, dict):
                stt_config = STTConfig(
                    language=config.get("language", "tr-TR"),
                    sample_rate=config.get("sample_rate", 16000),
                    encoding=config.get("encoding", "WEBM_OPUS"),
                    enable_punctuation=config.get("enable_punctuation", True),
                    interim_results=config.get("interim_results", True),
                    single_utterance=config.get("single_utterance", False)
                )
            else:
                stt_config = config

            recognition_config = speech.RecognitionConfig(
                encoding=self._get_encoding(stt_config.encoding),
                sample_rate_hertz=stt_config.sample_rate,
                language_code=stt_config.language,
                enable_automatic_punctuation=stt_config.enable_punctuation,
                model="latest_long",
                use_enhanced=True
            )

            self.streaming_config = speech.StreamingRecognitionConfig(
                config=recognition_config,
                interim_results=stt_config.interim_results,
                single_utterance=stt_config.single_utterance
            )

            # Remembered so the worker thread can stop after the first final
            # result when single-utterance mode is requested.
            self.single_utterance = bool(stt_config.single_utterance)

            self.is_streaming = True

            # Start streaming thread with unique name
            self.stream_thread = threading.Thread(
                target=self._run_stream,
                name=f"GoogleSTT-Session-{self.session_id}"
            )
            # Daemon thread: it must not keep the process alive on exit.
            self.stream_thread.daemon = True
            self.stream_thread.start()

            log_info(f"✅ Google STT streaming session #{self.session_id} started successfully")

        except Exception as e:
            log_error("❌ Failed to start Google STT streaming", error=str(e))
            self.is_streaming = False
            self.client = None
            self._create_fresh_queues()  # leave clean queues behind on failure too
            raise

    def _put_result(self, result: TranscriptionResult, target_queue=None):
        """Queue a transcription result for ``stream_audio`` to pick up.

        Args:
            result: The result to enqueue.
            target_queue: Queue to write to; defaults to the current
                ``responses_queue``. The worker thread passes the queue it
                started with so it never writes into a later session.

        The put is time-limited: if the queue stays full the result is
        dropped with a warning instead of blocking the caller indefinitely.
        """
        destination = target_queue if target_queue is not None else self.responses_queue
        if destination is None:
            log_warning("⚠️ No responses queue available, dropping STT result")
            return

        try:
            destination.put(result, timeout=self._RESULT_PUT_TIMEOUT)
        except queue.Full:
            log_warning("⚠️ Responses queue full, dropping STT result")
        except Exception as e:
            log_error(f"❌ Error queuing result: {e}")

    @staticmethod
    def _log_stream_error(error) -> None:
        """Log an API error raised by the streaming call at a fitting level."""
        error_details = str(getattr(error, "message", None) or error)
        error_code = getattr(error, "grpc_status_code", None)
        error_msg = f"gRPC error - Code: {error_code}, Details: {error_details}"

        # Status codes render upper-case (e.g. INVALID_ARGUMENT), so the
        # substring checks are done case-insensitively.
        haystack = error_msg.lower()

        if "audio timeout error" in haystack or "stream duration" in haystack:
            log_info("⏱️ Google STT stream timeout after long duration. This is expected for long sessions.")
        elif "bad language code" in haystack:
            log_error("❌ Invalid language code in STT config. Check locale settings.")
        elif isinstance(error, exceptions.InvalidArgument) or "invalid_argument" in haystack:
            log_error("❌ Invalid STT configuration. Check encoding and sample rate.")
        elif isinstance(error, exceptions.DeadlineExceeded) or "deadline exceeded" in haystack:
            log_error("❌ Google STT response timeout - possibly network issue or slow connection")
        elif (
            isinstance(error, exceptions.ServiceUnavailable)
            or "503" in haystack
            or "service unavailable" in haystack
        ):
            log_error("❌ Google STT service temporarily unavailable. Will retry...")
        else:
            log_error(f"❌ Google STT stream error: {error_msg}")

    def _run_stream(self):
        """Run the streaming recognition loop; target of the session thread.

        Pulls audio chunks from the audio queue, sends them to the streaming
        recognize call, and queues a ``TranscriptionResult`` for the first
        alternative of every result received. A ``None`` chunk or a set stop
        event ends the request side.
        """
        # Snapshot this session's resources. If the thread outlives its
        # session it keeps working on its own objects, not on a newer one's.
        session_id = self.session_id
        stop_event = self.stop_event
        audio_queue = self.audio_queue
        responses_queue = self.responses_queue
        client = self.client
        streaming_config = self.streaming_config
        single_utterance = self.single_utterance

        try:
            log_info("🎤 Google STT stream thread started")

            def request_generator():
                """Yield one streaming request per queued audio chunk."""
                chunk_count = 0
                total_bytes = 0

                while not stop_event.is_set():
                    try:
                        chunk = audio_queue.get(timeout=0.1)
                    except queue.Empty:
                        # Nothing queued yet; loop to re-check the stop event.
                        continue

                    if chunk is None:
                        log_info("📛 Poison pill received, stopping request generator")
                        break

                    try:
                        chunk_size = len(chunk)
                        request = speech.StreamingRecognizeRequest(audio_content=chunk)
                    except Exception as e:
                        log_error(f"❌ Error in request generator: {e}")
                        break

                    chunk_count += 1
                    total_bytes += chunk_size

                    if chunk_count == 1:
                        log_info(f"📤 First audio chunk to Google STT - size: {chunk_size} bytes")

                    # Progress report every 100 chunks.
                    if chunk_count % 100 == 0:
                        avg_chunk_size = total_bytes / chunk_count
                        log_info(f"📤 Progress: {chunk_count} chunks, {total_bytes/1024:.1f}KB total, avg {avg_chunk_size:.0f} bytes/chunk")

                    yield request

            requests = request_generator()

            log_info("🎤 Creating Google STT streaming client...")

            try:
                responses = client.streaming_recognize(
                    streaming_config,
                    requests,
                    timeout=300  # 5 minute timeout
                )

                log_info("✅ Google STT streaming client created")

                response_count = 0
                empty_response_count = 0

                for response in responses:
                    response_count += 1

                    if response_count == 1:
                        log_info("📨 First response received from Google STT")

                    if stop_event.is_set():
                        log_info("🛑 Stop event detected, breaking response loop")
                        break

                    if not response.results:
                        empty_response_count += 1
                        if empty_response_count % 10 == 0:
                            log_debug(f"Empty responses received: {empty_response_count}")
                        continue

                    for result_index, result in enumerate(response.results):
                        if not result.alternatives:
                            continue

                        alternative = result.alternatives[0]

                        # Interim results are only logged in debug mode.
                        if result.is_final or self.debug_mode:
                            log_debug(f"📋 Result {result_index}: is_final={result.is_final}, alternatives={len(result.alternatives)}")

                        transcription_result = TranscriptionResult(
                            text=alternative.transcript,
                            is_final=result.is_final,
                            confidence=getattr(alternative, 'confidence', 0.0)
                        )

                        # Final results are always logged.
                        if result.is_final:
                            log_info(f"🎯 GOOGLE STT FINAL: '{transcription_result.text}'")

                        self._put_result(transcription_result, responses_queue)

                        # In single-utterance mode the first final result
                        # ends the session.
                        if result.is_final and single_utterance:
                            log_info("🏁 Final result received with single_utterance=True, stopping stream")
                            stop_event.set()
                            break

                log_info(f"📊 Google STT stream ended. Total responses: {response_count}, Empty: {empty_response_count}")

            except exceptions.GoogleAPICallError as e:
                # API failures get a targeted log line; anything else falls
                # through to the handler below with a full traceback.
                self._log_stream_error(e)

        except Exception as e:
            log_error("❌ Fatal error in STT stream thread", error=str(e), traceback=traceback.format_exc())
        finally:
            log_info("🎤 Google STT stream thread ended")
            # Only clear the flag for our own session; a newer session may
            # already be running when a late thread gets here.
            if self.session_id == session_id:
                self.is_streaming = False

    async def stream_audio(self, audio_chunk: bytes) -> AsyncIterator[TranscriptionResult]:
        """Queue one audio chunk and yield the results available right now.

        Args:
            audio_chunk: Raw audio bytes to forward to the recognizer.

        Yields:
            Every ``TranscriptionResult`` currently waiting in the responses
            queue; the generator ends as soon as the queue is empty.

        Raises:
            RuntimeError: Streaming has not been started.
        """
        if not self.is_streaming:
            thread_state = (
                self.stream_thread and self.stream_thread.is_alive()
                if hasattr(self, 'stream_thread')
                else 'No thread'
            )
            log_error(f"❌ STT not streaming - is_streaming: {self.is_streaming}, thread alive: {thread_state}")
            raise RuntimeError("Streaming not started. Call start_streaming() first.")

        try:
            # Put audio in queue for streaming thread
            self.audio_queue.put(audio_chunk)

            # Hand back whatever results have arrived, without waiting.
            while True:
                try:
                    result = self.responses_queue.get_nowait()
                except queue.Empty:
                    break
                yield result

        except Exception as e:
            log_error("❌ Google STT streaming error", error=str(e))
            # Mark the stream as down so the caller restarts it.
            self.is_streaming = False
            raise

    def _clear_stream_state(self) -> None:
        """Reset every per-session attribute to its idle value."""
        self.is_streaming = False
        self.stream_thread = None
        self.client = None
        self.streaming_config = None
        # Replace rather than clear: a thread that missed the join timeout
        # still holds the old, set event and therefore stays stopped.
        self.stop_event = threading.Event()
        # Queues are recreated by the next start_streaming().
        self.audio_queue = None
        self.responses_queue = None

    async def stop_streaming(self) -> Optional[TranscriptionResult]:
        """Stop streaming and release all session resources.

        Returns:
            The last final ``TranscriptionResult`` still waiting in the
            responses queue, or ``None`` if there is none, if nothing was
            running, or if cleanup failed.
        """
        if not self.is_streaming and not self.stream_thread:
            log_debug("Already stopped, nothing to do")
            return None

        try:
            log_info(f"🛑 Stopping Google STT streaming session #{self.session_id}")

            # Flip the flags first so no new audio is accepted.
            self.is_streaming = False
            self.stop_event.set()

            # Send poison pill to stop request generator
            if self.audio_queue:
                try:
                    self.audio_queue.put_nowait(None)
                except queue.Full:
                    # The set stop event ends the generator on its next
                    # 0.1s poll, so the pill is not required.
                    pass

            worker = self.stream_thread
            if worker and worker.is_alive():
                log_info("⏳ Waiting for stream thread to finish...")
                # NOTE(review): this join blocks the event loop for up to 5s.
                worker.join(timeout=5.0)

                if worker.is_alive():
                    # Python cannot kill a thread; being a daemon, it ends
                    # with the process.
                    log_warning("⚠️ STT thread did not stop gracefully after 5s")
                else:
                    log_info("✅ Stream thread finished")

            # Keep the last final result that was never consumed.
            final_result = None
            if self.responses_queue:
                while True:
                    try:
                        pending = self.responses_queue.get_nowait()
                    except queue.Empty:
                        break
                    if getattr(pending, 'is_final', False):
                        final_result = pending

            if self.client:
                try:
                    if hasattr(self.client, 'transport') and hasattr(self.client.transport, 'close'):
                        self.client.transport.close()
                        log_debug("✅ Client transport closed")

                    if hasattr(self.client, '_transport') and hasattr(self.client._transport, '_grpc_channel'):
                        self.client._transport._grpc_channel.close()
                        log_debug("✅ gRPC channel closed")
                except Exception as e:
                    log_warning(f"⚠️ Error closing Google client: {e}")
                finally:
                    self.client = None

            self._clear_stream_state()

            log_info(f"✅ Google STT streaming session #{self.session_id} stopped and cleaned")
            return final_result

        except Exception as e:
            log_error("❌ Error during stop_streaming", error=str(e))
            # Force cleanup on error
            self._clear_stream_state()
            return None

    def supports_realtime(self) -> bool:
        """Google Cloud STT supports real-time streaming."""
        return True

    def get_supported_languages(self) -> List[str]:
        """Return the language codes this provider advertises."""
        return [
            "tr-TR",  # Turkish
            "en-US",  # English (US)
            "en-GB",  # English (UK)
            "de-DE",  # German
            "fr-FR",  # French
            "es-ES",  # Spanish
            "it-IT",  # Italian
            "pt-BR",  # Portuguese (Brazil)
            "ru-RU",  # Russian
            "ja-JP",  # Japanese
            "ko-KR",  # Korean
            "zh-CN",  # Chinese (Simplified)
            "ar-SA",  # Arabic
        ]

    def get_provider_name(self) -> str:
        """Return the provider identifier, ``"google"``."""
        return "google"

    def _get_encoding(self, encoding_str: str):
        """Convert an encoding name to the Google Speech ``AudioEncoding`` enum.

        Args:
            encoding_str: Encoding name, matched case-insensitively.

        Returns:
            The matching enum member; ``LINEAR16`` for unknown, empty or
            ``None`` names; ``None`` when the Google library is unavailable.
        """
        if not GOOGLE_SPEECH_AVAILABLE:
            return None

        audio_encoding = speech.RecognitionConfig.AudioEncoding
        encoding_map = {
            "WEBM_OPUS": audio_encoding.WEBM_OPUS,
            "LINEAR16": audio_encoding.LINEAR16,
            "FLAC": audio_encoding.FLAC,
            "MP3": audio_encoding.MP3,
            "OGG_OPUS": audio_encoding.OGG_OPUS,
            "MULAW": audio_encoding.MULAW,
            "AMR": audio_encoding.AMR,
            "AMR_WB": audio_encoding.AMR_WB,
            "ENCODING_UNSPECIFIED": audio_encoding.ENCODING_UNSPECIFIED,
        }
        # Default to LINEAR16 if not found (also covers a missing name).
        return encoding_map.get((encoding_str or "").upper(), audio_encoding.LINEAR16)