""" Google Cloud Speech-to-Text Implementation """ import os import asyncio from typing import AsyncIterator, Optional, List, Any from datetime import datetime import sys import queue import threading from logger import log_info, log_error, log_debug, log_warning # Import Google Cloud Speech only if available try: from google.cloud import speech from google.api_core import exceptions GOOGLE_SPEECH_AVAILABLE = True except ImportError: GOOGLE_SPEECH_AVAILABLE = False log_info("⚠️ Google Cloud Speech library not installed") from stt_interface import STTInterface, STTConfig, TranscriptionResult class GoogleCloudSTT(STTInterface): """Google Cloud Speech-to-Text implementation""" def __init__(self, credentials_path: str): if not GOOGLE_SPEECH_AVAILABLE: raise ImportError("google-cloud-speech library not installed. Run: pip install google-cloud-speech") if credentials_path and os.path.exists(credentials_path): os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = credentials_path log_info(f"✅ Google credentials set from: {credentials_path}") # Test credential'ları try: self.client = speech.SpeechClient() # Basit bir test çağrısı log_info("🔐 Testing Google credentials...") # Bu sadece client'ın oluşturulabildiğini test eder log_info("✅ Google credentials valid") except Exception as e: log_error(f"❌ Google credentials error", error=str(e)) raise else: log_error(f"❌ Google credentials path not found: {credentials_path}") raise FileNotFoundError(f"Credentials file not found: {credentials_path}") self.client = speech.SpeechClient() self.streaming_config = None self.is_streaming = False self.audio_queue = queue.Queue() self.responses_queue = queue.Queue() # Normal Queue, asyncio.Queue değil! self.stream_thread = None self.stop_event = threading.Event() async def start_streaming(self, config: dict) -> None: """Initialize streaming session""" try: log_info(f"🎤 Starting Google STT streaming with config: {config}") # Convert dict to STTConfig if needed if isinstance(config, dict): stt_config = STTConfig( language=config.get("language", "tr-TR"), sample_rate=config.get("sample_rate", 16000), encoding=config.get("encoding", "WEBM_OPUS"), enable_punctuation=config.get("enable_punctuation", True), interim_results=config.get("interim_results", True), single_utterance=config.get("single_utterance", False) ) else: stt_config = config recognition_config = speech.RecognitionConfig( encoding=self._get_encoding(stt_config.encoding), sample_rate_hertz=stt_config.sample_rate, language_code=stt_config.language, enable_automatic_punctuation=stt_config.enable_punctuation, model="latest_long", use_enhanced=True ) self.streaming_config = speech.StreamingRecognitionConfig( config=recognition_config, interim_results=stt_config.interim_results, single_utterance=stt_config.single_utterance ) self.is_streaming = True self.stop_event.clear() # Start streaming thread self.stream_thread = threading.Thread(target=self._run_stream) self.stream_thread.start() log_info("✅ Google STT streaming started successfully") except Exception as e: log_error(f"❌ Failed to start Google STT streaming", error=str(e)) self.is_streaming = False raise def _put_result(self, result: TranscriptionResult): """Helper to put result in queue""" try: self.responses_queue.put(result) # Debug log'u kaldırdık except Exception as e: log_error(f"❌ Error queuing result: {e}") def _run_stream(self): """Run the streaming recognition in a separate thread""" try: log_info("🎤 Google STT stream thread started") def request_generator(): """Generate streaming requests""" chunk_count = 0 start_time = datetime.now() while not self.stop_event.is_set(): try: # 5 dakika sınırına yaklaşıyorsak stream'i sonlandır elapsed = (datetime.now() - start_time).total_seconds() if elapsed > 280: # 4 dakika 40 saniye - güvenli margin log_warning(f"⚠️ Approaching 5-minute limit ({elapsed:.1f}s), ending stream gracefully") break # Get audio chunk with timeout chunk = self.audio_queue.get(timeout=0.1) if chunk is None: # Poison pill log_info("📛 Poison pill received, stopping request generator") break chunk_count += 1 # Sadece önemli milestone'larda logla if chunk_count == 1: log_info(f"📤 First chunk sent to Google STT, size: {len(chunk)} bytes") elif chunk_count % 100 == 0: log_info(f"📤 Sent {chunk_count} chunks to Google STT (elapsed: {elapsed:.1f}s)") yield speech.StreamingRecognizeRequest(audio_content=chunk) except queue.Empty: continue except Exception as e: log_error(f"❌ Error in request generator: {e}") break # Create streaming client requests = request_generator() log_info("🎤 Creating Google STT streaming client...") try: responses = self.client.streaming_recognize(self.streaming_config, requests) log_info("✅ Google STT streaming client created") # Process responses response_count = 0 empty_response_count = 0 for response in responses: response_count += 1 if self.stop_event.is_set(): log_info("🛑 Stop event detected, breaking response loop") break # Boş response'ları say ama loglama if not response.results: empty_response_count += 1 if empty_response_count % 50 == 0: log_warning(f"⚠️ Received {empty_response_count} empty responses from Google STT") continue for result in response.results: if not result.alternatives: continue # İlk alternatifi al alternative = result.alternatives[0] # Sadece anlamlı text'leri işle if alternative.transcript.strip(): # Create transcription result transcription = TranscriptionResult( text=alternative.transcript, is_final=result.is_final, confidence=alternative.confidence if hasattr(alternative, 'confidence') and alternative.confidence else 0.0, timestamp=datetime.now().timestamp() ) # Put result in queue self._put_result(transcription) # SADECE final result'ları logla if result.is_final: log_info(f"🎯 GOOGLE STT FINAL: '{alternative.transcript}'") log_info(f"📊 Google STT stream ended. Total responses: {response_count}, Empty: {empty_response_count}") except Exception as e: error_msg = str(e) # Detaylı hata mesajları if "Exceeded maximum allowed stream duration" in error_msg: log_warning("⚠️ Stream duration limit exceeded (5 minutes). This is expected for long sessions.") # Bu bir error değil, normal davranış - result queue'ya error koymuyoruz elif "Bad language code" in error_msg: log_error(f"❌ Invalid language code in STT config. Check locale settings.") elif "invalid_argument" in error_msg: log_error(f"❌ Invalid STT configuration. Check encoding and sample rate.") elif "Deadline Exceeded" in error_msg: log_error(f"❌ Google STT timeout - possibly network issue or slow connection") else: log_error(f"❌ Google STT stream error: {error_msg}") except Exception as e: log_error(f"❌ Fatal error in STT stream thread", error=str(e), traceback=traceback.format_exc()) finally: log_info("🎤 Google STT stream thread ended") async def stream_audio(self, audio_chunk: bytes) -> AsyncIterator[TranscriptionResult]: """Stream audio chunk and get transcription results""" if not self.is_streaming: raise RuntimeError("Streaming not started. Call start_streaming() first.") try: # Put audio in queue for streaming thread self.audio_queue.put(audio_chunk) # Check for any results in queue while True: try: # Non-blocking get from normal queue result = self.responses_queue.get_nowait() # Debug log'u kaldırdık yield result except queue.Empty: # No more results in queue break except Exception as e: log_error(f"❌ Google STT streaming error", error=str(e)) raise async def stop_streaming(self) -> Optional[TranscriptionResult]: """Stop streaming and get final result""" if not self.is_streaming: return None try: log_info("🛑 Stopping Google STT streaming...") self.is_streaming = False self.stop_event.set() # Send poison pill to queue self.audio_queue.put(None) # Wait for thread to finish if self.stream_thread: self.stream_thread.join(timeout=5.0) # Clear queues while not self.audio_queue.empty(): self.audio_queue.get_nowait() final_result = None while not self.responses_queue.empty(): result = await self.responses_queue.get() if result.is_final: final_result = result log_info("✅ Google STT streaming stopped") return final_result except Exception as e: log_error(f"❌ Failed to stop Google STT streaming", error=str(e)) return None def supports_realtime(self) -> bool: """Google Cloud STT supports real-time streaming""" return True def get_supported_languages(self) -> List[str]: """Get list of supported language codes""" return [ "tr-TR", # Turkish "en-US", # English (US) "en-GB", # English (UK) "de-DE", # German "fr-FR", # French "es-ES", # Spanish "it-IT", # Italian "pt-BR", # Portuguese (Brazil) "ru-RU", # Russian "ja-JP", # Japanese "ko-KR", # Korean "zh-CN", # Chinese (Simplified) "ar-SA", # Arabic ] def get_provider_name(self) -> str: """Get provider name""" return "google" def _get_encoding(self, encoding_str: str): """Convert encoding string to Google Speech enum""" if not GOOGLE_SPEECH_AVAILABLE: return None encoding_map = { "WEBM_OPUS": speech.RecognitionConfig.AudioEncoding.WEBM_OPUS, "LINEAR16": speech.RecognitionConfig.AudioEncoding.LINEAR16, "FLAC": speech.RecognitionConfig.AudioEncoding.FLAC, "MP3": speech.RecognitionConfig.AudioEncoding.MP3, "OGG_OPUS": speech.RecognitionConfig.AudioEncoding.OGG_OPUS, } return encoding_map.get(encoding_str, speech.RecognitionConfig.AudioEncoding.WEBM_OPUS)