""" TTS Interface and Implementations """ from abc import ABC, abstractmethod from typing import Optional, Dict, Any, Set import httpx import os from datetime import datetime import sys from tts_preprocessor import TTSPreprocessor def log(message: str): timestamp = datetime.now().strftime("%H:%M:%S.%f")[:-3] print(f"[{timestamp}] {message}") sys.stdout.flush() class TTSInterface(ABC): """Abstract base class for TTS providers""" def __init__(self): self.preprocessing_flags: Set[str] = set() self.supports_ssml: bool = False @abstractmethod async def synthesize(self, text: str, voice_id: Optional[str] = None, **kwargs) -> bytes: """Convert text to speech and return audio bytes""" pass @abstractmethod def get_supported_voices(self) -> Dict[str, str]: """Get list of supported voices""" pass def get_preprocessing_flags(self) -> Set[str]: """Get preprocessing flags for this provider""" return self.preprocessing_flags def supports_ssml_format(self) -> bool: """Check if provider supports SSML""" return self.supports_ssml class ElevenLabsTTS(TTSInterface): """ElevenLabs TTS implementation""" def __init__(self, api_key: str): super().__init__() self.api_key = api_key.strip() # Başındaki/sonundaki boşlukları temizle self.base_url = "https://api.elevenlabs.io/v1" self.default_voice_id = "2thYbn2sOGtiTwd9QwWH" # Avencia # ElevenLabs için preprocessing gereken alanlar self.preprocessing_flags = { TTSPreprocessor.PREPROCESS_NUMBERS, # Büyük sayılar TTSPreprocessor.PREPROCESS_CURRENCY, # Para birimleri TTSPreprocessor.PREPROCESS_TIME, # Saat formatı TTSPreprocessor.PREPROCESS_CODES, # PNR kodları TTSPreprocessor.PREPROCESS_PERCENTAGE # Yüzdeler } # tr-TR -> tr dönüşümü self.preprocessor = TTSPreprocessor(language="tr") async def synthesize( self, text: str, voice_id: Optional[str] = None, model_id: Optional[str] = None, output_format: Optional[str] = None, **kwargs ) -> bytes: """Convert text to speech using ElevenLabs API""" # Preprocess text processed_text = self.preprocessor.preprocess(text, self.preprocessing_flags) # Use defaults if not provided voice_id = voice_id or self.default_voice_id model_id = model_id or "eleven_multilingual_v2" output_format = output_format or "mp3_44100_128" url = f"{self.base_url}/text-to-speech/{voice_id}" headers = { "Accept": "audio/mpeg", "Content-Type": "application/json", "xi-api-key": self.api_key } data = { "text": processed_text, "model_id": model_id, "voice_settings": { "stability": 0.5, "similarity_boost": 0.75, "style": 0.0, "use_speaker_boost": True } } # Add output format to URL if specified if output_format: url += f"?output_format={output_format}" try: async with httpx.AsyncClient() as client: log(f"🎤 ElevenLabs TTS request: voice={voice_id}, model={model_id}") log(f"📝 Text (first 100 chars): {processed_text[:100]}...") response = await client.post( url, json=data, headers=headers, timeout=30.0 ) response.raise_for_status() audio_data = response.content log(f"✅ ElevenLabs TTS returned {len(audio_data)} bytes") return audio_data except httpx.HTTPStatusError as e: log(f"❌ ElevenLabs API error: {e.response.status_code} - {e.response.text}") raise except Exception as e: log(f"❌ TTS synthesis error: {e}") raise def get_supported_voices(self) -> Dict[str, str]: """Get default voices - full list can be fetched from API""" return { "21m00Tcm4TlvDq8ikWAM": "Rachel (Female)", "EXAVITQu4vr4xnSDxMaL": "Bella (Female)", "ErXwobaYiN019PkySvjV": "Antoni (Male)", "VR6AewLTigWG4xSOukaG": "Arnold (Male)", "pNInz6obpgDQGcFmaJgB": "Adam (Male)", "yoZ06aMxZJJ28mfd3POQ": "Sam (Male)", } class BlazeTTS(TTSInterface): """Placeholder for future Blaze TTS implementation""" def __init__(self, api_key: str): super().__init__() self.api_key = api_key async def synthesize(self, text: str, voice_id: Optional[str] = None, **kwargs) -> bytes: raise NotImplementedError("Blaze TTS not implemented yet") def get_supported_voices(self) -> Dict[str, str]: return {} def create_tts_provider(engine: str, api_key: Optional[str] = None) -> Optional[TTSInterface]: """Factory function to create TTS provider instances""" if engine == "elevenlabs" and api_key: return ElevenLabsTTS(api_key) elif engine == "blaze" and api_key: return BlazeTTS(api_key) elif engine == "no_tts": return None else: log(f"⚠️ Unknown or unconfigured TTS engine: {engine}") return None