"""
TTS Interface and Implementations
"""
from abc import ABC, abstractmethod
from typing import Optional, Dict, Any, Set
import httpx
import os
from datetime import datetime
import sys

from tts_preprocessor import TTSPreprocessor


def log(message: str):
    """Print *message* to stdout prefixed with an ``HH:MM:SS.mmm`` timestamp.

    The output is flushed immediately so that lines show up in order even
    when stdout is buffered.
    """
    # %f yields microseconds (6 digits); dropping the last 3 leaves milliseconds.
    timestamp = datetime.now().strftime("%H:%M:%S.%f")[:-3]
    print(f"[{timestamp}] {message}", flush=True)


class TTSInterface(ABC):
    """Abstract base class for TTS providers"""

    @abstractmethod
    async def synthesize(self, text: str, voice_id: Optional[str] = None, **kwargs) -> bytes:
        """
        Convert text to speech and return audio bytes

        Args:
            text: Text to convert to speech
            voice_id: Optional voice ID specific to the provider
            **kwargs: Additional provider-specific parameters

        Returns:
            Audio data as bytes (MP3 or WAV format)
        """
        pass

    @abstractmethod
    def get_supported_voices(self) -> Dict[str, str]:
        """Get list of supported voices"""
        pass


class ElevenLabsTTS(TTSInterface):
    """ElevenLabs TTS implementation"""

    def __init__(self, api_key: str):
        """Store the API key and set up the text preprocessor.

        Args:
            api_key: ElevenLabs API key, sent as the ``xi-api-key`` header.
        """
        self.api_key = api_key
        self.base_url = "https://api.elevenlabs.io/v1"
        self.default_voice_id = "2thYbn2sOGtiTwd9QwWH"  # Avencia

        # Kinds of content that need preprocessing before being sent to ElevenLabs
        self.preprocessing_flags = {
            TTSPreprocessor.PREPROCESS_NUMBERS,     # Large numbers
            TTSPreprocessor.PREPROCESS_CURRENCY,    # Currencies
            TTSPreprocessor.PREPROCESS_TIME,        # Time format
            TTSPreprocessor.PREPROCESS_CODES,       # PNR codes
            TTSPreprocessor.PREPROCESS_PERCENTAGE,  # Percentages
        }
        self.preprocessor = TTSPreprocessor(language="tr")

        # Debug log: never print the full key; keys of 8 characters or fewer
        # are masked entirely because head + tail would reveal all of them.
        masked_key = f"{api_key[:4]}...{api_key[-4:]}" if len(api_key) > 8 else "***"
        log(f"🔑 ElevenLabsTTS initialized with key: {masked_key}")

    async def synthesize(self, text: str, voice_id: Optional[str] = None, **kwargs) -> bytes:
        """Convert text to speech using ElevenLabs API

        Args:
            text: Text to convert to speech.
            voice_id: Voice to use; falls back to ``self.default_voice_id``.
            **kwargs: Optional settings read by this method:
                disable_preprocessing: truthy value skips the preprocessor.
                model_id: defaults to ``"eleven_multilingual_v2"``.
                voice_settings: dict replacing the default voice settings.
                output_format: defaults to ``"mp3_44100_128"``.

        Returns:
            The raw response body (audio bytes).

        Raises:
            httpx.HTTPStatusError: If the API answers with an error status.
            Exception: Any other failure is logged and re-raised unchanged.
        """
        try:
            # Apply preprocessing if not disabled
            if not kwargs.get("disable_preprocessing", False):
                text = self.preprocessor.preprocess(text, self.preprocessing_flags)
                log(f"📝 Preprocessed text: {text[:100]}...")

            voice = voice_id or self.default_voice_id
            url = f"{self.base_url}/text-to-speech/{voice}"

            headers = {
                "xi-api-key": self.api_key,
                "Content-Type": "application/json"
            }

            # Default parameters; a caller-supplied voice_settings dict replaces
            # the defaults wholesale (it is not merged with them).
            data = {
                "text": text,
                "model_id": kwargs.get("model_id", "eleven_multilingual_v2"),
                "voice_settings": kwargs.get("voice_settings", {
                    "stability": 1,
                    "similarity_boost": 0.85,
                    "style": 0.7,
                    "speed": 1.14,
                    "use_speaker_boost": True
                })
            }

            # The output format travels as a query parameter, not in the JSON body.
            params = {"output_format": kwargs.get("output_format", "mp3_44100_128")}

            log(f"🎤 Calling ElevenLabs TTS for {len(text)} characters")

            async with httpx.AsyncClient(timeout=30) as client:
                response = await client.post(
                    url,
                    headers=headers,
                    json=data,
                    params=params
                )
                response.raise_for_status()

            audio_data = response.content
            log(f"✅ ElevenLabs TTS returned {len(audio_data)} bytes")
            return audio_data

        except httpx.HTTPStatusError as e:
            log(f"❌ ElevenLabs API error: {e.response.status_code} - {e.response.text}")
            raise
        except Exception as e:
            log(f"❌ TTS synthesis error: {e}")
            raise

    def get_supported_voices(self) -> Dict[str, str]:
        """Get default voices - full list can be fetched from API"""
        return {
            "21m00Tcm4TlvDq8ikWAM": "Rachel (Female)",
            "EXAVITQu4vr4xnSDxMaL": "Bella (Female)",
            "ErXwobaYiN019PkySvjV": "Antoni (Male)",
            "VR6AewLTigWG4xSOukaG": "Arnold (Male)",
            "pNInz6obpgDQGcFmaJgB": "Adam (Male)",
            "yoZ06aMxZJJ28mfd3POQ": "Sam (Male)",
        }

    def get_preprocessing_flags(self) -> Set[str]:
        """Get preprocessing flags for ElevenLabs"""
        return self.preprocessing_flags


class BlazeTTS(TTSInterface):
    """Placeholder for future Blaze TTS implementation"""

    def __init__(self, api_key: str):
        """Store the API key; nothing else is set up yet."""
        self.api_key = api_key

    async def synthesize(self, text: str, voice_id: Optional[str] = None, **kwargs) -> bytes:
        """Always raise ``NotImplementedError``; Blaze is not implemented yet."""
        raise NotImplementedError("Blaze TTS not implemented yet")

    def get_supported_voices(self) -> Dict[str, str]:
        """Return an empty mapping; no voices are defined for this placeholder."""
        return {}


def create_tts_provider(engine: str, api_key: Optional[str] = None) -> Optional[TTSInterface]:
    """Factory function to create TTS provider instances

    Args:
        engine: Provider name: ``"elevenlabs"``, ``"blaze"`` or ``"no_tts"``.
        api_key: API key for the provider; required for ``"elevenlabs"`` and
            ``"blaze"``.

    Returns:
        A provider instance, or ``None`` when *engine* is ``"no_tts"``, is
        not recognised, or needs an API key that was not supplied (the last
        two cases are also logged).
    """
    if engine == "elevenlabs" and api_key:
        return ElevenLabsTTS(api_key)
    elif engine == "blaze" and api_key:
        return BlazeTTS(api_key)
    elif engine == "no_tts":
        return None
    else:
        log(f"⚠️ Unknown or unconfigured TTS engine: {engine}")
        return None