""" ElevenLabs TTS Implementation """ import httpx from typing import Optional, Dict from .tts_interface import TTSInterface from utils.logger import log_info, log_error, log_debug, log_warning class ElevenLabsTTS(TTSInterface): """ElevenLabs TTS implementation""" def __init__(self, api_key: str): super().__init__() self.api_key = api_key.strip() self.base_url = "https://api.elevenlabs.io/v1" self.default_voice_id = "2thYbn2sOGtiTwd9QwWH" # Avencia # ElevenLabs preprocessing needs self.preprocessing_flags = { "PREPROCESS_NUMBERS", # Large numbers "PREPROCESS_CURRENCY", # Currency amounts "PREPROCESS_TIME", # Time format "PREPROCESS_CODES", # PNR/codes "PREPROCESS_PHONE" # Phone numbers } # Debug log masked_key = f"{api_key[:4]}...{api_key[-4:]}" if len(api_key) > 8 else "***" log_debug(f"🔑 ElevenLabsTTS initialized with key: {masked_key}") async def synthesize(self, text: str, voice_id: Optional[str] = None, **kwargs) -> bytes: """Convert text to speech using ElevenLabs API""" try: voice = voice_id or self.default_voice_id url = f"{self.base_url}/text-to-speech/{voice}" headers = { "xi-api-key": self.api_key, "Content-Type": "application/json" } # Default parameters data = { "text": text, "model_id": kwargs.get("model_id", "eleven_multilingual_v2"), "voice_settings": kwargs.get("voice_settings", { "stability": 1, "similarity_boost": 0.85, "style": 0.7, "speed": 1.14, "use_speaker_boost": True }) } # Add optional parameters if "output_format" in kwargs: params = {"output_format": kwargs["output_format"]} else: params = {"output_format": "mp3_44100_128"} log_debug(f"🎤 Calling ElevenLabs TTS for {len(text)} characters") async with httpx.AsyncClient(timeout=30.0) as client: response = await client.post( url, headers=headers, json=data, params=params ) response.raise_for_status() audio_data = response.content # This should be bytes # Ensure we're returning bytes if isinstance(audio_data, str): log_warning("ElevenLabs returned string instead of bytes") # Try to decode if it's base64 try: audio_data = base64.b64decode(audio_data) except: pass log_debug(f"✅ ElevenLabs TTS returned {len(audio_data)} bytes") log_debug(f"Audio data type: {type(audio_data)}") return audio_data except httpx.HTTPStatusError as e: log_error(f"❌ ElevenLabs API error: {e.response.status_code} - {e.response.text}") raise except Exception as e: log_error("❌ TTS synthesis error", e) raise def get_supported_voices(self) -> Dict[str, str]: """Get default voices - full list can be fetched from API""" return { "2thYbn2sOGtiTwd9QwWH": "Avencia (Female - Turkish)", "21m00Tcm4TlvDq8ikWAM": "Rachel (Female)", "EXAVITQu4vr4xnSDxMaL": "Bella (Female)", "ErXwobaYiN019PkySvjV": "Antoni (Male)", "VR6AewLTigWG4xSOukaG": "Arnold (Male)", "pNInz6obpgDQGcFmaJgB": "Adam (Male)", "yoZ06aMxZJJ28mfd3POQ": "Sam (Male)", } def get_provider_name(self) -> str: """Get provider name""" return "elevenlabs"