Spaces:
Building
Building
File size: 3,721 Bytes
595d1d3 eb625f5 6db89b2 595d1d3 6db89b2 595d1d3 65d7499 595d1d3 6db89b2 595d1d3 6db89b2 595d1d3 6db89b2 595d1d3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 |
"""
ElevenLabs TTS Implementation
"""
import httpx
from typing import Optional, Dict
from tts_interface import TTSInterface
from logger import log_info, log_error, log_debug, log_warning
class ElevenLabsTTS(TTSInterface):
    """ElevenLabs TTS implementation.

    Posts text to the ElevenLabs HTTP text-to-speech endpoint and returns
    the raw audio bytes from the response body.
    """

    def __init__(self, api_key: str):
        """Store the API key and set the provider defaults.

        Args:
            api_key: ElevenLabs API key. Surrounding whitespace is stripped
                before the key is stored.
        """
        super().__init__()
        self.api_key = api_key.strip()
        self.base_url = "https://api.elevenlabs.io/v1"
        self.default_voice_id = "2thYbn2sOGtiTwd9QwWH"  # Avencia

        # ElevenLabs preprocessing needs
        # NOTE(review): presumably consumed by TTSInterface or its callers
        # to decide which text normalizations to run - confirm.
        self.preprocessing_flags = {
            "PREPROCESS_NUMBERS",   # Large numbers
            "PREPROCESS_CURRENCY",  # Currency amounts
            "PREPROCESS_TIME",      # Time format
            "PREPROCESS_CODES",     # PNR/codes
            "PREPROCESS_PHONE",     # Phone numbers
        }

        # Debug log. Mask the *stripped* key so the log reflects the value
        # that is actually sent, and never prints stray whitespace in place
        # of key characters. Keys of 8 characters or fewer are fully hidden.
        key = self.api_key
        masked_key = f"{key[:4]}...{key[-4:]}" if len(key) > 8 else "***"
        log_debug(f"π ElevenLabsTTS initialized with key: {masked_key}")

    async def synthesize(self, text: str, voice_id: Optional[str] = None, **kwargs) -> bytes:
        """Convert text to speech using ElevenLabs API.

        Args:
            text: Text to synthesize.
            voice_id: Voice to use. Falls back to ``self.default_voice_id``
                when omitted or falsy.
            **kwargs: Optional overrides:
                ``model_id`` (default ``"eleven_multilingual_v2"``),
                ``voice_settings`` (replaces the whole default settings
                dict, it is not merged with it),
                ``output_format`` (default ``"mp3_44100_128"``, sent as a
                query parameter).

        Returns:
            The response body as bytes.

        Raises:
            httpx.HTTPStatusError: The API answered with an error status.
                The status code and response text are logged, then the
                exception is re-raised.
            Exception: Any other failure is logged and re-raised unchanged.
        """
        try:
            voice = voice_id or self.default_voice_id
            url = f"{self.base_url}/text-to-speech/{voice}"

            headers = {
                "xi-api-key": self.api_key,
                "Content-Type": "application/json",
            }

            # Default parameters
            data = {
                "text": text,
                "model_id": kwargs.get("model_id", "eleven_multilingual_v2"),
                "voice_settings": kwargs.get("voice_settings", {
                    "stability": 1,
                    "similarity_boost": 0.85,
                    "style": 0.7,
                    "speed": 1.14,
                    "use_speaker_boost": True,
                }),
            }

            # Optional parameter: output format travels in the query string.
            params = {"output_format": kwargs.get("output_format", "mp3_44100_128")}

            log_debug(f"π€ Calling ElevenLabs TTS for {len(text)} characters")

            async with httpx.AsyncClient(timeout=30.0) as client:
                response = await client.post(
                    url,
                    headers=headers,
                    json=data,
                    params=params,
                )
                response.raise_for_status()

                audio_data = response.content
                log_debug(f"β ElevenLabs TTS returned {len(audio_data)} bytes")
                return audio_data

        except httpx.HTTPStatusError as e:
            log_error(f"β ElevenLabs API error: {e.response.status_code} - {e.response.text}")
            raise
        except Exception as e:
            log_error("β TTS synthesis error", e)
            raise

    def get_supported_voices(self) -> Dict[str, str]:
        """Get default voices - full list can be fetched from API.

        Returns:
            A mapping of voice id to a human-readable label.
        """
        return {
            "2thYbn2sOGtiTwd9QwWH": "Avencia (Female - Turkish)",
            "21m00Tcm4TlvDq8ikWAM": "Rachel (Female)",
            "EXAVITQu4vr4xnSDxMaL": "Bella (Female)",
            "ErXwobaYiN019PkySvjV": "Antoni (Male)",
            "VR6AewLTigWG4xSOukaG": "Arnold (Male)",
            "pNInz6obpgDQGcFmaJgB": "Adam (Male)",
            "yoZ06aMxZJJ28mfd3POQ": "Sam (Male)",
        }

    def get_provider_name(self) -> str:
        """Get provider name."""
        return "elevenlabs"