ciyidogan committed on
Commit
595d1d3
·
verified ·
1 Parent(s): 312aa6f

Create tts_elevenlabs.py

Browse files
Files changed (1) hide show
  1. tts_elevenlabs.py +97 -0
tts_elevenlabs.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ElevenLabs TTS Implementation
3
+ """
4
+ import httpx
5
+ from typing import Optional, Dict
6
+ from tts_interface import TTSInterface, log
7
+
8
class ElevenLabsTTS(TTSInterface):
    """ElevenLabs TTS implementation.

    Sends text to the ElevenLabs ``/text-to-speech/{voice_id}`` endpoint and
    returns the raw audio bytes from the HTTP response body.
    """

    def __init__(self, api_key: str):
        """Store the API key and the provider defaults.

        Args:
            api_key: ElevenLabs API key. Surrounding whitespace is stripped
                before the key is stored.
        """
        super().__init__()
        self.api_key = api_key.strip()
        self.base_url = "https://api.elevenlabs.io/v1"
        self.default_voice_id = "2thYbn2sOGtiTwd9QwWH"  # Avencia

        # ElevenLabs preprocessing needs
        self.preprocessing_flags = {
            "PREPROCESS_NUMBERS",   # Large numbers
            "PREPROCESS_CURRENCY",  # Currency amounts
            "PREPROCESS_TIME",      # Time format
            "PREPROCESS_CODES",     # PNR/codes
            "PREPROCESS_PHONE",     # Phone numbers
        }

        # Debug log. Mask the *stripped* key (the one actually sent in the
        # request header) so stray whitespace in the raw argument neither
        # leaks into the log nor skews which characters are shown.
        # NOTE(review): the emoji prefixes in this class's log strings look
        # mis-encoded in this copy of the file; they are left untouched here.
        # Confirm against the original source.
        key = self.api_key
        masked_key = f"{key[:4]}...{key[-4:]}" if len(key) > 8 else "***"
        log(f"πŸ”‘ ElevenLabsTTS initialized with key: {masked_key}")

    async def synthesize(self, text: str, voice_id: Optional[str] = None, **kwargs) -> bytes:
        """Convert text to speech using the ElevenLabs API.

        Args:
            text: Text to synthesize.
            voice_id: ElevenLabs voice id; falls back to
                ``self.default_voice_id`` when falsy.
            **kwargs: Optional overrides. Recognized keys:
                ``model_id`` (default ``"eleven_multilingual_v2"``),
                ``voice_settings`` (dict that replaces the default settings
                wholesale), and ``output_format`` (sent as a query
                parameter, default ``"mp3_44100_128"``).

        Returns:
            The response body (audio data) as bytes.

        Raises:
            httpx.HTTPStatusError: If the API answers with an error status.
            Exception: Any other failure is logged and re-raised unchanged.
        """
        try:
            voice = voice_id or self.default_voice_id
            url = f"{self.base_url}/text-to-speech/{voice}"

            headers = {
                "xi-api-key": self.api_key,
                "Content-Type": "application/json",
            }

            # Request body; caller-supplied values win over the defaults.
            data = {
                "text": text,
                "model_id": kwargs.get("model_id", "eleven_multilingual_v2"),
                "voice_settings": kwargs.get("voice_settings", {
                    "stability": 1,
                    "similarity_boost": 0.85,
                    "style": 0.7,
                    "speed": 1.14,
                    "use_speaker_boost": True,
                }),
            }

            # The output format travels in the query string, not the JSON body.
            params = {
                "output_format": kwargs.get("output_format", "mp3_44100_128"),
            }

            log(f"🎀 Calling ElevenLabs TTS for {len(text)} characters")

            async with httpx.AsyncClient(timeout=30.0) as client:
                response = await client.post(
                    url,
                    headers=headers,
                    json=data,
                    params=params,
                )

                # Turn 4xx/5xx answers into httpx.HTTPStatusError.
                response.raise_for_status()
                audio_data = response.content

                log(f"βœ… ElevenLabs TTS returned {len(audio_data)} bytes")
                return audio_data

        except httpx.HTTPStatusError as e:
            log(f"❌ ElevenLabs API error: {e.response.status_code} - {e.response.text}")
            raise
        except Exception as e:
            log(f"❌ TTS synthesis error: {e}")
            raise

    def get_supported_voices(self) -> Dict[str, str]:
        """Get default voices - full list can be fetched from API.

        Returns:
            A mapping of voice id to a human-readable label.
        """
        return {
            "2thYbn2sOGtiTwd9QwWH": "Avencia (Female - Turkish)",
            "21m00Tcm4TlvDq8ikWAM": "Rachel (Female)",
            "EXAVITQu4vr4xnSDxMaL": "Bella (Female)",
            "ErXwobaYiN019PkySvjV": "Antoni (Male)",
            "VR6AewLTigWG4xSOukaG": "Arnold (Male)",
            "pNInz6obpgDQGcFmaJgB": "Adam (Male)",
            "yoZ06aMxZJJ28mfd3POQ": "Sam (Male)",
        }

    def get_provider_name(self) -> str:
        """Get provider name."""
        return "elevenlabs"