ciyidogan committed on
Commit
7594e59
·
verified ·
1 Parent(s): c8722c6

Update tts/tts_elevenlabs.py

Browse files
Files changed (1) hide show
  1. tts/tts_elevenlabs.py +108 -108
tts/tts_elevenlabs.py CHANGED
@@ -1,109 +1,109 @@
1
- """
2
- ElevenLabs TTS Implementation
3
- """
4
- import httpx
5
- from typing import Optional, Dict
6
- from tts_interface import TTSInterface
7
- from utils.logger import log_info, log_error, log_debug, log_warning
8
-
9
- class ElevenLabsTTS(TTSInterface):
10
- """ElevenLabs TTS implementation"""
11
-
12
- def __init__(self, api_key: str):
13
- super().__init__()
14
- self.api_key = api_key.strip()
15
- self.base_url = "https://api.elevenlabs.io/v1"
16
- self.default_voice_id = "2thYbn2sOGtiTwd9QwWH" # Avencia
17
-
18
- # ElevenLabs preprocessing needs
19
- self.preprocessing_flags = {
20
- "PREPROCESS_NUMBERS", # Large numbers
21
- "PREPROCESS_CURRENCY", # Currency amounts
22
- "PREPROCESS_TIME", # Time format
23
- "PREPROCESS_CODES", # PNR/codes
24
- "PREPROCESS_PHONE" # Phone numbers
25
- }
26
-
27
- # Debug log
28
- masked_key = f"{api_key[:4]}...{api_key[-4:]}" if len(api_key) > 8 else "***"
29
- log_debug(f"🔑 ElevenLabsTTS initialized with key: {masked_key}")
30
-
31
- async def synthesize(self, text: str, voice_id: Optional[str] = None, **kwargs) -> bytes:
32
- """Convert text to speech using ElevenLabs API"""
33
- try:
34
- voice = voice_id or self.default_voice_id
35
- url = f"{self.base_url}/text-to-speech/{voice}"
36
-
37
- headers = {
38
- "xi-api-key": self.api_key,
39
- "Content-Type": "application/json"
40
- }
41
-
42
- # Default parameters
43
- data = {
44
- "text": text,
45
- "model_id": kwargs.get("model_id", "eleven_multilingual_v2"),
46
- "voice_settings": kwargs.get("voice_settings", {
47
- "stability": 1,
48
- "similarity_boost": 0.85,
49
- "style": 0.7,
50
- "speed": 1.14,
51
- "use_speaker_boost": True
52
- })
53
- }
54
-
55
- # Add optional parameters
56
- if "output_format" in kwargs:
57
- params = {"output_format": kwargs["output_format"]}
58
- else:
59
- params = {"output_format": "mp3_44100_128"}
60
-
61
- log_debug(f"🎤 Calling ElevenLabs TTS for {len(text)} characters")
62
-
63
- async with httpx.AsyncClient(timeout=30.0) as client:
64
- response = await client.post(
65
- url,
66
- headers=headers,
67
- json=data,
68
- params=params
69
- )
70
-
71
- response.raise_for_status()
72
- audio_data = response.content # This should be bytes
73
-
74
- # Ensure we're returning bytes
75
- if isinstance(audio_data, str):
76
- log_warning("ElevenLabs returned string instead of bytes")
77
- # Try to decode if it's base64
78
- try:
79
- audio_data = base64.b64decode(audio_data)
80
- except:
81
- pass
82
-
83
- log_debug(f"✅ ElevenLabs TTS returned {len(audio_data)} bytes")
84
- log_debug(f"Audio data type: {type(audio_data)}")
85
-
86
- return audio_data
87
-
88
- except httpx.HTTPStatusError as e:
89
- log_error(f"❌ ElevenLabs API error: {e.response.status_code} - {e.response.text}")
90
- raise
91
- except Exception as e:
92
- log_error("❌ TTS synthesis error", e)
93
- raise
94
-
95
- def get_supported_voices(self) -> Dict[str, str]:
96
- """Get default voices - full list can be fetched from API"""
97
- return {
98
- "2thYbn2sOGtiTwd9QwWH": "Avencia (Female - Turkish)",
99
- "21m00Tcm4TlvDq8ikWAM": "Rachel (Female)",
100
- "EXAVITQu4vr4xnSDxMaL": "Bella (Female)",
101
- "ErXwobaYiN019PkySvjV": "Antoni (Male)",
102
- "VR6AewLTigWG4xSOukaG": "Arnold (Male)",
103
- "pNInz6obpgDQGcFmaJgB": "Adam (Male)",
104
- "yoZ06aMxZJJ28mfd3POQ": "Sam (Male)",
105
- }
106
-
107
- def get_provider_name(self) -> str:
108
- """Get provider name"""
109
  return "elevenlabs"
 
1
+ """
2
+ ElevenLabs TTS Implementation
3
+ """
4
+ import httpx
5
+ from typing import Optional, Dict
6
+ from .tts_interface import TTSInterface
7
+ from utils.logger import log_info, log_error, log_debug, log_warning
8
+
9
+ class ElevenLabsTTS(TTSInterface):
10
+ """ElevenLabs TTS implementation"""
11
+
12
+ def __init__(self, api_key: str):
13
+ super().__init__()
14
+ self.api_key = api_key.strip()
15
+ self.base_url = "https://api.elevenlabs.io/v1"
16
+ self.default_voice_id = "2thYbn2sOGtiTwd9QwWH" # Avencia
17
+
18
+ # ElevenLabs preprocessing needs
19
+ self.preprocessing_flags = {
20
+ "PREPROCESS_NUMBERS", # Large numbers
21
+ "PREPROCESS_CURRENCY", # Currency amounts
22
+ "PREPROCESS_TIME", # Time format
23
+ "PREPROCESS_CODES", # PNR/codes
24
+ "PREPROCESS_PHONE" # Phone numbers
25
+ }
26
+
27
+ # Debug log
28
+ masked_key = f"{api_key[:4]}...{api_key[-4:]}" if len(api_key) > 8 else "***"
29
+ log_debug(f"🔑 ElevenLabsTTS initialized with key: {masked_key}")
30
+
31
+ async def synthesize(self, text: str, voice_id: Optional[str] = None, **kwargs) -> bytes:
32
+ """Convert text to speech using ElevenLabs API"""
33
+ try:
34
+ voice = voice_id or self.default_voice_id
35
+ url = f"{self.base_url}/text-to-speech/{voice}"
36
+
37
+ headers = {
38
+ "xi-api-key": self.api_key,
39
+ "Content-Type": "application/json"
40
+ }
41
+
42
+ # Default parameters
43
+ data = {
44
+ "text": text,
45
+ "model_id": kwargs.get("model_id", "eleven_multilingual_v2"),
46
+ "voice_settings": kwargs.get("voice_settings", {
47
+ "stability": 1,
48
+ "similarity_boost": 0.85,
49
+ "style": 0.7,
50
+ "speed": 1.14,
51
+ "use_speaker_boost": True
52
+ })
53
+ }
54
+
55
+ # Add optional parameters
56
+ if "output_format" in kwargs:
57
+ params = {"output_format": kwargs["output_format"]}
58
+ else:
59
+ params = {"output_format": "mp3_44100_128"}
60
+
61
+ log_debug(f"🎤 Calling ElevenLabs TTS for {len(text)} characters")
62
+
63
+ async with httpx.AsyncClient(timeout=30.0) as client:
64
+ response = await client.post(
65
+ url,
66
+ headers=headers,
67
+ json=data,
68
+ params=params
69
+ )
70
+
71
+ response.raise_for_status()
72
+ audio_data = response.content # This should be bytes
73
+
74
+ # Ensure we're returning bytes
75
+ if isinstance(audio_data, str):
76
+ log_warning("ElevenLabs returned string instead of bytes")
77
+ # Try to decode if it's base64
78
+ try:
79
+ audio_data = base64.b64decode(audio_data)
80
+ except:
81
+ pass
82
+
83
+ log_debug(f"✅ ElevenLabs TTS returned {len(audio_data)} bytes")
84
+ log_debug(f"Audio data type: {type(audio_data)}")
85
+
86
+ return audio_data
87
+
88
+ except httpx.HTTPStatusError as e:
89
+ log_error(f"❌ ElevenLabs API error: {e.response.status_code} - {e.response.text}")
90
+ raise
91
+ except Exception as e:
92
+ log_error("❌ TTS synthesis error", e)
93
+ raise
94
+
95
+ def get_supported_voices(self) -> Dict[str, str]:
96
+ """Get default voices - full list can be fetched from API"""
97
+ return {
98
+ "2thYbn2sOGtiTwd9QwWH": "Avencia (Female - Turkish)",
99
+ "21m00Tcm4TlvDq8ikWAM": "Rachel (Female)",
100
+ "EXAVITQu4vr4xnSDxMaL": "Bella (Female)",
101
+ "ErXwobaYiN019PkySvjV": "Antoni (Male)",
102
+ "VR6AewLTigWG4xSOukaG": "Arnold (Male)",
103
+ "pNInz6obpgDQGcFmaJgB": "Adam (Male)",
104
+ "yoZ06aMxZJJ28mfd3POQ": "Sam (Male)",
105
+ }
106
+
107
+ def get_provider_name(self) -> str:
108
+ """Get provider name"""
109
  return "elevenlabs"