Spaces:
Building
Building
Update tts_interface.py
Browse files- tts_interface.py +52 -51
tts_interface.py
CHANGED
@@ -3,7 +3,7 @@ TTS Interface and Implementations
|
|
3 |
"""
|
4 |
|
5 |
from abc import ABC, abstractmethod
|
6 |
-
from typing import Optional, Dict, Any
|
7 |
import httpx
|
8 |
import os
|
9 |
from datetime import datetime
|
@@ -33,12 +33,11 @@ class TTSInterface(ABC):
|
|
33 |
"""Get list of supported voices"""
|
34 |
pass
|
35 |
|
36 |
-
@abstractmethod
|
37 |
def get_preprocessing_flags(self) -> Set[str]:
|
38 |
"""Get preprocessing flags for this provider"""
|
39 |
-
|
40 |
|
41 |
-
def
|
42 |
"""Check if provider supports SSML"""
|
43 |
return self.supports_ssml
|
44 |
|
@@ -46,6 +45,7 @@ class ElevenLabsTTS(TTSInterface):
|
|
46 |
"""ElevenLabs TTS implementation"""
|
47 |
|
48 |
def __init__(self, api_key: str):
|
|
|
49 |
self.api_key = api_key
|
50 |
self.base_url = "https://api.elevenlabs.io/v1"
|
51 |
self.default_voice_id = "2thYbn2sOGtiTwd9QwWH" # Avencia
|
@@ -60,54 +60,58 @@ class ElevenLabsTTS(TTSInterface):
|
|
60 |
}
|
61 |
|
62 |
self.preprocessor = TTSPreprocessor(language="tr")
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
|
|
|
|
|
|
69 |
"""Convert text to speech using ElevenLabs API"""
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
"
|
92 |
-
"
|
|
|
93 |
"use_speaker_boost": True
|
94 |
-
})
|
95 |
}
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
|
|
106 |
response = await client.post(
|
107 |
-
url,
|
|
|
108 |
headers=headers,
|
109 |
-
|
110 |
-
params=params
|
111 |
)
|
112 |
|
113 |
response.raise_for_status()
|
@@ -134,14 +138,11 @@ class ElevenLabsTTS(TTSInterface):
|
|
134 |
"yoZ06aMxZJJ28mfd3POQ": "Sam (Male)",
|
135 |
}
|
136 |
|
137 |
-
def get_preprocessing_flags(self) -> Set[str]:
|
138 |
-
"""Get preprocessing flags for ElevenLabs"""
|
139 |
-
return self.preprocessing_flags
|
140 |
-
|
141 |
class BlazeTTS(TTSInterface):
|
142 |
"""Placeholder for future Blaze TTS implementation"""
|
143 |
|
144 |
def __init__(self, api_key: str):
|
|
|
145 |
self.api_key = api_key
|
146 |
|
147 |
async def synthesize(self, text: str, voice_id: Optional[str] = None, **kwargs) -> bytes:
|
|
|
3 |
"""
|
4 |
|
5 |
from abc import ABC, abstractmethod
|
6 |
+
from typing import Optional, Dict, Any, Set
|
7 |
import httpx
|
8 |
import os
|
9 |
from datetime import datetime
|
|
|
33 |
"""Get list of supported voices"""
|
34 |
pass
|
35 |
|
|
|
36 |
def get_preprocessing_flags(self) -> Set[str]:
|
37 |
"""Get preprocessing flags for this provider"""
|
38 |
+
return self.preprocessing_flags
|
39 |
|
40 |
+
def supports_ssml_format(self) -> bool:
|
41 |
"""Check if provider supports SSML"""
|
42 |
return self.supports_ssml
|
43 |
|
|
|
45 |
"""ElevenLabs TTS implementation"""
|
46 |
|
47 |
def __init__(self, api_key: str):
|
48 |
+
super().__init__()
|
49 |
self.api_key = api_key
|
50 |
self.base_url = "https://api.elevenlabs.io/v1"
|
51 |
self.default_voice_id = "2thYbn2sOGtiTwd9QwWH" # Avencia
|
|
|
60 |
}
|
61 |
|
62 |
self.preprocessor = TTSPreprocessor(language="tr")
|
63 |
+
|
64 |
+
async def synthesize(
|
65 |
+
self,
|
66 |
+
text: str,
|
67 |
+
voice_id: Optional[str] = None,
|
68 |
+
model_id: Optional[str] = None,
|
69 |
+
output_format: Optional[str] = None,
|
70 |
+
**kwargs
|
71 |
+
) -> bytes:
|
72 |
"""Convert text to speech using ElevenLabs API"""
|
73 |
+
|
74 |
+
# Preprocess text
|
75 |
+
processed_text = self.preprocessor.process(text, self.preprocessing_flags)
|
76 |
+
|
77 |
+
# Use defaults if not provided
|
78 |
+
voice_id = voice_id or self.default_voice_id
|
79 |
+
model_id = model_id or "eleven_multilingual_v2"
|
80 |
+
output_format = output_format or "mp3_44100_128"
|
81 |
+
|
82 |
+
url = f"{self.base_url}/text-to-speech/{voice_id}"
|
83 |
+
|
84 |
+
headers = {
|
85 |
+
"Accept": "audio/mpeg",
|
86 |
+
"Content-Type": "application/json",
|
87 |
+
"xi-api-key": self.api_key
|
88 |
+
}
|
89 |
+
|
90 |
+
data = {
|
91 |
+
"text": processed_text,
|
92 |
+
"model_id": model_id,
|
93 |
+
"voice_settings": {
|
94 |
+
"stability": 0.5,
|
95 |
+
"similarity_boost": 0.75,
|
96 |
+
"style": 0.0,
|
97 |
"use_speaker_boost": True
|
|
|
98 |
}
|
99 |
+
}
|
100 |
+
|
101 |
+
# Add output format to URL if specified
|
102 |
+
if output_format:
|
103 |
+
url += f"?output_format={output_format}"
|
104 |
+
|
105 |
+
try:
|
106 |
+
async with httpx.AsyncClient() as client:
|
107 |
+
log(f"🎤 ElevenLabs TTS request: voice={voice_id}, model={model_id}")
|
108 |
+
log(f"📝 Text (first 100 chars): {processed_text[:100]}...")
|
109 |
+
|
110 |
response = await client.post(
|
111 |
+
url,
|
112 |
+
json=data,
|
113 |
headers=headers,
|
114 |
+
timeout=30.0
|
|
|
115 |
)
|
116 |
|
117 |
response.raise_for_status()
|
|
|
138 |
"yoZ06aMxZJJ28mfd3POQ": "Sam (Male)",
|
139 |
}
|
140 |
|
|
|
|
|
|
|
|
|
141 |
class BlazeTTS(TTSInterface):
|
142 |
"""Placeholder for future Blaze TTS implementation"""
|
143 |
|
144 |
def __init__(self, api_key: str):
|
145 |
+
super().__init__()
|
146 |
self.api_key = api_key
|
147 |
|
148 |
async def synthesize(self, text: str, voice_id: Optional[str] = None, **kwargs) -> bytes:
|