ciyidogan committed on
Commit
312aa6f
·
verified ·
1 Parent(s): 71406b0

Update tts_interface.py

Browse files
Files changed (1) hide show
  1. tts_interface.py +18 -132
tts_interface.py CHANGED
@@ -1,16 +1,11 @@
1
  """
2
- TTS Interface and Implementations
3
  """
4
-
5
  from abc import ABC, abstractmethod
6
  from typing import Optional, Dict, Any, Set
7
- import httpx
8
- import os
9
  from datetime import datetime
10
  import sys
11
 
12
- from tts_preprocessor import TTSPreprocessor
13
-
14
  def log(message: str):
15
  timestamp = datetime.now().strftime("%H:%M:%S.%f")[:-3]
16
  print(f"[{timestamp}] {message}")
@@ -25,7 +20,17 @@ class TTSInterface(ABC):
25
 
26
  @abstractmethod
27
  async def synthesize(self, text: str, voice_id: Optional[str] = None, **kwargs) -> bytes:
28
- """Convert text to speech and return audio bytes"""
 
 
 
 
 
 
 
 
 
 
29
  pass
30
 
31
  @abstractmethod
@@ -33,134 +38,15 @@ class TTSInterface(ABC):
33
  """Get list of supported voices"""
34
  pass
35
 
 
 
 
 
 
36
  def get_preprocessing_flags(self) -> Set[str]:
37
  """Get preprocessing flags for this provider"""
38
  return self.preprocessing_flags
39
 
40
  def supports_ssml_format(self) -> bool:
41
  """Check if provider supports SSML"""
42
- return self.supports_ssml
43
-
44
- class ElevenLabsTTS(TTSInterface):
45
- """ElevenLabs TTS implementation"""
46
-
47
- def __init__(self, api_key: str):
48
- super().__init__()
49
- self.api_key = api_key.strip() # Başındaki/sonundaki boşlukları temizle
50
- self.base_url = "https://api.elevenlabs.io/v1"
51
- self.default_voice_id = "2thYbn2sOGtiTwd9QwWH" # Avencia
52
-
53
- # ElevenLabs için preprocessing gereken alanlar
54
- self.preprocessing_flags = {
55
- TTSPreprocessor.PREPROCESS_NUMBERS, # Büyük sayılar
56
- TTSPreprocessor.PREPROCESS_CURRENCY, # Para birimleri
57
- TTSPreprocessor.PREPROCESS_TIME, # Saat formatı
58
- TTSPreprocessor.PREPROCESS_CODES, # PNR kodları
59
- TTSPreprocessor.PREPROCESS_PERCENTAGE # Yüzdeler
60
- }
61
-
62
- # tr-TR -> tr dönüşümü
63
- self.preprocessor = TTSPreprocessor(language="tr")
64
-
65
- async def synthesize(
66
- self,
67
- text: str,
68
- voice_id: Optional[str] = None,
69
- model_id: Optional[str] = None,
70
- output_format: Optional[str] = None,
71
- **kwargs
72
- ) -> bytes:
73
- """Convert text to speech using ElevenLabs API"""
74
-
75
- # Preprocess text
76
- processed_text = self.preprocessor.preprocess(text, self.preprocessing_flags)
77
-
78
- # Use defaults if not provided
79
- voice_id = voice_id or self.default_voice_id
80
- model_id = model_id or "eleven_multilingual_v2"
81
- output_format = output_format or "mp3_44100_128"
82
-
83
- url = f"{self.base_url}/text-to-speech/{voice_id}"
84
-
85
- headers = {
86
- "Accept": "audio/mpeg",
87
- "Content-Type": "application/json",
88
- "xi-api-key": self.api_key
89
- }
90
-
91
- data = {
92
- "text": processed_text,
93
- "model_id": model_id,
94
- "voice_settings": {
95
- "stability": 0.5,
96
- "similarity_boost": 0.75,
97
- "style": 0.0,
98
- "use_speaker_boost": True
99
- }
100
- }
101
-
102
- # Add output format to URL if specified
103
- if output_format:
104
- url += f"?output_format={output_format}"
105
-
106
- try:
107
- async with httpx.AsyncClient() as client:
108
- log(f"🎤 ElevenLabs TTS request: voice={voice_id}, model={model_id}")
109
- log(f"📝 Text (first 100 chars): {processed_text[:100]}...")
110
-
111
- response = await client.post(
112
- url,
113
- json=data,
114
- headers=headers,
115
- timeout=30.0
116
- )
117
-
118
- response.raise_for_status()
119
- audio_data = response.content
120
-
121
- log(f"✅ ElevenLabs TTS returned {len(audio_data)} bytes")
122
- return audio_data
123
-
124
- except httpx.HTTPStatusError as e:
125
- log(f"❌ ElevenLabs API error: {e.response.status_code} - {e.response.text}")
126
- raise
127
- except Exception as e:
128
- log(f"❌ TTS synthesis error: {e}")
129
- raise
130
-
131
- def get_supported_voices(self) -> Dict[str, str]:
132
- """Get default voices - full list can be fetched from API"""
133
- return {
134
- "21m00Tcm4TlvDq8ikWAM": "Rachel (Female)",
135
- "EXAVITQu4vr4xnSDxMaL": "Bella (Female)",
136
- "ErXwobaYiN019PkySvjV": "Antoni (Male)",
137
- "VR6AewLTigWG4xSOukaG": "Arnold (Male)",
138
- "pNInz6obpgDQGcFmaJgB": "Adam (Male)",
139
- "yoZ06aMxZJJ28mfd3POQ": "Sam (Male)",
140
- }
141
-
142
- class BlazeTTS(TTSInterface):
143
- """Placeholder for future Blaze TTS implementation"""
144
-
145
- def __init__(self, api_key: str):
146
- super().__init__()
147
- self.api_key = api_key
148
-
149
- async def synthesize(self, text: str, voice_id: Optional[str] = None, **kwargs) -> bytes:
150
- raise NotImplementedError("Blaze TTS not implemented yet")
151
-
152
- def get_supported_voices(self) -> Dict[str, str]:
153
- return {}
154
-
155
-
156
- def create_tts_provider(engine: str, api_key: Optional[str] = None) -> Optional[TTSInterface]:
157
- """Factory function to create TTS provider instances"""
158
- if engine == "elevenlabs" and api_key:
159
- return ElevenLabsTTS(api_key)
160
- elif engine == "blaze" and api_key:
161
- return BlazeTTS(api_key)
162
- elif engine == "no_tts":
163
- return None
164
- else:
165
- log(f"⚠️ Unknown or unconfigured TTS engine: {engine}")
166
- return None
 
1
  """
2
+ TTS Interface for Flare
3
  """
 
4
  from abc import ABC, abstractmethod
5
  from typing import Optional, Dict, Any, Set
 
 
6
  from datetime import datetime
7
  import sys
8
 
 
 
9
  def log(message: str):
10
  timestamp = datetime.now().strftime("%H:%M:%S.%f")[:-3]
11
  print(f"[{timestamp}] {message}")
 
20
 
21
  @abstractmethod
22
  async def synthesize(self, text: str, voice_id: Optional[str] = None, **kwargs) -> bytes:
23
+ """
24
+ Convert text to speech and return audio bytes
25
+
26
+ Args:
27
+ text: Text to convert to speech
28
+ voice_id: Optional voice ID specific to the provider
29
+ **kwargs: Additional provider-specific parameters
30
+
31
+ Returns:
32
+ Audio data as bytes (MP3 or WAV format)
33
+ """
34
  pass
35
 
36
  @abstractmethod
 
38
  """Get list of supported voices"""
39
  pass
40
 
41
+ @abstractmethod
42
+ def get_provider_name(self) -> str:
43
+ """Get provider name for logging"""
44
+ pass
45
+
46
  def get_preprocessing_flags(self) -> Set[str]:
47
  """Get preprocessing flags for this provider"""
48
  return self.preprocessing_flags
49
 
50
  def supports_ssml_format(self) -> bool:
51
  """Check if provider supports SSML"""
52
+ return self.supports_ssml