ciyidogan committed on
Commit
9874d4d
·
verified ·
1 Parent(s): 1ad909f

Upload 6 files

Browse files
tts/tts_blaze.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Blaze TTS Implementation (Placeholder)
3
+ """
4
+ from typing import Optional, Dict
5
+ from tts_interface import TTSInterface
6
+ from logger import log_info, log_error, log_debug, log_warning
7
+
8
class BlazeTTS(TTSInterface):
    """Stub provider reserved for a future Blaze TTS integration.

    The class satisfies the TTSInterface contract so it can be wired into
    provider selection, but speech synthesis itself is not available yet.
    """

    def __init__(self, api_key: str):
        """Store the API key and warn that the provider is only a stub."""
        super().__init__()
        self.api_key = api_key
        log_warning("⚠️ BlazeTTS initialized (not implemented yet)")

    async def synthesize(self, text: str, voice_id: Optional[str] = None, **kwargs) -> bytes:
        """Always raise NotImplementedError; synthesis is not available yet."""
        message = "Blaze TTS not implemented yet"
        raise NotImplementedError(message)

    def get_supported_voices(self) -> Dict[str, str]:
        """Return the voice catalogue, which is empty for this stub."""
        no_voices: Dict[str, str] = {}
        return no_voices

    def get_provider_name(self) -> str:
        """Return the identifier of this provider."""
        provider = "blaze"
        return provider
tts/tts_elevenlabs.py ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ElevenLabs TTS Implementation
3
+ """
4
+ import httpx
5
+ from typing import Optional, Dict
6
+ from tts_interface import TTSInterface
7
+ from logger import log_info, log_error, log_debug, log_warning
8
+
9
class ElevenLabsTTS(TTSInterface):
    """ElevenLabs TTS implementation.

    Sends text to the ElevenLabs text-to-speech endpoint and returns the
    audio payload of the HTTP response.
    """

    def __init__(self, api_key: str):
        """Store the (whitespace-stripped) API key and provider defaults.

        Args:
            api_key: ElevenLabs API key; surrounding whitespace is removed.
        """
        super().__init__()
        self.api_key = api_key.strip()
        self.base_url = "https://api.elevenlabs.io/v1"
        self.default_voice_id = "2thYbn2sOGtiTwd9QwWH"  # Avencia

        # ElevenLabs preprocessing needs.
        # NOTE(review): TTSPreprocessor's flag constants have lowercase
        # values ("numbers", "currency", ...) and define no phone flag;
        # confirm how the caller maps these names onto the preprocessor.
        self.preprocessing_flags = {
            "PREPROCESS_NUMBERS",   # Large numbers
            "PREPROCESS_CURRENCY",  # Currency amounts
            "PREPROCESS_TIME",      # Time format
            "PREPROCESS_CODES",     # PNR/codes
            "PREPROCESS_PHONE",     # Phone numbers
        }

        # Debug log: mask the key that is actually used (the stripped one)
        # so stray whitespace never ends up in the visible prefix/suffix.
        key = self.api_key
        masked_key = f"{key[:4]}...{key[-4:]}" if len(key) > 8 else "***"
        log_debug(f"🔑 ElevenLabsTTS initialized with key: {masked_key}")

    async def synthesize(self, text: str, voice_id: Optional[str] = None, **kwargs) -> bytes:
        """Convert text to speech using the ElevenLabs API.

        Args:
            text: Text to synthesize.
            voice_id: ElevenLabs voice ID; the default voice is used when omitted.
            **kwargs: Optional overrides: ``model_id``, ``voice_settings``
                and ``output_format``.

        Returns:
            The audio payload returned by the API.

        Raises:
            httpx.HTTPStatusError: The API answered with an error status.
            Exception: Any other failure is logged and re-raised unchanged.
        """
        try:
            voice = voice_id or self.default_voice_id
            url = f"{self.base_url}/text-to-speech/{voice}"

            headers = {
                "xi-api-key": self.api_key,
                "Content-Type": "application/json",
            }

            # Default parameters, each overridable through kwargs
            data = {
                "text": text,
                "model_id": kwargs.get("model_id", "eleven_multilingual_v2"),
                "voice_settings": kwargs.get("voice_settings", {
                    "stability": 1,
                    "similarity_boost": 0.85,
                    "style": 0.7,
                    "speed": 1.14,
                    "use_speaker_boost": True,
                }),
            }

            # The output format travels as a query parameter
            params = {"output_format": kwargs.get("output_format", "mp3_44100_128")}

            log_debug(f"🎤 Calling ElevenLabs TTS for {len(text)} characters")

            async with httpx.AsyncClient(timeout=30.0) as client:
                response = await client.post(
                    url,
                    headers=headers,
                    json=data,
                    params=params,
                )

            response.raise_for_status()
            audio_data = response.content  # This should be bytes

            # Defensive fallback kept from the original: if a string shows
            # up, try to treat it as base64. The modules are imported here
            # because the file does not import them at the top.
            if isinstance(audio_data, str):
                import base64
                import binascii

                log_warning("ElevenLabs returned string instead of bytes")
                try:
                    audio_data = base64.b64decode(audio_data)
                except (binascii.Error, ValueError):
                    # Not base64: keep the payload untouched (best effort)
                    pass

            log_debug(f"✅ ElevenLabs TTS returned {len(audio_data)} bytes")
            log_debug(f"Audio data type: {type(audio_data)}")

            return audio_data

        except httpx.HTTPStatusError as e:
            log_error(f"❌ ElevenLabs API error: {e.response.status_code} - {e.response.text}")
            raise
        except Exception as e:
            log_error("❌ TTS synthesis error", e)
            raise

    def get_supported_voices(self) -> Dict[str, str]:
        """Get default voices - full list can be fetched from API"""
        return {
            "2thYbn2sOGtiTwd9QwWH": "Avencia (Female - Turkish)",
            "21m00Tcm4TlvDq8ikWAM": "Rachel (Female)",
            "EXAVITQu4vr4xnSDxMaL": "Bella (Female)",
            "ErXwobaYiN019PkySvjV": "Antoni (Male)",
            "VR6AewLTigWG4xSOukaG": "Arnold (Male)",
            "pNInz6obpgDQGcFmaJgB": "Adam (Male)",
            "yoZ06aMxZJJ28mfd3POQ": "Sam (Male)",
        }

    def get_provider_name(self) -> str:
        """Get provider name"""
        return "elevenlabs"
tts/tts_factory.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ TTS Provider Factory for Flare
3
+ """
4
+ from typing import Optional
5
+ from tts_interface import TTSInterface
6
+ from tts_elevenlabs import ElevenLabsTTS
7
+ from tts_blaze import BlazeTTS
8
+ from config_provider import ConfigProvider
9
+ from logger import log_info, log_error, log_debug, log_warning
10
+
11
class TTSFactory:
    """Builds the TTS provider selected in the global configuration."""

    @staticmethod
    def create_provider() -> Optional["TTSInterface"]:
        """Create TTS provider based on configuration.

        Returns:
            A provider instance, or None when no provider is configured,
            the provider is unknown/unsupported, or a required API key is
            missing. Each of the failure cases is logged as a warning.
        """
        cfg = ConfigProvider.get()
        tts_config = cfg.global_config.tts_provider

        if not tts_config or tts_config.name == "no_tts":
            log_info("🔇 No TTS provider configured")
            return None

        provider_name = tts_config.name
        log_info(f"🏭 Creating TTS provider: {provider_name}")

        # Get provider definition
        provider_def = cfg.global_config.get_provider_config("tts", provider_name)
        if not provider_def:
            log_warning(f"⚠️ Unknown TTS provider: {provider_name}")
            return None

        # Get API key
        api_key = TTSFactory._get_api_key(tts_config)
        if not api_key and provider_def.requires_api_key:
            log_warning(f"⚠️ No API key for TTS provider: {provider_name}")
            return None

        # Create provider based on name
        if provider_name == "elevenlabs":
            return ElevenLabsTTS(api_key)
        if provider_name == "blaze":
            return BlazeTTS(api_key)

        log_warning(f"⚠️ Unsupported TTS provider: {provider_name}")
        return None

    @staticmethod
    def _get_api_key(tts_config) -> Optional[str]:
        """Return the provider API key, decrypting it when it is stored encrypted.

        Args:
            tts_config: Object exposing an ``api_key`` attribute.

        Returns:
            None when no key is set; the result of ``decrypt`` for values
            starting with ``"enc:"``; otherwise the key unchanged.
        """
        api_key = tts_config.api_key
        if not api_key:
            return None

        if api_key.startswith("enc:"):
            # Imported lazily so plain-text keys never need the crypto module
            from encryption_utils import decrypt
            return decrypt(api_key)

        return api_key
tts/tts_google.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # tts_google.py
2
import os
from typing import Dict, Optional

from google.cloud import texttospeech

from logger import log_info, log_error, log_debug, log_warning
from ssml_converter import SSMLConverter
from tts_interface import TTSInterface
5
+
6
class GoogleCloudTTS(TTSInterface):
    """Google Cloud Text-to-Speech implementation"""

    def __init__(self, credentials_path: str):
        """Create the Google client and the SSML converter.

        Args:
            credentials_path: Path exported through the
                GOOGLE_APPLICATION_CREDENTIALS environment variable before
                the client is created.
        """
        super().__init__()
        self.supports_ssml = True
        self.credentials_path = credentials_path

        # Google TTS doesn't need preprocessing with SSML
        self.preprocessing_flags = set()

        # Initialize client.
        # NOTE: this sets a process-wide environment variable.
        os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = credentials_path
        self.client = texttospeech.TextToSpeechClient()

        # SSML converter
        self.ssml_converter = SSMLConverter(language="tr-TR")

    async def synthesize(self, text: str, voice_id: Optional[str] = None, **kwargs) -> bytes:
        """Convert text to speech using Google Cloud TTS.

        Args:
            text: Plain text or an SSML document.
            voice_id: Voice name; "tr-TR-Wavenet-B" is used when omitted.
            **kwargs: Optional ``use_ssml`` (default True), ``language_code``,
                ``speaking_rate``, ``pitch`` and ``volume_gain_db``.

        Returns:
            The ``audio_content`` of the synthesis response (MP3 encoding
            is requested).

        Raises:
            Exception: Any failure is logged and re-raised unchanged.
        """
        try:
            # Check if SSML should be used
            use_ssml = kwargs.get("use_ssml", True)

            if use_ssml:
                # Text that is already SSML must still be sent through the
                # ssml field; only plain text needs converting first.
                if not text.lstrip().startswith("<speak"):
                    text = self.ssml_converter.convert_to_ssml(text)
                    log_info(f"📝 Converted to SSML: {text[:200]}...")
                input_text = texttospeech.SynthesisInput(ssml=text)
            else:
                input_text = texttospeech.SynthesisInput(text=text)

            # Voice selection
            voice = texttospeech.VoiceSelectionParams(
                language_code=kwargs.get("language_code", "tr-TR"),
                name=voice_id or "tr-TR-Wavenet-B",
                ssml_gender=texttospeech.SsmlVoiceGender.FEMALE,
            )

            # Audio config
            audio_config = texttospeech.AudioConfig(
                audio_encoding=texttospeech.AudioEncoding.MP3,
                speaking_rate=kwargs.get("speaking_rate", 1.0),
                pitch=kwargs.get("pitch", 0.0),
                volume_gain_db=kwargs.get("volume_gain_db", 0.0),
            )

            # Perform synthesis.
            # NOTE(review): this call is not awaited, so it presumably
            # blocks the event loop while the request runs - confirm.
            response = self.client.synthesize_speech(
                input=input_text,
                voice=voice,
                audio_config=audio_config,
            )

            log_info(f"✅ Google TTS returned {len(response.audio_content)} bytes")
            return response.audio_content

        except Exception as e:
            log_error("❌ Google TTS error", e)
            raise

    def get_supported_voices(self) -> Dict[str, str]:
        """Return the voices known to this wrapper.

        Only the default voice used by ``synthesize`` is listed.
        """
        return {"tr-TR-Wavenet-B": "tr-TR-Wavenet-B (default)"}

    def get_provider_name(self) -> str:
        """Get provider name"""
        return "google"
tts/tts_interface.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ TTS Interface for Flare
3
+ """
4
+ from abc import ABC, abstractmethod
5
+ from typing import Optional, Dict, Any, Set
6
+ from datetime import datetime
7
+ import sys
8
+
9
class TTSInterface(ABC):
    """Contract that every TTS provider has to fulfil."""

    def __init__(self):
        # Providers overwrite these two attributes in their own __init__.
        self.supports_ssml: bool = False
        self.preprocessing_flags: Set[str] = set()

    @abstractmethod
    async def synthesize(self, text: str, voice_id: Optional[str] = None, **kwargs) -> bytes:
        """Turn the given text into audio.

        Args:
            text: Text to convert to speech
            voice_id: Optional voice ID specific to the provider
            **kwargs: Additional provider-specific parameters

        Returns:
            Audio data as bytes (MP3 or WAV format)
        """

    @abstractmethod
    def get_supported_voices(self) -> Dict[str, str]:
        """Return the voices offered by the provider."""

    @abstractmethod
    def get_provider_name(self) -> str:
        """Return the provider name used for logging."""

    def supports_ssml_format(self) -> bool:
        """Tell whether the provider accepts SSML input."""
        return self.supports_ssml

    def get_preprocessing_flags(self) -> Set[str]:
        """Return the preprocessing flags requested by the provider."""
        return self.preprocessing_flags
tts/tts_preprocessor.py ADDED
@@ -0,0 +1,232 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ TTS Text Preprocessing Utilities with Multilingual Support
3
+ """
4
+
5
+ import re
6
+ import json
7
+ from typing import Dict, Set, Optional
8
+ from num2words import num2words
9
+ from pathlib import Path
10
+ from locale_manager import LocaleManager
11
+
12
class TTSPreprocessor:
    """Text preprocessor for TTS providers with multilingual support.

    Rewrites currency amounts, times, dates, codes, percentages and numbers
    into a form that is easier to pronounce, driven by the locale data
    loaded for ``language``.
    """

    # Preprocessing flags
    PREPROCESS_NUMBERS = "numbers"
    PREPROCESS_CURRENCY = "currency"
    PREPROCESS_TIME = "time"
    PREPROCESS_DATE = "date"
    PREPROCESS_CODES = "codes"
    PREPROCESS_PERCENTAGE = "percentage"

    # Amount as written in running text, including grouped digits
    # such as 1,234.56 or 1.234,56
    _AMOUNT = r'\d+(?:[.,]\d+)*'

    def __init__(self, language: str = "tr"):
        """Load the locale data for ``language`` through LocaleManager."""
        self.language = language
        self.locale_data = LocaleManager.get_locale(language)

    def preprocess(self, text: str, flags: Set[str]) -> str:
        """Apply the preprocessing steps selected by ``flags``.

        Flags may be given either as the constant values ("numbers") or as
        the constant names ("PREPROCESS_NUMBERS"); unknown flags are ignored.

        Args:
            text: Text to rewrite.
            flags: Selected preprocessing steps.

        Returns:
            The rewritten text.
        """
        active = {self._normalize_flag(flag) for flag in (flags or ())}

        # Numbers are processed last to avoid conflicts with the other steps
        steps = (
            (self.PREPROCESS_CURRENCY, self._process_currency),
            (self.PREPROCESS_TIME, self._process_time),
            (self.PREPROCESS_DATE, self._process_date),
            (self.PREPROCESS_CODES, self._process_codes),
            (self.PREPROCESS_PERCENTAGE, self._process_percentage),
            (self.PREPROCESS_NUMBERS, self._process_numbers),
        )
        for flag, step in steps:
            if flag in active:
                text = step(text)

        return text

    @staticmethod
    def _normalize_flag(flag: str) -> str:
        """Map a constant name such as "PREPROCESS_TIME" to its value "time"."""
        prefix = "PREPROCESS_"
        if isinstance(flag, str) and flag.startswith(prefix):
            return flag[len(prefix):].lower()
        return flag

    def _number_to_words(self, number: int) -> str:
        """Spell ``number`` in the configured language, falling back to English."""
        try:
            return num2words(number, lang=self.language)
        except NotImplementedError:
            # Fallback to English if language not supported
            return num2words(number, lang='en')

    def _process_numbers(self, text: str) -> str:
        """Convert numbers to words based on locale.

        Whole numbers between 0 and the locale's ``small_number_threshold``
        (default 100) stay as digits. Larger whole numbers are spelled out.
        Decimals are spoken as "<integer> <decimal word> <hundredths>".
        """
        decimal_word = self.locale_data["numbers"]["decimal_word"]
        threshold = self.locale_data.get("small_number_threshold", 100)
        turkish = self.language == "tr"

        def replace_number(match):
            original = match.group()

            # Normalize number format
            if turkish:
                # Turkish: 1.234,56 -> 1234.56
                normalized = original.replace('.', '').replace(',', '.')
            else:
                # English: 1,234.56 -> 1234.56
                normalized = original.replace(',', '')

            int_digits, _, frac_digits = normalized.partition('.')

            try:
                integer_part = int(int_digits)

                if not frac_digits.strip('0'):
                    # Whole number: keep small values as digits
                    if 0 <= integer_part <= threshold:
                        return str(integer_part)
                    return self._number_to_words(integer_part)

                # Take the hundredths straight from the digits; float
                # arithmetic would turn e.g. 1234.56 into 55 hundredths.
                # NOTE: leading zeros are not spoken (1.05 -> "... 5").
                hundredths = int(frac_digits[:2].ljust(2, '0'))
                int_words = self._number_to_words(integer_part)
                dec_words = self._number_to_words(hundredths)
                return f"{int_words} {decimal_word} {dec_words}"
            except (ValueError, TypeError, OverflowError):
                # Leave the text untouched when the number cannot be converted
                return original

        # Match numbers with locale-specific format
        if turkish:
            pattern = r'\b\d{1,3}(?:\.\d{3})*(?:,\d+)?\b|\b\d+(?:,\d+)?\b'
        else:
            pattern = r'\b\d{1,3}(?:,\d{3})*(?:\.\d+)?\b|\b\d+(?:\.\d+)?\b'

        return re.sub(pattern, replace_number, text)

    def _process_codes(self, text: str) -> str:
        """Spell out codes like PNR or flight numbers - language agnostic.

        A code is 2-5 uppercase letters followed by 2-5 digits; its
        characters are separated by spaces ("AB123" -> "A B 1 2 3").
        """
        pattern = r'\b[A-Z]{2,5}\d{2,5}\b'
        return re.sub(pattern, lambda match: ' '.join(match.group()), text)

    def _process_currency(self, text: str) -> str:
        """Replace currency symbols and codes with the locale's currency word.

        Amounts with a symbol ("$100" or "100₺", depending on the locale's
        ``position``) become "<amount> <word>"; symbols left without an
        amount become " <word> "; amounts followed by the currency code
        become "<amount> <word>".
        """
        currency_data = self.locale_data.get("currency", {})

        if not isinstance(currency_data, dict):
            return text

        symbol = currency_data.get("symbol", "")
        word = currency_data.get("word", "")
        code = currency_data.get("code", "")
        position = currency_data.get("position", "before")

        def amount_then_word(match):
            return f"{match.group(1)} {word}"

        if symbol and word:
            escaped_symbol = re.escape(symbol)

            # Symbol with amount. This must run before the standalone
            # replacement below, otherwise no symbol is left to match.
            if position == "before":
                # $100 -> 100 dollar
                pattern = rf'{escaped_symbol}\s*({self._AMOUNT})'
            else:
                # 100₺ -> 100 lira
                pattern = rf'({self._AMOUNT})\s*{escaped_symbol}'
            text = re.sub(pattern, amount_then_word, text)

            # Replace the remaining standalone symbols
            text = text.replace(symbol, f" {word} ")

        # Process currency codes
        if code and word:
            # NOTE(review): matching is case-insensitive, so a code that is
            # also an ordinary word (e.g. "TRY") matches "3 try" - confirm.
            pattern = rf'({self._AMOUNT})\s*{re.escape(code)}\b'
            text = re.sub(pattern, amount_then_word, text, flags=re.IGNORECASE)

        return text

    def _process_percentage(self, text: str) -> str:
        """Replace the percent sign with the locale's percentage word.

        With position "before" the input form is "%50" and the word is put
        in front of the number; otherwise the input form is "50%" and the
        word follows the number.
        """
        percentage = self.locale_data.get("percentage", {})

        if not isinstance(percentage, dict):
            return text

        word = percentage.get("word", "percent")
        position = percentage.get("position", "after")

        if position == "before":
            # %50 -> yüzde 50
            pattern = r'%\s*(\d+(?:[.,]\d+)?)'
            return re.sub(pattern, lambda match: f"{word} {match.group(1)}", text)

        # 50% -> 50 percent
        pattern = r'(\d+(?:[.,]\d+)?)\s*%'
        return re.sub(pattern, lambda match: f"{match.group(1)} {word}", text)

    def _process_date(self, text: str) -> str:
        """Rewrite ISO dates (YYYY-MM-DD) with a month name.

        The output order follows the locale's ``date_format``; dates stay
        unchanged for formats other than DD.MM.YYYY and MM/DD/YYYY.
        """
        months = self.locale_data.get("months", {})
        date_format = self.locale_data.get("date_format", "YYYY-MM-DD")

        if not isinstance(months, dict):
            return text

        def month_name_for(month):
            # Try the zero-padded key first ("03"), then the plain one ("3")
            if month in months:
                return months[month]
            return months.get(str(int(month)), month)

        # Convert ISO format dates
        def replace_date(match):
            year, month, day = match.groups()
            month_name = month_name_for(month)

            # Format based on locale preference
            if "DD.MM.YYYY" in date_format:
                # Turkish format with month name
                return f"{int(day)} {month_name} {year}"
            if "MM/DD/YYYY" in date_format:
                # US format with month name
                return f"{month_name} {int(day)}, {year}"
            return match.group()

        pattern = r'(\d{4})-(\d{2})-(\d{2})'
        return re.sub(pattern, replace_date, text)

    def _process_time(self, text: str) -> str:
        """Rewrite HH:MM times based on locale.

        With format "word" (the default) hour and minute are spelled out
        and joined with the locale's separator; a zero minute is dropped.
        Any other format yields "<hour> <minute>" in digits.
        """
        time_data = self.locale_data.get("time", {})

        if isinstance(time_data, dict):
            time_format = time_data.get("format", "word")
            separator = time_data.get("separator", " ")
        else:
            time_format = "word"
            separator = " "

        def replace_time(match):
            hour, minute = match.groups()

            if time_format != "word":
                return f"{hour} {minute}"

            hour_int = int(hour)
            minute_int = int(minute)
            try:
                hour_word = num2words(hour_int, lang=self.language)
                if minute_int == 0:
                    return hour_word
                minute_word = num2words(minute_int, lang=self.language)
                return f"{hour_word}{separator}{minute_word}"
            except NotImplementedError:
                return f"{hour} {minute}"

        pattern = r'(\d{1,2}):(\d{2})'
        return re.sub(pattern, replace_time, text)