Spaces:
Building
Building
Upload 6 files
Browse files- tts/tts_blaze.py +26 -0
- tts/tts_elevenlabs.py +109 -0
- tts/tts_factory.py +56 -0
- tts/tts_google.py +65 -0
- tts/tts_interface.py +47 -0
- tts/tts_preprocessor.py +232 -0
tts/tts_blaze.py
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Blaze TTS Implementation (Placeholder)
|
3 |
+
"""
|
4 |
+
from typing import Optional, Dict
|
5 |
+
from tts_interface import TTSInterface
|
6 |
+
from logger import log_info, log_error, log_debug, log_warning
|
7 |
+
|
8 |
+
class BlazeTTS(TTSInterface):
    """Stub provider that reserves the "blaze" name until Blaze TTS is implemented."""

    def __init__(self, api_key: str):
        """Keep the API key and warn that this provider cannot synthesize yet."""
        super().__init__()
        self.api_key = api_key
        log_warning("⚠️ BlazeTTS initialized (not implemented yet)")

    async def synthesize(self, text: str, voice_id: Optional[str] = None, **kwargs) -> bytes:
        """Always fail: synthesis is not available for this provider.

        Raises:
            NotImplementedError: on every call.
        """
        raise NotImplementedError("Blaze TTS not implemented yet")

    def get_supported_voices(self) -> Dict[str, str]:
        """Return an empty voice mapping (no voices are defined)."""
        voices: Dict[str, str] = {}
        return voices

    def get_provider_name(self) -> str:
        """Return the provider identifier, ``"blaze"``."""
        return "blaze"
|
tts/tts_elevenlabs.py
ADDED
@@ -0,0 +1,109 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
ElevenLabs TTS Implementation
|
3 |
+
"""
|
4 |
+
import httpx
|
5 |
+
from typing import Optional, Dict
|
6 |
+
from tts_interface import TTSInterface
|
7 |
+
from logger import log_info, log_error, log_debug, log_warning
|
8 |
+
|
9 |
+
class ElevenLabsTTS(TTSInterface):
    """ElevenLabs TTS implementation.

    Posts text to the ElevenLabs ``text-to-speech`` REST endpoint and returns
    the body of the response as audio data.
    """

    def __init__(self, api_key: str):
        """Store the API key and the provider defaults.

        Args:
            api_key: ElevenLabs API key; surrounding whitespace is stripped.
        """
        super().__init__()
        self.api_key = api_key.strip()
        self.base_url = "https://api.elevenlabs.io/v1"
        self.default_voice_id = "2thYbn2sOGtiTwd9QwWH"  # Avencia

        # ElevenLabs preprocessing needs.
        # NOTE(review): these are the *names* of the TTSPreprocessor constants,
        # while TTSPreprocessor.preprocess() tests membership of the constant
        # *values* ("numbers", "currency", ...), and no PREPROCESS_PHONE
        # constant exists there — confirm how the caller maps these flags.
        self.preprocessing_flags = {
            "PREPROCESS_NUMBERS",   # Large numbers
            "PREPROCESS_CURRENCY",  # Currency amounts
            "PREPROCESS_TIME",      # Time format
            "PREPROCESS_CODES",     # PNR/codes
            "PREPROCESS_PHONE"      # Phone numbers
        }

        # Debug log: mask the key that is actually stored (the stripped one)
        key = self.api_key
        masked_key = f"{key[:4]}...{key[-4:]}" if len(key) > 8 else "***"
        log_debug(f"🔑 ElevenLabsTTS initialized with key: {masked_key}")

    async def synthesize(self, text: str, voice_id: Optional[str] = None, **kwargs) -> bytes:
        """Convert text to speech using the ElevenLabs API.

        Args:
            text: Text to synthesize.
            voice_id: ElevenLabs voice ID; falls back to ``default_voice_id``.
            **kwargs: Optional ``model_id``, ``voice_settings`` and
                ``output_format`` overrides.

        Returns:
            The response body (audio data).

        Raises:
            httpx.HTTPStatusError: when the API answers with an error status.
            Exception: any other failure is logged and re-raised.
        """
        # Local imports: only needed by the defensive string-payload branch.
        import base64
        import binascii

        try:
            voice = voice_id or self.default_voice_id
            url = f"{self.base_url}/text-to-speech/{voice}"

            headers = {
                "xi-api-key": self.api_key,
                "Content-Type": "application/json"
            }

            # Default parameters
            data = {
                "text": text,
                "model_id": kwargs.get("model_id", "eleven_multilingual_v2"),
                "voice_settings": kwargs.get("voice_settings", {
                    "stability": 1,
                    "similarity_boost": 0.85,
                    "style": 0.7,
                    "speed": 1.14,
                    "use_speaker_boost": True
                })
            }

            # Output format is sent as a query parameter
            params = {"output_format": kwargs.get("output_format", "mp3_44100_128")}

            log_debug(f"🎤 Calling ElevenLabs TTS for {len(text)} characters")

            async with httpx.AsyncClient(timeout=30.0) as client:
                response = await client.post(
                    url,
                    headers=headers,
                    json=data,
                    params=params
                )

            response.raise_for_status()
            audio_data = response.content  # This should be bytes

            # Defensive: if a string ever shows up, try to treat it as base64.
            if isinstance(audio_data, str):
                log_warning("ElevenLabs returned string instead of bytes")
                try:
                    audio_data = base64.b64decode(audio_data)
                except (binascii.Error, ValueError):
                    # Best effort only: keep the payload as received.
                    log_warning("ElevenLabs payload is not valid base64; returning it unchanged")

            log_debug(f"✅ ElevenLabs TTS returned {len(audio_data)} bytes")
            log_debug(f"Audio data type: {type(audio_data)}")

            return audio_data

        except httpx.HTTPStatusError as e:
            log_error(f"❌ ElevenLabs API error: {e.response.status_code} - {e.response.text}")
            raise
        except Exception as e:
            log_error("❌ TTS synthesis error", e)
            raise

    def get_supported_voices(self) -> Dict[str, str]:
        """Get default voices - full list can be fetched from API.

        Returns:
            Mapping of voice ID to a human-readable label.
        """
        return {
            "2thYbn2sOGtiTwd9QwWH": "Avencia (Female - Turkish)",
            "21m00Tcm4TlvDq8ikWAM": "Rachel (Female)",
            "EXAVITQu4vr4xnSDxMaL": "Bella (Female)",
            "ErXwobaYiN019PkySvjV": "Antoni (Male)",
            "VR6AewLTigWG4xSOukaG": "Arnold (Male)",
            "pNInz6obpgDQGcFmaJgB": "Adam (Male)",
            "yoZ06aMxZJJ28mfd3POQ": "Sam (Male)",
        }

    def get_provider_name(self) -> str:
        """Get provider name."""
        return "elevenlabs"
|
tts/tts_factory.py
ADDED
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
TTS Provider Factory for Flare
|
3 |
+
"""
|
4 |
+
from typing import Optional
|
5 |
+
from tts_interface import TTSInterface
|
6 |
+
from tts_elevenlabs import ElevenLabsTTS
|
7 |
+
from tts_blaze import BlazeTTS
|
8 |
+
from config_provider import ConfigProvider
|
9 |
+
from logger import log_info, log_error, log_debug, log_warning
|
10 |
+
|
11 |
+
class TTSFactory:
    """Creates the TTS provider selected in the global configuration."""

    @staticmethod
    def create_provider() -> Optional["TTSInterface"]:
        """Create TTS provider based on configuration.

        Returns:
            A provider instance, or ``None`` when TTS is disabled ("no_tts"),
            the provider is unknown or unsupported, or a required API key is
            missing.
        """
        cfg = ConfigProvider.get()
        tts_config = cfg.global_config.tts_provider

        if not tts_config or tts_config.name == "no_tts":
            log_info("🔇 No TTS provider configured")
            return None

        provider_name = tts_config.name
        log_info(f"🏭 Creating TTS provider: {provider_name}")

        # Get provider definition
        provider_def = cfg.global_config.get_provider_config("tts", provider_name)
        if not provider_def:
            # Misconfiguration: report at warning level, not info
            log_warning(f"⚠️ Unknown TTS provider: {provider_name}")
            return None

        # Get API key
        api_key = TTSFactory._get_api_key(tts_config)
        if not api_key and provider_def.requires_api_key:
            log_warning(f"⚠️ No API key for TTS provider: {provider_name}")
            return None

        # Create provider based on name
        if provider_name == "elevenlabs":
            return ElevenLabsTTS(api_key)
        if provider_name == "blaze":
            return BlazeTTS(api_key)

        log_warning(f"⚠️ Unsupported TTS provider: {provider_name}")
        return None

    @staticmethod
    def _get_api_key(tts_config) -> Optional[str]:
        """Get decrypted API key.

        Args:
            tts_config: Object exposing an ``api_key`` attribute.

        Returns:
            ``None`` when no key is set; the decrypted key when the stored
            value starts with ``"enc:"``; otherwise the stored value as-is.
        """
        stored_key = tts_config.api_key
        if not stored_key:
            return None

        if stored_key.startswith("enc:"):
            # Imported lazily so plain-text keys do not need encryption_utils
            from encryption_utils import decrypt
            return decrypt(stored_key)

        return stored_key
|
tts/tts_google.py
ADDED
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# tts_google.py
|
2 |
+
import os
from typing import Dict, Optional

from google.cloud import texttospeech

from logger import log_info, log_error, log_debug, log_warning
from ssml_converter import SSMLConverter
from tts_interface import TTSInterface
|
5 |
+
|
6 |
+
class GoogleCloudTTS(TTSInterface):
    """Google Cloud Text-to-Speech implementation."""

    # Voice used when the caller does not pass a voice_id
    DEFAULT_VOICE_ID = "tr-TR-Wavenet-B"

    def __init__(self, credentials_path: str):
        """Create the Google client and the SSML converter.

        Args:
            credentials_path: Path exported as GOOGLE_APPLICATION_CREDENTIALS
                before the client is created.
        """
        super().__init__()
        self.supports_ssml = True
        self.credentials_path = credentials_path

        # Google TTS doesn't need preprocessing with SSML
        self.preprocessing_flags = set()

        # Initialize client (note: this sets a process-wide environment variable)
        os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = credentials_path
        self.client = texttospeech.TextToSpeechClient()

        # SSML converter
        self.ssml_converter = SSMLConverter(language="tr-TR")

    async def synthesize(self, text: str, voice_id: Optional[str] = None, **kwargs) -> bytes:
        """Convert text to speech using Google Cloud TTS.

        Args:
            text: Plain text, or SSML starting with ``<speak>``.
            voice_id: Google voice name; defaults to ``DEFAULT_VOICE_ID``.
            **kwargs: Optional ``use_ssml`` (default True), ``language_code``,
                ``ssml_gender``, ``speaking_rate``, ``pitch``,
                ``volume_gain_db``.

        Returns:
            The ``audio_content`` of the synthesis response (MP3 encoding is
            requested).

        Raises:
            Exception: any failure is logged and re-raised.
        """
        try:
            # Check if SSML should be used
            use_ssml = kwargs.get("use_ssml", True)

            if use_ssml:
                if not text.startswith("<speak>"):
                    # Convert to SSML
                    text = self.ssml_converter.convert_to_ssml(text)
                    log_info(f"📝 Converted to SSML: {text[:200]}...")
                # Text that already is SSML must also be sent as SSML,
                # otherwise the markup would be treated as plain text.
                input_text = texttospeech.SynthesisInput(ssml=text)
            else:
                input_text = texttospeech.SynthesisInput(text=text)

            # Voice selection: the voice name decides the voice; a gender is
            # only sent when the caller asks for one, so it cannot contradict
            # the selected voice.
            voice_params = {
                "language_code": kwargs.get("language_code", "tr-TR"),
                "name": voice_id or self.DEFAULT_VOICE_ID,
            }
            if "ssml_gender" in kwargs:
                voice_params["ssml_gender"] = kwargs["ssml_gender"]
            voice = texttospeech.VoiceSelectionParams(**voice_params)

            # Audio config
            audio_config = texttospeech.AudioConfig(
                audio_encoding=texttospeech.AudioEncoding.MP3,
                speaking_rate=kwargs.get("speaking_rate", 1.0),
                pitch=kwargs.get("pitch", 0.0),
                volume_gain_db=kwargs.get("volume_gain_db", 0.0)
            )

            # Perform synthesis
            # NOTE(review): this client call is synchronous inside an async
            # method — confirm whether it should be moved off the event loop.
            response = self.client.synthesize_speech(
                input=input_text,
                voice=voice,
                audio_config=audio_config
            )

            log_info(f"✅ Google TTS returned {len(response.audio_content)} bytes")
            return response.audio_content

        except Exception as e:
            log_error("❌ Google TTS error", e)
            raise

    def get_supported_voices(self) -> Dict[str, str]:
        """Get default voices.

        Only the built-in default voice is listed here.
        """
        return {self.DEFAULT_VOICE_ID: "Turkish Wavenet B (default)"}

    def get_provider_name(self) -> str:
        """Get provider name."""
        return "google"
|
tts/tts_interface.py
ADDED
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
TTS Interface for Flare
|
3 |
+
"""
|
4 |
+
from abc import ABC, abstractmethod
|
5 |
+
from typing import Optional, Dict, Any, Set
|
6 |
+
from datetime import datetime
|
7 |
+
import sys
|
8 |
+
|
9 |
+
class TTSInterface(ABC):
    """Contract every text-to-speech provider has to implement."""

    def __init__(self):
        # Flags naming the text preprocessing steps this provider wants
        self.preprocessing_flags: Set[str] = set()
        # Whether the provider accepts SSML input
        self.supports_ssml: bool = False

    @abstractmethod
    async def synthesize(self, text: str, voice_id: Optional[str] = None, **kwargs) -> bytes:
        """
        Turn text into speech and hand back the audio bytes.

        Args:
            text: Text to convert to speech
            voice_id: Optional provider-specific voice ID
            **kwargs: Extra provider-specific parameters

        Returns:
            Audio data as bytes (MP3 or WAV format)
        """
        ...

    @abstractmethod
    def get_supported_voices(self) -> Dict[str, str]:
        """Return the voices the provider supports."""
        ...

    @abstractmethod
    def get_provider_name(self) -> str:
        """Return the provider name used for logging."""
        ...

    def get_preprocessing_flags(self) -> Set[str]:
        """Return this provider's preprocessing flags."""
        flags = self.preprocessing_flags
        return flags

    def supports_ssml_format(self) -> bool:
        """Tell whether this provider supports SSML."""
        ssml_ok = self.supports_ssml
        return ssml_ok
|
tts/tts_preprocessor.py
ADDED
@@ -0,0 +1,232 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
TTS Text Preprocessing Utilities with Multilingual Support
|
3 |
+
"""
|
4 |
+
|
5 |
+
import re
|
6 |
+
import json
|
7 |
+
from typing import Dict, Set, Optional
|
8 |
+
from num2words import num2words
|
9 |
+
from pathlib import Path
|
10 |
+
from locale_manager import LocaleManager
|
11 |
+
|
12 |
+
class TTSPreprocessor:
    """Text preprocessor for TTS providers with multilingual support.

    Rewrites currency, time, date, code, percentage and number notations in a
    text into forms that are easier to speak, driven by the locale data loaded
    for ``language``.
    """

    # Preprocessing flags
    PREPROCESS_NUMBERS = "numbers"
    PREPROCESS_CURRENCY = "currency"
    PREPROCESS_TIME = "time"
    PREPROCESS_DATE = "date"
    PREPROCESS_CODES = "codes"
    PREPROCESS_PERCENTAGE = "percentage"

    # Regex fragment capturing an amount such as 100, 1,5 or 2.75
    _AMOUNT = r'(\d+(?:[.,]\d+)?)'

    def __init__(self, language: str = "tr"):
        """Load the locale data for ``language``."""
        self.language = language
        self.locale_data = LocaleManager.get_locale(language)

    def preprocess(self, text: str, flags: Set[str]) -> str:
        """Apply preprocessing based on flags.

        Args:
            text: Text to rewrite.
            flags: Set of ``PREPROCESS_*`` values selecting the steps to run.

        Returns:
            The rewritten text.
        """
        if self.PREPROCESS_CURRENCY in flags:
            text = self._process_currency(text)

        if self.PREPROCESS_TIME in flags:
            text = self._process_time(text)

        if self.PREPROCESS_DATE in flags:
            text = self._process_date(text)

        if self.PREPROCESS_CODES in flags:
            text = self._process_codes(text)

        if self.PREPROCESS_PERCENTAGE in flags:
            text = self._process_percentage(text)

        # Numbers should be processed last to avoid conflicts
        if self.PREPROCESS_NUMBERS in flags:
            text = self._process_numbers(text)

        return text

    def _to_words(self, number: int) -> str:
        """Spell ``number`` in the configured language, falling back to English."""
        try:
            return num2words(number, lang=self.language)
        except NotImplementedError:
            # Fallback to English if language not supported
            return num2words(number, lang='en')

    def _process_numbers(self, text: str) -> str:
        """Convert numbers to words based on locale.

        Non-negative integers up to the locale's ``small_number_threshold``
        (default 100) stay as digits; larger integers and all decimals are
        spelled out. Decimals are spoken as "<integer> <decimal_word>
        <hundredths>". Number formats are chosen by language code: "tr" uses
        1.234,56, everything else 1,234.56.
        """
        decimal_word = self.locale_data["numbers"]["decimal_word"]
        threshold = self.locale_data.get("small_number_threshold", 100)
        turkish = self.language == "tr"

        def replace_number(match):
            original = match.group()

            # Normalize number format
            if turkish:
                # Turkish: 1.234,56 -> 1234.56
                normalized = original.replace('.', '').replace(',', '.')
            else:
                # English: 1,234.56 -> 1234.56
                normalized = original.replace(',', '')

            try:
                value = float(normalized)

                if value.is_integer():
                    number = int(value)
                    # Keep small numbers as is based on threshold
                    if 0 <= number <= threshold:
                        return str(number)
                    # Convert large numbers to words
                    return self._to_words(number)

                # Handle decimal: take the hundredths from the digits
                # themselves; float arithmetic would truncate e.g. 2.07 to 6.
                fraction_digits = normalized.partition('.')[2]
                integer_part = int(value)
                decimal_part = int((fraction_digits + "0")[:2])

                int_words = self._to_words(integer_part)
                dec_words = self._to_words(decimal_part)
                return f"{int_words} {decimal_word} {dec_words}"

            except Exception:
                # Best effort: leave the text exactly as it was written when
                # the number cannot be spelled out.
                return original

        # Match numbers with locale-specific format
        if turkish:
            pattern = r'\b\d{1,3}(?:\.\d{3})*(?:,\d+)?\b|\b\d+(?:,\d+)?\b'
        else:
            pattern = r'\b\d{1,3}(?:,\d{3})*(?:\.\d+)?\b|\b\d+(?:\.\d+)?\b'

        return re.sub(pattern, replace_number, text)

    def _process_codes(self, text: str) -> str:
        """Process codes like PNR, flight numbers - language agnostic.

        Codes made of 2-5 uppercase letters followed by 2-5 digits are split
        into space-separated characters.
        """
        def spell_code(match):
            return ' '.join(match.group())

        # Match uppercase letters followed by numbers
        pattern = r'\b[A-Z]{2,5}\d{2,5}\b'
        return re.sub(pattern, spell_code, text)

    def _process_currency(self, text: str) -> str:
        """Process currency symbols and amounts based on locale.

        Symbol+amount pairs become "<amount> <word>"; any remaining standalone
        symbol becomes " <word> "; "<amount> <code>" (case-insensitive) becomes
        "<amount> <word>".
        """
        currency_data = self.locale_data.get("currency", {})

        if not isinstance(currency_data, dict):
            return text

        symbol = currency_data.get("symbol", "")
        word = currency_data.get("word", "")
        code = currency_data.get("code", "")
        position = currency_data.get("position", "before")

        def amount_then_word(match):
            return f"{match.group(1)} {word}"

        if symbol and word:
            escaped_symbol = re.escape(symbol)

            # Replace symbol with amount first; doing the plain symbol
            # replacement earlier would leave nothing for this pattern to match.
            if position == "before":
                # $100 -> 100 dollar
                pattern = rf'{escaped_symbol}\s*{self._AMOUNT}'
            else:
                # 100₺ -> 100 lira
                pattern = rf'{self._AMOUNT}\s*{escaped_symbol}'
            text = re.sub(pattern, amount_then_word, text)

            # Replace the remaining standalone symbols
            text = text.replace(symbol, f" {word} ")

        # Process currency codes
        if code and word:
            pattern = rf'{self._AMOUNT}\s*{re.escape(code)}\b'
            text = re.sub(pattern, amount_then_word, text, flags=re.IGNORECASE)

        return text

    def _process_percentage(self, text: str) -> str:
        """Process percentage symbols based on locale.

        Uses the locale's percentage ``word`` (default "percent") and
        ``position`` (default "after").
        """
        percentage = self.locale_data.get("percentage", {})

        if not isinstance(percentage, dict):
            return text

        word = percentage.get("word", "percent")
        position = percentage.get("position", "after")

        if position == "before":
            # %50 -> yüzde 50
            pattern = r'%\s*(\d+(?:[.,]\d+)?)'
            return re.sub(pattern, lambda m: f"{word} {m.group(1)}", text)

        # 50% -> 50 percent
        pattern = r'(\d+(?:[.,]\d+)?)\s*%'
        return re.sub(pattern, lambda m: f"{m.group(1)} {word}", text)

    def _process_date(self, text: str) -> str:
        """Process date formats based on locale.

        ISO dates (YYYY-MM-DD) are rewritten with the month looked up in the
        locale's ``months`` mapping (keyed by the two-digit month string as it
        appears in the text); other locale date formats leave the date as is.
        """
        months = self.locale_data.get("months", {})
        date_format = self.locale_data.get("date_format", "YYYY-MM-DD")

        if not isinstance(months, dict):
            return text

        # Convert ISO format dates
        def replace_date(match):
            year, month, day = match.groups()
            month_name = months.get(month, month)

            # Format based on locale preference
            if "DD.MM.YYYY" in date_format:
                # Turkish format with month name
                return f"{int(day)} {month_name} {year}"
            if "MM/DD/YYYY" in date_format:
                # US format with month name
                return f"{month_name} {int(day)}, {year}"
            return match.group()

        pattern = r'(\d{4})-(\d{2})-(\d{2})'
        return re.sub(pattern, replace_date, text)

    def _process_time(self, text: str) -> str:
        """Process time formats based on locale.

        With the "word" format, HH:MM is spelled out (minutes are omitted when
        zero); otherwise, or when the language is unsupported, it becomes
        "HH MM".
        """
        time_data = self.locale_data.get("time", {})

        if isinstance(time_data, dict):
            time_format = time_data.get("format", "word")
            separator = time_data.get("separator", " ")
        else:
            time_format = "word"
            separator = " "

        def replace_time(match):
            hour, minute = match.groups()

            if time_format != "word":
                return f"{hour} {minute}"

            try:
                hour_word = num2words(int(hour), lang=self.language)
                if int(minute) == 0:
                    return hour_word
                minute_word = num2words(int(minute), lang=self.language)
                return f"{hour_word}{separator}{minute_word}"
            except NotImplementedError:
                return f"{hour} {minute}"

        pattern = r'(\d{1,2}):(\d{2})'
        return re.sub(pattern, replace_time, text)
|