Spaces:

UcsTurkey
/

flare

Building

App Files Files Community

ciyidogan committed 10 days ago

Commit

9874d4d

verified ·

1 Parent(s): 1ad909f

Upload 6 files

Browse files

Files changed (6) hide show

tts/tts_blaze.py +26 -0
tts/tts_elevenlabs.py +109 -0
tts/tts_factory.py +56 -0
tts/tts_google.py +65 -0
tts/tts_interface.py +47 -0
tts/tts_preprocessor.py +232 -0

tts/tts_blaze.py ADDED Viewed

	@@ -0,0 +1,26 @@

+"""
+Blaze TTS Implementation (Placeholder)
+"""
+from typing import Optional, Dict
+from tts_interface import TTSInterface
+from logger import log_info, log_error, log_debug, log_warning
class BlazeTTS(TTSInterface):
    """Stub provider reserved for a future Blaze TTS integration.

    It fulfils the ``TTSInterface`` contract so it can be instantiated,
    but ``synthesize`` always raises ``NotImplementedError``.
    """

    def __init__(self, api_key: str):
        """Keep the API key and warn that this provider is only a stub."""
        super().__init__()
        notice = "⚠️ BlazeTTS initialized (not implemented yet)"
        self.api_key = api_key
        log_warning(notice)

    async def synthesize(self, text: str, voice_id: Optional[str] = None, **kwargs) -> bytes:
        """Always raise: synthesis is not available for this provider yet."""
        reason = "Blaze TTS not implemented yet"
        raise NotImplementedError(reason)

    def get_supported_voices(self) -> Dict[str, str]:
        """Return the voice catalogue, which is empty for this stub."""
        return dict()

    def get_provider_name(self) -> str:
        """Return the identifier of this provider."""
        provider = "blaze"
        return provider

tts/tts_elevenlabs.py ADDED Viewed

	@@ -0,0 +1,109 @@

+"""
+ElevenLabs TTS Implementation
+"""
+import httpx
+from typing import Optional, Dict
+from tts_interface import TTSInterface
+from logger import log_info, log_error, log_debug, log_warning
class ElevenLabsTTS(TTSInterface):
    """ElevenLabs TTS implementation.

    Posts text to the ElevenLabs ``text-to-speech`` HTTP endpoint and
    returns the body of the response as audio bytes.
    """

    def __init__(self, api_key: str):
        """Store the whitespace-stripped API key and the provider defaults.

        Args:
            api_key: ElevenLabs API key; sent in the ``xi-api-key`` header.
        """
        super().__init__()
        self.api_key = api_key.strip()
        self.base_url = "https://api.elevenlabs.io/v1"
        self.default_voice_id = "2thYbn2sOGtiTwd9QwWH"  # Avencia

        # ElevenLabs preprocessing needs.
        # NOTE(review): these entries are constant *names*, while
        # TTSPreprocessor.preprocess() tests membership of the constant
        # *values* ("numbers", "currency", ...) and defines no phone flag.
        # Confirm the caller translates these names; otherwise no
        # preprocessing is applied for this provider.
        self.preprocessing_flags = {
            "PREPROCESS_NUMBERS",    # Large numbers
            "PREPROCESS_CURRENCY",   # Currency amounts
            "PREPROCESS_TIME",       # Time format
            "PREPROCESS_CODES",      # PNR/codes
            "PREPROCESS_PHONE"       # Phone numbers
        }

        # Debug log: mask the key that is actually stored and sent, so the
        # visible characters never come from surrounding whitespace.
        key = self.api_key
        masked_key = f"{key[:4]}...{key[-4:]}" if len(key) > 8 else "***"
        log_debug(f"🔑 ElevenLabsTTS initialized with key: {masked_key}")

    async def synthesize(self, text: str, voice_id: Optional[str] = None, **kwargs) -> bytes:
        """Convert text to speech using the ElevenLabs API.

        Args:
            text: Text to synthesize.
            voice_id: ElevenLabs voice ID; ``self.default_voice_id`` is used
                when omitted or empty.
            **kwargs: Optional ``model_id``, ``voice_settings`` and
                ``output_format`` overrides.

        Returns:
            The response body as audio bytes.

        Raises:
            httpx.HTTPStatusError: The API answered with an error status.
            Exception: Any other failure is logged and re-raised unchanged.
        """
        try:
            voice = voice_id or self.default_voice_id
            url = f"{self.base_url}/text-to-speech/{voice}"
            headers = {
                "xi-api-key": self.api_key,
                "Content-Type": "application/json"
            }

            # Default parameters, each overridable through kwargs
            data = {
                "text": text,
                "model_id": kwargs.get("model_id", "eleven_multilingual_v2"),
                "voice_settings": kwargs.get("voice_settings", {
                    "stability": 1,
                    "similarity_boost": 0.85,
                    "style": 0.7,
                    "speed": 1.14,
                    "use_speaker_boost": True
                })
            }

            # The output format travels as a query parameter
            params = {"output_format": kwargs.get("output_format", "mp3_44100_128")}

            log_debug(f"🎤 Calling ElevenLabs TTS for {len(text)} characters")

            async with httpx.AsyncClient(timeout=30.0) as client:
                response = await client.post(
                    url,
                    headers=headers,
                    json=data,
                    params=params
                )
                response.raise_for_status()
                audio_data = response.content  # This should be bytes

                # Defensive guard kept from the original: if a string ever
                # shows up, try to treat it as base64. The modules are
                # imported here because the file does not import them.
                if isinstance(audio_data, str):
                    import base64
                    import binascii

                    log_warning("ElevenLabs returned string instead of bytes")
                    try:
                        audio_data = base64.b64decode(audio_data)
                    except (binascii.Error, ValueError):
                        # Best effort only: hand the payload back unchanged
                        log_warning("ElevenLabs string payload is not valid base64")

                log_debug(f"✅ ElevenLabs TTS returned {len(audio_data)} bytes")
                log_debug(f"Audio data type: {type(audio_data)}")
                return audio_data

        except httpx.HTTPStatusError as e:
            log_error(f"❌ ElevenLabs API error: {e.response.status_code} - {e.response.text}")
            raise
        except Exception as e:
            log_error("❌ TTS synthesis error", e)
            raise

    def get_supported_voices(self) -> Dict[str, str]:
        """Get default voices - full list can be fetched from API"""
        return {
            "2thYbn2sOGtiTwd9QwWH": "Avencia (Female - Turkish)",
            "21m00Tcm4TlvDq8ikWAM": "Rachel (Female)",
            "EXAVITQu4vr4xnSDxMaL": "Bella (Female)",
            "ErXwobaYiN019PkySvjV": "Antoni (Male)",
            "VR6AewLTigWG4xSOukaG": "Arnold (Male)",
            "pNInz6obpgDQGcFmaJgB": "Adam (Male)",
            "yoZ06aMxZJJ28mfd3POQ": "Sam (Male)",
        }

    def get_provider_name(self) -> str:
        """Get provider name"""
        return "elevenlabs"

tts/tts_factory.py ADDED Viewed

	@@ -0,0 +1,56 @@

+"""
+TTS Provider Factory for Flare
+"""
+from typing import Optional
+from tts_interface import TTSInterface
+from tts_elevenlabs import ElevenLabsTTS
+from tts_blaze import BlazeTTS
+from config_provider import ConfigProvider
+from logger import log_info, log_error, log_debug, log_warning
class TTSFactory:
    """Factory that builds the TTS provider selected in the configuration."""

    @staticmethod
    def create_provider() -> "Optional[TTSInterface]":
        """Create TTS provider based on configuration.

        Returns:
            A provider instance, or ``None`` when TTS is disabled
            (``"no_tts"`` or no provider entry), the provider has no
            definition, a required API key is missing, or the provider name
            is not handled here.
        """
        cfg = ConfigProvider.get()
        tts_config = cfg.global_config.tts_provider

        if not tts_config or tts_config.name == "no_tts":
            log_info("🔇 No TTS provider configured")
            return None

        provider_name = tts_config.name
        log_info(f"🏭 Creating TTS provider: {provider_name}")

        # Get provider definition
        provider_def = cfg.global_config.get_provider_config("tts", provider_name)
        if not provider_def:
            # Misconfiguration: report at warning level, not info
            log_warning(f"⚠️ Unknown TTS provider: {provider_name}")
            return None

        # Get API key
        api_key = TTSFactory._get_api_key(tts_config)
        if not api_key and provider_def.requires_api_key:
            log_warning(f"⚠️ No API key for TTS provider: {provider_name}")
            return None

        # Create provider based on name
        if provider_name == "elevenlabs":
            return ElevenLabsTTS(api_key)
        if provider_name == "blaze":
            return BlazeTTS(api_key)

        log_warning(f"⚠️ Unsupported TTS provider: {provider_name}")
        return None

    @staticmethod
    def _get_api_key(tts_config) -> Optional[str]:
        """Get decrypted API key.

        Args:
            tts_config: Object exposing an ``api_key`` attribute.

        Returns:
            ``None`` when no key is set, the result of ``decrypt`` for
            values prefixed with ``"enc:"``, otherwise the key as stored.
        """
        stored_key = tts_config.api_key
        if not stored_key:
            return None

        if stored_key.startswith("enc:"):
            # Imported lazily, only when an encrypted key is present
            from encryption_utils import decrypt
            return decrypt(stored_key)

        return stored_key

tts/tts_google.py ADDED Viewed

	@@ -0,0 +1,65 @@

+# tts_google.py
import asyncio
import functools
import os
from typing import Dict, Optional

from google.cloud import texttospeech

from logger import log_info, log_error, log_debug, log_warning
from ssml_converter import SSMLConverter
from tts_interface import TTSInterface
class GoogleCloudTTS(TTSInterface):
    """Google Cloud Text-to-Speech implementation"""

    def __init__(self, credentials_path: str):
        """Create the Google client and the SSML converter.

        Args:
            credentials_path: Path exported through the
                ``GOOGLE_APPLICATION_CREDENTIALS`` environment variable
                before the client is created.
        """
        super().__init__()
        self.supports_ssml = True
        self.credentials_path = credentials_path
        self.default_voice_id = "tr-TR-Wavenet-B"

        # Google TTS doesn't need preprocessing with SSML
        self.preprocessing_flags = set()

        # Initialize client.
        # NOTE: this sets a process-wide environment variable.
        os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = credentials_path
        self.client = texttospeech.TextToSpeechClient()

        # SSML converter
        self.ssml_converter = SSMLConverter(language="tr-TR")

    async def synthesize(self, text: str, voice_id: Optional[str] = None, **kwargs) -> bytes:
        """Convert text to speech using Google Cloud TTS.

        Args:
            text: Plain text, or SSML starting with ``<speak``.
            voice_id: Google voice name; ``self.default_voice_id`` is used
                when omitted or empty.
            **kwargs: Optional ``use_ssml`` (default ``True``),
                ``language_code``, ``speaking_rate``, ``pitch`` and
                ``volume_gain_db``.

        Returns:
            MP3-encoded audio bytes from the response.

        Raises:
            Exception: Any failure is logged and re-raised unchanged.
        """
        try:
            use_ssml = kwargs.get("use_ssml", True)

            if use_ssml:
                # Text that is already SSML must still be submitted as SSML;
                # only plain text goes through the converter.
                if not text.lstrip().startswith("<speak"):
                    text = self.ssml_converter.convert_to_ssml(text)
                    log_info(f"📝 Converted to SSML: {text[:200]}...")
                input_text = texttospeech.SynthesisInput(ssml=text)
            else:
                input_text = texttospeech.SynthesisInput(text=text)

            # Voice selection
            voice = texttospeech.VoiceSelectionParams(
                language_code=kwargs.get("language_code", "tr-TR"),
                name=voice_id or self.default_voice_id,
                ssml_gender=texttospeech.SsmlVoiceGender.FEMALE
            )

            # Audio config
            audio_config = texttospeech.AudioConfig(
                audio_encoding=texttospeech.AudioEncoding.MP3,
                speaking_rate=kwargs.get("speaking_rate", 1.0),
                pitch=kwargs.get("pitch", 0.0),
                volume_gain_db=kwargs.get("volume_gain_db", 0.0)
            )

            # The client call is synchronous; run it in the default executor
            # so the event loop is not blocked while the request is pending.
            loop = asyncio.get_running_loop()
            response = await loop.run_in_executor(
                None,
                functools.partial(
                    self.client.synthesize_speech,
                    input=input_text,
                    voice=voice,
                    audio_config=audio_config,
                ),
            )

            log_info(f"✅ Google TTS returned {len(response.audio_content)} bytes")
            return response.audio_content

        except Exception as e:
            log_error("❌ Google TTS error", e)
            raise

    def get_supported_voices(self) -> Dict[str, str]:
        """Get the default voice; other voice names may be passed as ``voice_id``."""
        return {self.default_voice_id: "Turkish WaveNet B (default)"}

    def get_provider_name(self) -> str:
        """Get provider name"""
        return "google"

tts/tts_interface.py ADDED Viewed

	@@ -0,0 +1,47 @@

+"""
+TTS Interface for Flare
+"""
+from abc import ABC, abstractmethod
+from typing import Optional, Dict, Any, Set
+from datetime import datetime
+import sys
class TTSInterface(ABC):
    """Contract that every TTS provider implementation must fulfil."""

    def __init__(self):
        # Providers overwrite these two attributes in their own __init__.
        self.supports_ssml: bool = False
        self.preprocessing_flags: Set[str] = set()

    @abstractmethod
    async def synthesize(self, text: str, voice_id: Optional[str] = None, **kwargs) -> bytes:
        """Turn ``text`` into speech and return the audio as bytes.

        Args:
            text: Text to convert to speech.
            voice_id: Optional provider-specific voice ID.
            **kwargs: Additional provider-specific parameters.

        Returns:
            Audio data as bytes (MP3 or WAV format).
        """

    @abstractmethod
    def get_supported_voices(self) -> Dict[str, str]:
        """Return the voices this provider offers."""

    @abstractmethod
    def get_provider_name(self) -> str:
        """Return the provider name used for logging."""

    def get_preprocessing_flags(self) -> Set[str]:
        """Return the preprocessing flags stored on this provider."""
        flags = self.preprocessing_flags
        return flags

    def supports_ssml_format(self) -> bool:
        """Return the provider's ``supports_ssml`` attribute."""
        ssml_capable = self.supports_ssml
        return ssml_capable

tts/tts_preprocessor.py ADDED Viewed

	@@ -0,0 +1,232 @@

+"""
+TTS Text Preprocessing Utilities with Multilingual Support
+"""
+import re
+import json
+from typing import Dict, Set, Optional
+from num2words import num2words
+from pathlib import Path
+from locale_manager import LocaleManager
class TTSPreprocessor:
    """Text preprocessor for TTS providers with multilingual support.

    Rewrites currency, time, date, code, percentage and number expressions
    in free text so a TTS engine receives speakable words. Locale-specific
    wording is read from ``self.locale_data``.
    """

    # Preprocessing flags: the values a caller puts into the ``flags`` set
    PREPROCESS_NUMBERS = "numbers"
    PREPROCESS_CURRENCY = "currency"
    PREPROCESS_TIME = "time"
    PREPROCESS_DATE = "date"
    PREPROCESS_CODES = "codes"
    PREPROCESS_PERCENTAGE = "percentage"

    def __init__(self, language: str = "tr"):
        """Load the locale data for ``language`` through ``LocaleManager``.

        Args:
            language: Language code, also passed to ``num2words`` as ``lang``.
        """
        self.language = language
        self.locale_data = LocaleManager.get_locale(language)

    def preprocess(self, text: str, flags: Set[str]) -> str:
        """Apply preprocessing based on flags.

        Args:
            text: Text to rewrite.
            flags: Set of ``PREPROCESS_*`` values selecting the steps to run.

        Returns:
            The rewritten text.
        """
        # Numbers are processed last so the specialised steps see the
        # original digits first.
        pipeline = (
            (self.PREPROCESS_CURRENCY, self._process_currency),
            (self.PREPROCESS_TIME, self._process_time),
            (self.PREPROCESS_DATE, self._process_date),
            (self.PREPROCESS_CODES, self._process_codes),
            (self.PREPROCESS_PERCENTAGE, self._process_percentage),
            (self.PREPROCESS_NUMBERS, self._process_numbers),
        )
        for flag, step in pipeline:
            if flag in flags:
                text = step(text)
        return text

    def _number_to_words(self, value: int) -> str:
        """Spell ``value`` in the configured language, falling back to English."""
        try:
            return num2words(value, lang=self.language)
        except NotImplementedError:
            # Fallback to English if language not supported
            return num2words(value, lang='en')

    @staticmethod
    def _hundredths(fraction_digits: str) -> int:
        """Return the first two fraction digits as an integer (``"5"`` -> 50).

        Works on the digit string rather than on a float so that values such
        as ``1234.56`` do not lose a unit to binary rounding.
        """
        return int((fraction_digits + "00")[:2])

    def _process_numbers(self, text: str) -> str:
        """Convert numbers to words based on locale.

        Whole numbers from 0 up to ``small_number_threshold`` (default 100)
        stay as digits; larger ones are spelled out. Decimals are spoken as
        ``<integer> <decimal_word> <hundredths>``. The separator convention
        is chosen by language code: ``"tr"`` uses ``1.234,56``, every other
        language ``1,234.56``.
        """
        decimal_word = self.locale_data["numbers"]["decimal_word"]
        threshold = self.locale_data.get("small_number_threshold", 100)
        turkish_format = self.language == "tr"

        def replace_number(match):
            original = match.group()

            # Normalize number format
            if turkish_format:
                # Turkish: 1.234,56 -> 1234.56
                normalized = original.replace('.', '').replace(',', '.')
            else:
                # English: 1,234.56 -> 1234.56
                normalized = original.replace(',', '')

            integer_digits, _, fraction_digits = normalized.partition('.')
            try:
                integer_part = int(integer_digits)

                # No fraction, or a fraction made only of zeros: whole number
                if not fraction_digits.strip('0'):
                    # Keep small numbers as digits based on threshold
                    if 0 <= integer_part <= threshold:
                        return str(integer_part)
                    return self._number_to_words(integer_part)

                # Handle decimal
                int_words = self._number_to_words(integer_part)
                dec_words = self._number_to_words(self._hundredths(fraction_digits))
                return f"{int_words} {decimal_word} {dec_words}"
            except Exception:
                # Best effort: leave the text untouched when conversion fails
                return original

        # Match numbers with locale-specific format
        if turkish_format:
            pattern = r'\b\d{1,3}(?:\.\d{3})*(?:,\d+)?\b|\b\d+(?:,\d+)?\b'
        else:
            pattern = r'\b\d{1,3}(?:,\d{3})*(?:\.\d+)?\b|\b\d+(?:\.\d+)?\b'

        return re.sub(pattern, replace_number, text)

    def _process_codes(self, text: str) -> str:
        """Process codes like PNR, flight numbers - language agnostic.

        A code is 2-5 uppercase ASCII letters followed by 2-5 digits; its
        characters are separated by spaces so they are read one by one.
        """
        def spell_code(match):
            return ' '.join(match.group())

        # Match uppercase letters followed by numbers
        pattern = r'\b[A-Z]{2,5}\d{2,5}\b'
        return re.sub(pattern, spell_code, text)

    def _process_currency(self, text: str) -> str:
        """Process currency symbols and amounts based on locale.

        Reads ``symbol``, ``word``, ``code`` and ``position`` from the
        ``currency`` entry of the locale data. Amounts are rewritten as
        ``<amount> <word>``; symbols without an amount become the word.
        """
        currency_data = self.locale_data.get("currency", {})

        if not isinstance(currency_data, dict):
            return text

        symbol = currency_data.get("symbol", "")
        word = currency_data.get("word", "")
        code = currency_data.get("code", "")
        position = currency_data.get("position", "before")

        amount = r'\d+(?:[.,]\d+)?'

        def amount_with_word(match):
            return f"{match.group(1)} {word}"

        if symbol and word:
            escaped = re.escape(symbol)
            if position == "before":
                # $100 -> 100 dollar
                pattern = rf'{escaped}\s*({amount})'
            else:
                # 100₺ -> 100 lira
                pattern = rf'({amount})\s*{escaped}'
            # Amounts first: replacing every symbol up front would leave
            # nothing for the amount pattern to match.
            text = re.sub(pattern, amount_with_word, text)

            # Replace the remaining standalone symbols
            text = text.replace(symbol, f" {word} ")

        # Process currency codes
        if code and word:
            pattern = rf'({amount})\s*{re.escape(code)}\b'
            text = re.sub(pattern, amount_with_word, text, flags=re.IGNORECASE)

        return text

    def _process_percentage(self, text: str) -> str:
        """Process percentage symbols based on locale.

        Reads ``word`` (default ``"percent"``) and ``position`` (default
        ``"after"``) from the ``percentage`` entry of the locale data.
        """
        percentage = self.locale_data.get("percentage", {})

        if not isinstance(percentage, dict):
            return text

        word = percentage.get("word", "percent")
        position = percentage.get("position", "after")

        if position == "before":
            # %50 -> yüzde 50
            pattern = r'%\s*(\d+(?:[.,]\d+)?)'
            replacement = rf'{word} \1'
        else:
            # 50% -> 50 percent
            pattern = r'(\d+(?:[.,]\d+)?)\s*%'
            replacement = rf'\1 {word}'

        return re.sub(pattern, replacement, text)

    def _process_date(self, text: str) -> str:
        """Process date formats based on locale.

        Rewrites ISO ``YYYY-MM-DD`` dates with the month name from the
        ``months`` entry of the locale data. Only the ``DD.MM.YYYY`` and
        ``MM/DD/YYYY`` locale formats are rewritten; any other
        ``date_format`` leaves the date unchanged.
        """
        months = self.locale_data.get("months", {})
        date_format = self.locale_data.get("date_format", "YYYY-MM-DD")

        if not isinstance(months, dict):
            return text

        # Convert ISO format dates
        def replace_date(match):
            year, month, day = match.groups()
            if month in months:
                month_name = months[month]
            else:
                # Also accept tables keyed without the leading zero ("3")
                month_name = months.get(month.lstrip("0"), month)

            # Format based on locale preference
            if "DD.MM.YYYY" in date_format:
                # Turkish format with month name
                return f"{int(day)} {month_name} {year}"
            elif "MM/DD/YYYY" in date_format:
                # US format with month name
                return f"{month_name} {int(day)}, {year}"
            else:
                return match.group()

        pattern = r'(\d{4})-(\d{2})-(\d{2})'
        return re.sub(pattern, replace_date, text)

    def _process_time(self, text: str) -> str:
        """Process time formats based on locale.

        Rewrites ``H:MM`` / ``HH:MM``. With the ``word`` format the hour and
        non-zero minute are spelled via ``num2words``; otherwise the colon
        is replaced with a space. Impossible times (hour > 23 or
        minute > 59) and longer digit/colon runs such as ``12:30:45`` are
        left untouched.
        """
        time_data = self.locale_data.get("time", {})

        if not isinstance(time_data, dict):
            time_format = "word"
            separator = " "
        else:
            time_format = time_data.get("format", "word")
            separator = time_data.get("separator", " ")

        def replace_time(match):
            hour, minute = match.groups()
            hour_int = int(hour)
            minute_int = int(minute)

            # Not a clock time (e.g. a score or ratio): keep the text
            if hour_int > 23 or minute_int > 59:
                return match.group()

            if time_format != "word":
                return f"{hour} {minute}"

            try:
                hour_word = num2words(hour_int, lang=self.language)
                if minute_int == 0:
                    return hour_word
                minute_word = num2words(minute_int, lang=self.language)
                return f"{hour_word}{separator}{minute_word}"
            except NotImplementedError:
                return f"{hour} {minute}"

        # The lookarounds stop the pattern from matching inside a longer
        # run of digits or colons.
        pattern = r'(?<![\d:])(\d{1,2}):(\d{2})(?![\d:])'
        return re.sub(pattern, replace_time, text)