Spaces:

UcsTurkey
/

flare

Building

App Files Files Community

ciyidogan committed 26 days ago

Commit

312aa6f

verified ·

1 Parent(s): 71406b0

Update tts_interface.py

Browse files

Files changed (1) hide show

tts_interface.py +18 -132

tts_interface.py CHANGED Viewed

@@ -1,16 +1,11 @@
 """
-TTS Interface and Implementations
 """
 from abc import ABC, abstractmethod
 from typing import Optional, Dict, Any, Set
-import httpx
-import os
 from datetime import datetime
 import sys
-from tts_preprocessor import TTSPreprocessor
 def log(message: str):
     """Print ``message`` to stdout prefixed with the current local time.
     The prefix has the form ``[HH:MM:SS.mmm]``.
     """
     # %f yields six microsecond digits; dropping the last three leaves milliseconds
     timestamp = datetime.now().strftime("%H:%M:%S.%f")[:-3]
     print(f"[{timestamp}] {message}")
@@ -25,7 +20,17 @@ class TTSInterface(ABC):
     @abstractmethod
     async def synthesize(self, text: str, voice_id: Optional[str] = None, **kwargs) -> bytes:
-        """Convert text to speech and return audio bytes.
-        Abstract coroutine: the body here is only ``pass``, so concrete
-        providers must override it.
-        Args:
-            text: Text to convert to speech.
-            voice_id: Optional voice identifier; defaults to None.
-            **kwargs: Extra keyword arguments accepted by the implementation.
-        Returns:
-            The synthesized audio as bytes.
-        """
         pass
     @abstractmethod
@@ -33,134 +38,15 @@ class TTSInterface(ABC):
         """Get list of supported voices"""
         pass
     def get_preprocessing_flags(self) -> Set[str]:
         """Get preprocessing flags for this provider.
         Returns:
             The ``self.preprocessing_flags`` object itself (no copy is made).
         """
         return self.preprocessing_flags
     def supports_ssml_format(self) -> bool:
         """Check if provider supports SSML.
         Returns:
             The value of ``self.supports_ssml`` (assigned outside the lines
             visible here).
         """
-        return self.supports_ssml
-class ElevenLabsTTS(TTSInterface):
-    """ElevenLabs TTS implementation.
-    Preprocesses the input text with a ``TTSPreprocessor`` and posts it to the
-    ElevenLabs ``text-to-speech`` HTTP endpoint through ``httpx``.
-    """
-    def __init__(self, api_key: str):
-        """Store the API key and set up endpoint, default voice and preprocessing.
-        Args:
-            api_key: Key sent in the ``xi-api-key`` header; surrounding
-                whitespace is stripped.
-        """
-        super().__init__()
-        self.api_key = api_key.strip()  # Strip leading/trailing whitespace
-        self.base_url = "https://api.elevenlabs.io/v1"
-        self.default_voice_id = "2thYbn2sOGtiTwd9QwWH" # Avencia
-        # Fields that need preprocessing for ElevenLabs
-        self.preprocessing_flags = {
-            TTSPreprocessor.PREPROCESS_NUMBERS,    # Large numbers
-            TTSPreprocessor.PREPROCESS_CURRENCY,   # Currencies
-            TTSPreprocessor.PREPROCESS_TIME,       # Time format
-            TTSPreprocessor.PREPROCESS_CODES,      # PNR codes
-            TTSPreprocessor.PREPROCESS_PERCENTAGE  # Percentages
-        }
-        # tr-TR -> tr conversion
-        # NOTE(review): the preprocessor language is hard-coded to "tr" here
-        self.preprocessor = TTSPreprocessor(language="tr")
-    async def synthesize(
-        self,
-        text: str,
-        voice_id: Optional[str] = None,
-        model_id: Optional[str] = None,
-        output_format: Optional[str] = None,
-        **kwargs
-    ) -> bytes:
-        """Convert text to speech using ElevenLabs API.
-        Args:
-            text: Text to synthesize; it is preprocessed with
-                ``self.preprocessing_flags`` before being sent.
-            voice_id: Voice to use; a falsy value falls back to
-                ``self.default_voice_id``.
-            model_id: Model to use; a falsy value falls back to
-                "eleven_multilingual_v2".
-            output_format: Value for the ``output_format`` query parameter;
-                a falsy value falls back to "mp3_44100_128".
-            **kwargs: Accepted but not used by this implementation.
-        Returns:
-            The raw response body (``response.content``) as bytes.
-        Raises:
-            httpx.HTTPStatusError: Re-raised after logging when
-                ``raise_for_status()`` rejects the response.
-            Exception: Any other error is logged and re-raised unchanged.
-        """
-        # Preprocess text
-        processed_text = self.preprocessor.preprocess(text, self.preprocessing_flags)
-        # Use defaults if not provided
-        voice_id = voice_id or self.default_voice_id
-        model_id = model_id or "eleven_multilingual_v2"
-        output_format = output_format or "mp3_44100_128"
-        url = f"{self.base_url}/text-to-speech/{voice_id}"
-        headers = {
-            "Accept": "audio/mpeg",
-            "Content-Type": "application/json",
-            "xi-api-key": self.api_key
-        }
-        # Voice settings are fixed here; no parameter of this method overrides them
-        data = {
-            "text": processed_text,
-            "model_id": model_id,
-            "voice_settings": {
-                "stability": 0.5,
-                "similarity_boost": 0.75,
-                "style": 0.0,
-                "use_speaker_boost": True
-            }
-        }
-        # Add output format to URL (always truthy here because of the default above)
-        if output_format:
-            url += f"?output_format={output_format}"
-        try:
-            # A new client is created for each call and closed on exit from the block
-            async with httpx.AsyncClient() as client:
-                log(f"🎤 ElevenLabs TTS request: voice={voice_id}, model={model_id}")
-                # The "..." suffix is appended even when the text is shorter than 100 chars
-                log(f"📝 Text (first 100 chars): {processed_text[:100]}...")
-                response = await client.post(
-                    url,
-                    json=data,
-                    headers=headers,
-                    timeout=30.0
-                )
-                response.raise_for_status()
-                audio_data = response.content
-                log(f"✅ ElevenLabs TTS returned {len(audio_data)} bytes")
-                return audio_data
-        except httpx.HTTPStatusError as e:
-            # Log the status code and response text, then propagate to the caller
-            log(f"❌ ElevenLabs API error: {e.response.status_code} - {e.response.text}")
-            raise
-        except Exception as e:
-            # Any other failure is logged and propagated unchanged
-            log(f"❌ TTS synthesis error: {e}")
-            raise
-    def get_supported_voices(self) -> Dict[str, str]:
-        """Get default voices - full list can be fetched from API.
-        Returns:
-            A hard-coded mapping of voice ID to display label. Note that
-            ``self.default_voice_id`` is not among these keys.
-        """
-        return {
-            "21m00Tcm4TlvDq8ikWAM": "Rachel (Female)",
-            "EXAVITQu4vr4xnSDxMaL": "Bella (Female)",
-            "ErXwobaYiN019PkySvjV": "Antoni (Male)",
-            "VR6AewLTigWG4xSOukaG": "Arnold (Male)",
-            "pNInz6obpgDQGcFmaJgB": "Adam (Male)",
-            "yoZ06aMxZJJ28mfd3POQ": "Sam (Male)",
-        }
-class BlazeTTS(TTSInterface):
-    """Placeholder for future Blaze TTS implementation.
-    ``synthesize`` always raises ``NotImplementedError`` and no voices are
-    listed.
-    """
-    def __init__(self, api_key: str):
-        """Store ``api_key`` as given (no stripping or validation)."""
-        super().__init__()
-        self.api_key = api_key
-    async def synthesize(self, text: str, voice_id: Optional[str] = None, **kwargs) -> bytes:
-        """Not implemented.
-        Raises:
-            NotImplementedError: Always; all arguments are ignored.
-        """
-        raise NotImplementedError("Blaze TTS not implemented yet")
-    def get_supported_voices(self) -> Dict[str, str]:
-        """Return an empty mapping; no voices are defined for this placeholder."""
-        return {}
-def create_tts_provider(engine: str, api_key: Optional[str] = None) -> Optional[TTSInterface]:
-    """Factory function to create TTS provider instances.
-    Args:
-        engine: Engine name; "elevenlabs", "blaze" and "no_tts" are recognised.
-        api_key: API key passed to the provider constructor.
-    Returns:
-        An ``ElevenLabsTTS`` or ``BlazeTTS`` instance when the engine matches
-        and ``api_key`` is truthy; otherwise None.
-    """
-    if engine == "elevenlabs" and api_key:
-        return ElevenLabsTTS(api_key)
-    elif engine == "blaze" and api_key:
-        return BlazeTTS(api_key)
-    elif engine == "no_tts":
-        # TTS explicitly disabled: return None without logging
-        return None
-    else:
-        # Also reached for a known engine whose api_key is missing or empty
-        log(f"⚠️ Unknown or unconfigured TTS engine: {engine}")
-        return None

 """
+TTS Interface for Flare
 """
 from abc import ABC, abstractmethod
 from typing import Optional, Dict, Any, Set
 from datetime import datetime
 import sys
 def log(message: str):
     """Print ``message`` to stdout prefixed with the current local time.
     The prefix has the form ``[HH:MM:SS.mmm]``.
     """
     # %f yields six microsecond digits; dropping the last three leaves milliseconds
     timestamp = datetime.now().strftime("%H:%M:%S.%f")[:-3]
     print(f"[{timestamp}] {message}")
     @abstractmethod
     async def synthesize(self, text: str, voice_id: Optional[str] = None, **kwargs) -> bytes:
+        """
+        Convert text to speech and return audio bytes.
+        Abstract coroutine: the body here is only ``pass``, so concrete
+        providers must override it.
+        Args:
+            text: Text to convert to speech
+            voice_id: Optional voice ID specific to the provider
+            **kwargs: Additional provider-specific parameters
+        Returns:
+            Audio data as bytes (MP3 or WAV format)
+        """
         pass
     @abstractmethod
         """Get list of supported voices"""
         pass
+    @abstractmethod
+    def get_provider_name(self) -> str:
+        """Get provider name for logging.
+        Abstract: the body here is only ``pass``, so concrete providers must
+        override it.
+        Returns:
+            The provider's name as a string.
+        """
+        pass
     def get_preprocessing_flags(self) -> Set[str]:
         """Get preprocessing flags for this provider.
         Returns:
             The ``self.preprocessing_flags`` object itself (no copy is made).
         """
         return self.preprocessing_flags
     def supports_ssml_format(self) -> bool:
         """Check if provider supports SSML.
         Returns:
             The value of ``self.supports_ssml`` (assigned outside the lines
             visible here).
         """
+        return self.supports_ssml