Spaces:

UcsTurkey
/

flare

Building

App Files Community

ciyidogan committed 29 days ago

Commit

b861704

verified ·

1 Parent(s): 1620c6f

Update tts_interface.py

Browse files

Files changed (1) hide show

tts_interface.py +52 -51

tts_interface.py CHANGED Viewed

@@ -3,7 +3,7 @@ TTS Interface and Implementations
 """
 from abc import ABC, abstractmethod
-from typing import Optional, Dict, Any
 import httpx
 import os
 from datetime import datetime
@@ -33,12 +33,11 @@ class TTSInterface(ABC):
         """Get list of supported voices"""
         pass
-    @abstractmethod
     def get_preprocessing_flags(self) -> Set[str]:
         """Get preprocessing flags for this provider"""
-        pass
-    def supports_ssml(self) -> bool:
         """Check if provider supports SSML"""
         return self.supports_ssml
@@ -46,6 +45,7 @@ class ElevenLabsTTS(TTSInterface):
     """ElevenLabs TTS implementation"""
     def __init__(self, api_key: str):
         self.api_key = api_key
         self.base_url = "https://api.elevenlabs.io/v1"
         self.default_voice_id = "2thYbn2sOGtiTwd9QwWH" # Avencia
@@ -60,54 +60,58 @@ class ElevenLabsTTS(TTSInterface):
         }
         self.preprocessor = TTSPreprocessor(language="tr")
-        # Debug log
-        masked_key = f"{api_key[:4]}...{api_key[-4:]}" if len(api_key) > 8 else "***"
-        log(f"🔑 ElevenLabsTTS initialized with key: {masked_key}")
-    async def synthesize(self, text: str, voice_id: Optional[str] = None, **kwargs) -> bytes:
         """Convert text to speech using ElevenLabs API"""
-        try:
-            # Apply preprocessing if not disabled
-            if kwargs.get("disable_preprocessing", False) != True:
-                text = self.preprocessor.preprocess(text, self.preprocessing_flags)
-                log(f"📝 Preprocessed text: {text[:100]}...")
-            voice = voice_id or self.default_voice_id
-            url = f"{self.base_url}/text-to-speech/{voice}"
-            headers = {
-                "xi-api-key": self.api_key,
-                "Content-Type": "application/json"
-            }
-            # Default parameters
-            data = {
-                "text": text,
-                "model_id": kwargs.get("model_id", "eleven_multilingual_v2"),
-                "voice_settings": kwargs.get("voice_settings", {
-                "stability": 1,
-                "similarity_boost": 0.85,
-                "style": 0.7,
-                "speed": 1.14,
                 "use_speaker_boost": True
-                })
             }
-            # Add optional parameters
-            if "output_format" in kwargs:
-                params = {"output_format": kwargs["output_format"]}
-            else:
-                params = {"output_format": "mp3_44100_128"}
-            log(f"🎤 Calling ElevenLabs TTS for {len(text)} characters")
-            async with httpx.AsyncClient(timeout=30) as client:
                 response = await client.post(
-                    url,
                     headers=headers,
-                    json=data,
-                    params=params
                 )
                 response.raise_for_status()
@@ -134,14 +138,11 @@ class ElevenLabsTTS(TTSInterface):
             "yoZ06aMxZJJ28mfd3POQ": "Sam (Male)",
         }
-    def get_preprocessing_flags(self) -> Set[str]:
-        """Get preprocessing flags for ElevenLabs"""
-        return self.preprocessing_flags
 class BlazeTTS(TTSInterface):
     """Placeholder for future Blaze TTS implementation"""
     def __init__(self, api_key: str):
         self.api_key = api_key
     async def synthesize(self, text: str, voice_id: Optional[str] = None, **kwargs) -> bytes:

 """
 from abc import ABC, abstractmethod
+from typing import Optional, Dict, Any, Set
 import httpx
 import os
 from datetime import datetime
         """Get list of supported voices"""
         pass
     def get_preprocessing_flags(self) -> Set[str]:
         """Get preprocessing flags for this provider"""
+        return self.preprocessing_flags
+    def supports_ssml_format(self) -> bool:
         """Check if provider supports SSML"""
         return self.supports_ssml
     """ElevenLabs TTS implementation"""
     def __init__(self, api_key: str):
+        super().__init__()
         self.api_key = api_key
         self.base_url = "https://api.elevenlabs.io/v1"
         self.default_voice_id = "2thYbn2sOGtiTwd9QwWH" # Avencia
         }
         self.preprocessor = TTSPreprocessor(language="tr")
+    async def synthesize(
+        self,
+        text: str,
+        voice_id: Optional[str] = None,
+        model_id: Optional[str] = None,
+        output_format: Optional[str] = None,
+        **kwargs
+    ) -> bytes:
         """Convert text to speech using ElevenLabs API"""
+        # Preprocess text
+        processed_text = self.preprocessor.process(text, self.preprocessing_flags)
+        # Use defaults if not provided
+        voice_id = voice_id or self.default_voice_id
+        model_id = model_id or "eleven_multilingual_v2"
+        output_format = output_format or "mp3_44100_128"
+        url = f"{self.base_url}/text-to-speech/{voice_id}"
+        headers = {
+            "Accept": "audio/mpeg",
+            "Content-Type": "application/json",
+            "xi-api-key": self.api_key
+        }
+        data = {
+            "text": processed_text,
+            "model_id": model_id,
+            "voice_settings": {
+                "stability": 0.5,
+                "similarity_boost": 0.75,
+                "style": 0.0,
                 "use_speaker_boost": True
             }
+        }
+        # Add output format to URL if specified
+        if output_format:
+            url += f"?output_format={output_format}"
+        try:
+            async with httpx.AsyncClient() as client:
+                log(f"🎤 ElevenLabs TTS request: voice={voice_id}, model={model_id}")
+                log(f"📝 Text (first 100 chars): {processed_text[:100]}...")
                 response = await client.post(
+                    url,
+                    json=data,
                     headers=headers,
+                    timeout=30.0
                 )
                 response.raise_for_status()
             "yoZ06aMxZJJ28mfd3POQ": "Sam (Male)",
         }
 class BlazeTTS(TTSInterface):
     """Placeholder for future Blaze TTS implementation"""
     def __init__(self, api_key: str):
+        super().__init__()
         self.api_key = api_key
     async def synthesize(self, text: str, voice_id: Optional[str] = None, **kwargs) -> bytes: