Spaces:

UcsTurkey
/

flare

Building

App Files Files Community

ciyidogan committed on Jun 15

Commit

f563475

verified ·

1 Parent(s): e6d70ab

Create tts_interface.py

Browse files

Files changed (1) hide show

tts_interface.py +136 -0

tts_interface.py ADDED Viewed

	@@ -0,0 +1,136 @@

+"""
+TTS Interface and Implementations
+"""
+from abc import ABC, abstractmethod
+from typing import Optional, Dict, Any
+import httpx
+import os
+from datetime import datetime
+import sys
def log(message: str):
    """Print *message* to stdout with a ``[HH:MM:SS.mmm]`` prefix and flush.

    Args:
        message: Text to emit after the timestamp.
    """
    now = datetime.now()
    # strftime has no millisecond directive, so derive the milliseconds from
    # the microsecond field (truncated, not rounded).
    clock = f"{now:%H:%M:%S}.{now.microsecond // 1000:03d}"
    print(f"[{clock}] {message}", flush=True)
class TTSInterface(ABC):
    """Contract that every text-to-speech provider must implement."""

    @abstractmethod
    async def synthesize(self, text: str, voice_id: Optional[str] = None, **kwargs) -> bytes:
        """Turn *text* into spoken audio.

        Args:
            text: The text to be spoken.
            voice_id: Provider-specific voice identifier; optional.
            **kwargs: Extra provider-specific options.

        Returns:
            The audio as raw bytes (MP3 or WAV format).
        """
        ...

    @abstractmethod
    def get_supported_voices(self) -> Dict[str, str]:
        """Return the voices this provider supports as a str-to-str mapping."""
        ...
class ElevenLabsTTS(TTSInterface):
    """TTS provider backed by the ElevenLabs HTTP API."""

    def __init__(self, api_key: str):
        """Store the credentials and the endpoint/voice defaults.

        Args:
            api_key: Sent as the ``xi-api-key`` header on every request.
        """
        self.api_key = api_key
        self.base_url = "https://api.elevenlabs.io/v1"
        self.default_voice_id = "21m00Tcm4TlvDq8ikWAM"  # Rachel voice

    async def synthesize(self, text: str, voice_id: Optional[str] = None, **kwargs) -> bytes:
        """Convert text to speech using the ElevenLabs API.

        Args:
            text: Text to convert to speech.
            voice_id: ElevenLabs voice ID. Falls back to
                ``self.default_voice_id`` when omitted or empty.
            **kwargs: Optional overrides read by this method:
                ``model_id`` (default ``"eleven_multilingual_v2"``),
                ``voice_settings`` (a dict that replaces the default
                settings wholesale), and ``output_format`` (sent as a query
                parameter, default ``"mp3_44100_128"``).

        Returns:
            The raw response body as bytes.

        Raises:
            httpx.HTTPStatusError: The API answered with an error status;
                the status code and body are logged before re-raising.
            Exception: Any other failure is logged and re-raised unchanged.
        """
        # Local import keeps this block self-contained.
        from urllib.parse import quote

        try:
            voice = voice_id or self.default_voice_id
            # Percent-encode the voice ID so characters such as "/", "?" or
            # "#" cannot alter the request path or inject a query string.
            # Plain alphanumeric IDs pass through unchanged.
            url = f"{self.base_url}/text-to-speech/{quote(voice, safe='')}"
            headers = {
                "xi-api-key": self.api_key,
                "Content-Type": "application/json",
            }
            # Request body; caller-supplied values win over the defaults.
            data = {
                "text": text,
                "model_id": kwargs.get("model_id", "eleven_multilingual_v2"),
                "voice_settings": kwargs.get("voice_settings", {
                    "stability": 0.5,
                    "similarity_boost": 0.75,
                    "style": 0,
                    "use_speaker_boost": True,
                }),
            }
            # The output format travels as a query parameter, not in the body.
            params = {"output_format": kwargs.get("output_format", "mp3_44100_128")}
            log(f"🎤 Calling ElevenLabs TTS for {len(text)} characters")
            async with httpx.AsyncClient(timeout=30) as client:
                response = await client.post(
                    url,
                    headers=headers,
                    json=data,
                    params=params,
                )
                response.raise_for_status()
                audio_data = response.content
                log(f"✅ ElevenLabs TTS returned {len(audio_data)} bytes")
                return audio_data
        except httpx.HTTPStatusError as e:
            log(f"❌ ElevenLabs API error: {e.response.status_code} - {e.response.text}")
            raise
        except Exception as e:
            # Include the exception type: some exceptions (timeouts, for
            # instance) may stringify to an empty message, which would leave
            # the log line without any diagnostic information.
            log(f"❌ TTS synthesis error: {type(e).__name__}: {e}")
            raise

    def get_supported_voices(self) -> Dict[str, str]:
        """Return a built-in mapping of voice ID to display name.

        This is a fixed default set; the full list can be fetched from the API.
        A new dict is built on every call, so callers may mutate the result.
        """
        return {
            "21m00Tcm4TlvDq8ikWAM": "Rachel (Female)",
            "EXAVITQu4vr4xnSDxMaL": "Bella (Female)",
            "ErXwobaYiN019PkySvjV": "Antoni (Male)",
            "VR6AewLTigWG4xSOukaG": "Arnold (Male)",
            "pNInz6obpgDQGcFmaJgB": "Adam (Male)",
            "yoZ06aMxZJJ28mfd3POQ": "Sam (Male)",
        }
class BlazeTTS(TTSInterface):
    """Stub provider reserving the Blaze TTS slot until it is implemented."""

    def __init__(self, api_key: str):
        """Keep the API key; nothing else is set up yet."""
        self.api_key = api_key

    async def synthesize(self, text: str, voice_id: Optional[str] = None, **kwargs) -> bytes:
        """Always raise ``NotImplementedError``; synthesis is not available."""
        reason = "Blaze TTS not implemented yet"
        raise NotImplementedError(reason)

    def get_supported_voices(self) -> Dict[str, str]:
        """Return an empty mapping, as no voices are available yet."""
        no_voices: Dict[str, str] = {}
        return no_voices
+def create_tts_provider(engine: str, api_key: Optional[str] = None) -> Optional[TTSInterface]:
+    """Factory function to create TTS provider instances"""
+    if engine == "elevenlabs" and api_key:
+        return ElevenLabsTTS(api_key)
+    elif engine == "blaze" and api_key:
+        return BlazeTTS(api_key)
+    elif engine == "no_tts":
+        return None
+    else:
+        log(f"⚠️ Unknown or unconfigured TTS engine: {engine}")
+        return None