ciyidogan committed on
Commit
f563475
·
verified ·
1 Parent(s): e6d70ab

Create tts_interface.py

Browse files
Files changed (1) hide show
  1. tts_interface.py +136 -0
tts_interface.py ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ TTS Interface and Implementations
3
+ """
4
+
5
+ from abc import ABC, abstractmethod
6
+ from typing import Optional, Dict, Any
7
+ import httpx
8
+ import os
9
+ from datetime import datetime
10
+ import sys
11
+
12
def log(message: str):
    """Print ``message`` to stdout with a ``[HH:MM:SS.mmm]`` prefix and flush.

    Args:
        message: Text to emit after the timestamp prefix.
    """
    now = datetime.now()
    # Millisecond precision: keep only the first three digits of the microseconds.
    stamp = f"{now:%H:%M:%S}.{now.microsecond // 1000:03d}"
    print(f"[{stamp}] {message}", flush=True)
16
+
17
class TTSInterface(ABC):
    """Contract that every text-to-speech provider must implement."""

    @abstractmethod
    async def synthesize(self, text: str, voice_id: Optional[str] = None, **kwargs) -> bytes:
        """Turn ``text`` into spoken audio.

        Args:
            text: The text to be spoken.
            voice_id: Provider-specific voice identifier; optional.
            **kwargs: Extra provider-specific options.

        Returns:
            The audio as raw bytes (MP3 or WAV format).
        """

    @abstractmethod
    def get_supported_voices(self) -> Dict[str, str]:
        """Return the voices this provider supports as a ``Dict[str, str]``."""
39
+
40
+
41
class ElevenLabsTTS(TTSInterface):
    """TTS provider that talks to the ElevenLabs HTTP API."""

    def __init__(self, api_key: str):
        """Store the API key and the fixed endpoint / default-voice settings.

        Args:
            api_key: Value sent in the ``xi-api-key`` request header.
        """
        self.api_key = api_key
        self.base_url = "https://api.elevenlabs.io/v1"
        # Voice used when the caller does not pass ``voice_id`` (Rachel voice).
        self.default_voice_id = "21m00Tcm4TlvDq8ikWAM"

    async def synthesize(self, text: str, voice_id: Optional[str] = None, **kwargs) -> bytes:
        """Request speech audio for ``text`` from the ElevenLabs API.

        Args:
            text: Text to be spoken.
            voice_id: Voice to use; falls back to ``self.default_voice_id``
                when missing or empty.
            **kwargs: Optional overrides read by this method:
                ``model_id`` (default ``"eleven_multilingual_v2"``),
                ``voice_settings`` (dict placed in the JSON body) and
                ``output_format`` (query parameter, default
                ``"mp3_44100_128"``).

        Returns:
            The raw response body.

        Raises:
            httpx.HTTPStatusError: When the API answers with an error status.
            Exception: Any other failure is logged and re-raised unchanged.
        """
        # Everything stays inside the ``try`` so that any failure, including
        # one while building the request, is logged before propagating.
        try:
            endpoint = f"{self.base_url}/text-to-speech/{voice_id or self.default_voice_id}"

            request_headers = {
                "xi-api-key": self.api_key,
                "Content-Type": "application/json"
            }

            # Settings applied when the caller supplies no ``voice_settings``.
            fallback_settings = {
                "stability": 0.5,
                "similarity_boost": 0.75,
                "style": 0,
                "use_speaker_boost": True
            }
            payload = {
                "text": text,
                "model_id": kwargs.get("model_id", "eleven_multilingual_v2"),
                "voice_settings": kwargs.get("voice_settings", fallback_settings)
            }

            # The output format travels as a query parameter, not in the body.
            query = {"output_format": kwargs.get("output_format", "mp3_44100_128")}

            log(f"🎤 Calling ElevenLabs TTS for {len(text)} characters")

            async with httpx.AsyncClient(timeout=30) as http:
                reply = await http.post(
                    endpoint,
                    headers=request_headers,
                    json=payload,
                    params=query
                )

                reply.raise_for_status()
                audio = reply.content

                log(f"✅ ElevenLabs TTS returned {len(audio)} bytes")
                return audio

        except httpx.HTTPStatusError as err:
            log(f"❌ ElevenLabs API error: {err.response.status_code} - {err.response.text}")
            raise
        except Exception as err:
            log(f"❌ TTS synthesis error: {err}")
            raise

    def get_supported_voices(self) -> Dict[str, str]:
        """Return a hard-coded mapping of voice ID to display label.

        This is only a default set; the full list can be fetched from the API.
        """
        default_voices = {
            "21m00Tcm4TlvDq8ikWAM": "Rachel (Female)",
            "EXAVITQu4vr4xnSDxMaL": "Bella (Female)",
            "ErXwobaYiN019PkySvjV": "Antoni (Male)",
            "VR6AewLTigWG4xSOukaG": "Arnold (Male)",
            "pNInz6obpgDQGcFmaJgB": "Adam (Male)",
            "yoZ06aMxZJJ28mfd3POQ": "Sam (Male)",
        }
        return default_voices
111
+
112
+
113
class BlazeTTS(TTSInterface):
    """Stub provider reserved for a future Blaze TTS integration."""

    def __init__(self, api_key: str):
        """Keep the API key; nothing else is configured yet.

        Args:
            api_key: Credential stored on the instance as ``self.api_key``.
        """
        self.api_key = api_key

    async def synthesize(self, text: str, voice_id: Optional[str] = None, **kwargs) -> bytes:
        """Always raise, because synthesis is not available for this provider.

        Raises:
            NotImplementedError: On every call.
        """
        raise NotImplementedError("Blaze TTS not implemented yet")

    def get_supported_voices(self) -> Dict[str, str]:
        """Return an empty mapping; no voices are defined for this provider."""
        no_voices: Dict[str, str] = {}
        return no_voices
124
+
125
+
126
def create_tts_provider(engine: str, api_key: Optional[str] = None) -> Optional[TTSInterface]:
    """Build the TTS provider that matches ``engine``.

    Args:
        engine: Provider name; ``"elevenlabs"``, ``"blaze"`` and ``"no_tts"``
            are recognised.
        api_key: Credential handed to the provider; required for
            ``"elevenlabs"`` and ``"blaze"``.

    Returns:
        A provider instance, or ``None`` when ``engine`` is ``"no_tts"``,
        is not recognised, or is recognised but no ``api_key`` was given.
    """
    # TTS explicitly switched off: return quietly, without a warning.
    if engine == "no_tts":
        return None

    provider_cls = None
    if engine == "elevenlabs":
        provider_cls = ElevenLabsTTS
    elif engine == "blaze":
        provider_cls = BlazeTTS

    if provider_cls is not None and api_key:
        return provider_cls(api_key)

    # Reached for an unknown engine or a known engine without an API key.
    log(f"⚠️ Unknown or unconfigured TTS engine: {engine}")
    return None