ciyidogan committed on
Commit
b861704
·
verified ·
1 Parent(s): 1620c6f

Update tts_interface.py

Browse files
Files changed (1) hide show
  1. tts_interface.py +52 -51
tts_interface.py CHANGED
@@ -3,7 +3,7 @@ TTS Interface and Implementations
3
  """
4
 
5
  from abc import ABC, abstractmethod
6
- from typing import Optional, Dict, Any
7
  import httpx
8
  import os
9
  from datetime import datetime
@@ -33,12 +33,11 @@ class TTSInterface(ABC):
33
  """Get list of supported voices"""
34
  pass
35
 
36
- @abstractmethod
37
  def get_preprocessing_flags(self) -> Set[str]:
38
  """Get preprocessing flags for this provider"""
39
- pass
40
 
41
- def supports_ssml(self) -> bool:
42
  """Check if provider supports SSML"""
43
  return self.supports_ssml
44
 
@@ -46,6 +45,7 @@ class ElevenLabsTTS(TTSInterface):
46
  """ElevenLabs TTS implementation"""
47
 
48
  def __init__(self, api_key: str):
 
49
  self.api_key = api_key
50
  self.base_url = "https://api.elevenlabs.io/v1"
51
  self.default_voice_id = "2thYbn2sOGtiTwd9QwWH" # Avencia
@@ -60,54 +60,58 @@ class ElevenLabsTTS(TTSInterface):
60
  }
61
 
62
  self.preprocessor = TTSPreprocessor(language="tr")
63
-
64
- # Debug log
65
- masked_key = f"{api_key[:4]}...{api_key[-4:]}" if len(api_key) > 8 else "***"
66
- log(f"🔑 ElevenLabsTTS initialized with key: {masked_key}")
67
-
68
- async def synthesize(self, text: str, voice_id: Optional[str] = None, **kwargs) -> bytes:
 
 
 
69
  """Convert text to speech using ElevenLabs API"""
70
- try:
71
- # Apply preprocessing if not disabled
72
- if kwargs.get("disable_preprocessing", False) != True:
73
- text = self.preprocessor.preprocess(text, self.preprocessing_flags)
74
- log(f"📝 Preprocessed text: {text[:100]}...")
75
-
76
- voice = voice_id or self.default_voice_id
77
- url = f"{self.base_url}/text-to-speech/{voice}"
78
-
79
- headers = {
80
- "xi-api-key": self.api_key,
81
- "Content-Type": "application/json"
82
- }
83
-
84
- # Default parameters
85
- data = {
86
- "text": text,
87
- "model_id": kwargs.get("model_id", "eleven_multilingual_v2"),
88
- "voice_settings": kwargs.get("voice_settings", {
89
- "stability": 1,
90
- "similarity_boost": 0.85,
91
- "style": 0.7,
92
- "speed": 1.14,
 
93
  "use_speaker_boost": True
94
- })
95
  }
96
-
97
- # Add optional parameters
98
- if "output_format" in kwargs:
99
- params = {"output_format": kwargs["output_format"]}
100
- else:
101
- params = {"output_format": "mp3_44100_128"}
102
-
103
- log(f"🎤 Calling ElevenLabs TTS for {len(text)} characters")
104
-
105
- async with httpx.AsyncClient(timeout=30) as client:
 
106
  response = await client.post(
107
- url,
 
108
  headers=headers,
109
- json=data,
110
- params=params
111
  )
112
 
113
  response.raise_for_status()
@@ -134,14 +138,11 @@ class ElevenLabsTTS(TTSInterface):
134
  "yoZ06aMxZJJ28mfd3POQ": "Sam (Male)",
135
  }
136
 
137
- def get_preprocessing_flags(self) -> Set[str]:
138
- """Get preprocessing flags for ElevenLabs"""
139
- return self.preprocessing_flags
140
-
141
  class BlazeTTS(TTSInterface):
142
  """Placeholder for future Blaze TTS implementation"""
143
 
144
  def __init__(self, api_key: str):
 
145
  self.api_key = api_key
146
 
147
  async def synthesize(self, text: str, voice_id: Optional[str] = None, **kwargs) -> bytes:
 
3
  """
4
 
5
  from abc import ABC, abstractmethod
6
+ from typing import Optional, Dict, Any, Set
7
  import httpx
8
  import os
9
  from datetime import datetime
 
33
  """Get list of supported voices"""
34
  pass
35
 
 
36
  def get_preprocessing_flags(self) -> Set[str]:
37
  """Get preprocessing flags for this provider"""
38
+ return self.preprocessing_flags
39
 
40
+ def supports_ssml_format(self) -> bool:
41
  """Check if provider supports SSML"""
42
  return self.supports_ssml
43
 
 
45
  """ElevenLabs TTS implementation"""
46
 
47
  def __init__(self, api_key: str):
48
+ super().__init__()
49
  self.api_key = api_key
50
  self.base_url = "https://api.elevenlabs.io/v1"
51
  self.default_voice_id = "2thYbn2sOGtiTwd9QwWH" # Avencia
 
60
  }
61
 
62
  self.preprocessor = TTSPreprocessor(language="tr")
63
+
64
+ async def synthesize(
65
+ self,
66
+ text: str,
67
+ voice_id: Optional[str] = None,
68
+ model_id: Optional[str] = None,
69
+ output_format: Optional[str] = None,
70
+ **kwargs
71
+ ) -> bytes:
72
  """Convert text to speech using ElevenLabs API"""
73
+
74
+ # Preprocess text
75
+ processed_text = self.preprocessor.process(text, self.preprocessing_flags)
76
+
77
+ # Use defaults if not provided
78
+ voice_id = voice_id or self.default_voice_id
79
+ model_id = model_id or "eleven_multilingual_v2"
80
+ output_format = output_format or "mp3_44100_128"
81
+
82
+ url = f"{self.base_url}/text-to-speech/{voice_id}"
83
+
84
+ headers = {
85
+ "Accept": "audio/mpeg",
86
+ "Content-Type": "application/json",
87
+ "xi-api-key": self.api_key
88
+ }
89
+
90
+ data = {
91
+ "text": processed_text,
92
+ "model_id": model_id,
93
+ "voice_settings": {
94
+ "stability": 0.5,
95
+ "similarity_boost": 0.75,
96
+ "style": 0.0,
97
  "use_speaker_boost": True
 
98
  }
99
+ }
100
+
101
+ # Add output format to URL if specified
102
+ if output_format:
103
+ url += f"?output_format={output_format}"
104
+
105
+ try:
106
+ async with httpx.AsyncClient() as client:
107
+ log(f"🎤 ElevenLabs TTS request: voice={voice_id}, model={model_id}")
108
+ log(f"📝 Text (first 100 chars): {processed_text[:100]}...")
109
+
110
  response = await client.post(
111
+ url,
112
+ json=data,
113
  headers=headers,
114
+ timeout=30.0
 
115
  )
116
 
117
  response.raise_for_status()
 
138
  "yoZ06aMxZJJ28mfd3POQ": "Sam (Male)",
139
  }
140
 
 
 
 
 
141
  class BlazeTTS(TTSInterface):
142
  """Placeholder for future Blaze TTS implementation"""
143
 
144
  def __init__(self, api_key: str):
145
+ super().__init__()
146
  self.api_key = api_key
147
 
148
  async def synthesize(self, text: str, voice_id: Optional[str] = None, **kwargs) -> bytes: