Spaces:
Building
Building
# tts_google.py
import asyncio
import functools
import os
from typing import Optional

from google.cloud import texttospeech

from logger import log_info, log_error, log_debug, log_warning
from ssml_converter import SSMLConverter
# NOTE(review): TTSInterface (base class of GoogleCloudTTS) is not imported in
# the visible lines - confirm which module provides it and import it here.
class GoogleCloudTTS(TTSInterface):
    """Google Cloud Text-to-Speech implementation.

    Wraps a ``texttospeech.TextToSpeechClient`` and an ``SSMLConverter``
    configured for ``tr-TR``. Plain text is converted to SSML before being
    sent unless the caller opts out with ``use_ssml=False``.
    """

    def __init__(self, credentials_path: str):
        """Create the Google TTS client and the SSML converter.

        Args:
            credentials_path: Path stored on the instance and exported as
                ``GOOGLE_APPLICATION_CREDENTIALS`` before the client is
                created.
        """
        super().__init__()
        self.supports_ssml = True
        self.credentials_path = credentials_path

        # Google TTS doesn't need preprocessing with SSML
        self.preprocessing_flags = set()

        # Initialize client.
        # NOTE: this sets a process-wide environment variable, so it affects
        # every other consumer of GOOGLE_APPLICATION_CREDENTIALS in the process.
        os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = credentials_path
        self.client = texttospeech.TextToSpeechClient()

        # SSML converter
        self.ssml_converter = SSMLConverter(language="tr-TR")

    async def synthesize(self, text: str, voice_id: Optional[str] = None, **kwargs) -> bytes:
        """Convert text to speech using Google Cloud TTS.

        Args:
            text: Plain text, or SSML starting with ``<speak>``.
            voice_id: Voice name; defaults to ``"tr-TR-Wavenet-B"``.
            **kwargs: Optional settings:
                use_ssml (default ``True``): send the input as SSML,
                    converting it first when it is not already wrapped in
                    ``<speak>``. When false the input is sent as plain text.
                language_code (default ``"tr-TR"``)
                ssml_gender (default ``SsmlVoiceGender.FEMALE``)
                speaking_rate (default ``1.0``)
                pitch (default ``0.0``)
                volume_gain_db (default ``0.0``)

        Returns:
            The ``audio_content`` of the synthesis response (MP3 encoding is
            requested).

        Raises:
            Exception: Any error raised during conversion or synthesis is
                logged and re-raised unchanged.
        """
        try:
            # Check if SSML should be used
            use_ssml = kwargs.get("use_ssml", True)

            if use_ssml:
                if not text.startswith("<speak>"):
                    # Convert to SSML
                    text = self.ssml_converter.convert_to_ssml(text)
                    log_info(f"π Converted to SSML: {text[:200]}...")
                # Input that already starts with <speak> must also go through
                # the ssml field; sending it as plain text would treat the
                # markup as literal text.
                input_text = texttospeech.SynthesisInput(ssml=text)
            else:
                input_text = texttospeech.SynthesisInput(text=text)

            # Voice selection
            # NOTE(review): the gender default is kept as FEMALE for backward
            # compatibility; confirm it matches the default voice name, and
            # pass ssml_gender explicitly when selecting a different voice.
            voice = texttospeech.VoiceSelectionParams(
                language_code=kwargs.get("language_code", "tr-TR"),
                name=voice_id or "tr-TR-Wavenet-B",
                ssml_gender=kwargs.get(
                    "ssml_gender", texttospeech.SsmlVoiceGender.FEMALE
                ),
            )

            # Audio config
            audio_config = texttospeech.AudioConfig(
                audio_encoding=texttospeech.AudioEncoding.MP3,
                speaking_rate=kwargs.get("speaking_rate", 1.0),
                pitch=kwargs.get("pitch", 0.0),
                volume_gain_db=kwargs.get("volume_gain_db", 0.0),
            )

            # Perform synthesis. The client call is synchronous, so run it in
            # the default executor instead of blocking the event loop.
            loop = asyncio.get_running_loop()
            response = await loop.run_in_executor(
                None,
                functools.partial(
                    self.client.synthesize_speech,
                    input=input_text,
                    voice=voice,
                    audio_config=audio_config,
                ),
            )

            log_info(f"β Google TTS returned {len(response.audio_content)} bytes")
            return response.audio_content

        except Exception as e:
            log_error("β Google TTS error", e)
            raise