# Source path: flare/tts/tts_google.py
# Uploaded by: ciyidogan
# Commit message: Upload 134 files
# Commit: edec17e (verified)
# tts_google.py
import asyncio
import functools
import os
from typing import Optional

from google.cloud import texttospeech

from .ssml_converter import SSMLConverter
from utils.logger import log_info, log_error, log_debug, log_warning

# NOTE(review): `TTSInterface` (the base class of GoogleCloudTTS) is not
# imported anywhere in this file. Presumably it lives in a sibling module of
# this package -- confirm the module path and add the import here.
class GoogleCloudTTS(TTSInterface):
    """Google Cloud Text-to-Speech implementation.

    Wraps ``texttospeech.TextToSpeechClient`` and, by default, converts plain
    text to SSML with ``SSMLConverter`` before requesting MP3 audio.
    """

    def __init__(self, credentials_path: str):
        """Create the Google client and the SSML converter.

        Args:
            credentials_path: Path to the credentials file. It is exported as
                the ``GOOGLE_APPLICATION_CREDENTIALS`` environment variable
                before the client is constructed.
        """
        super().__init__()
        self.supports_ssml = True
        self.credentials_path = credentials_path

        # Google TTS doesn't need preprocessing with SSML
        self.preprocessing_flags = set()

        # Initialize client. The environment variable is set process-wide, so
        # it is also visible to any other code running in this process.
        os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = credentials_path
        self.client = texttospeech.TextToSpeechClient()

        # SSML converter
        self.ssml_converter = SSMLConverter(language="tr-TR")

    async def synthesize(self, text: str, voice_id: Optional[str] = None, **kwargs) -> bytes:
        """Convert text to speech using Google Cloud TTS.

        Args:
            text: Plain text, or SSML already wrapped in ``<speak>``.
            voice_id: Google voice name; defaults to ``"tr-TR-Wavenet-B"``.
            **kwargs: Optional settings:
                use_ssml (bool, default True): send the input as SSML,
                    converting plain text first when needed.
                language_code (str, default "tr-TR").
                ssml_gender: a ``texttospeech.SsmlVoiceGender`` value; omitted
                    from the request when not supplied.
                speaking_rate (float, default 1.0).
                pitch (float, default 0.0).
                volume_gain_db (float, default 0.0).

        Returns:
            The synthesized audio as MP3-encoded bytes.

        Raises:
            Exception: any error raised while building or sending the request
                is logged and re-raised unchanged.
        """
        try:
            use_ssml = kwargs.get("use_ssml", True)

            if use_ssml:
                if not text.startswith("<speak>"):
                    # Plain text: wrap/convert it into SSML first.
                    text = self.ssml_converter.convert_to_ssml(text)
                    log_info(f"📝 Converted to SSML: {text[:200]}...")
                # Input that is already SSML must also be submitted through
                # the ``ssml`` field; sending it via ``text`` would make the
                # service treat the markup as literal text.
                input_text = texttospeech.SynthesisInput(ssml=text)
            else:
                input_text = texttospeech.SynthesisInput(text=text)

            # Voice selection. The voice is always chosen by name, so no
            # gender is forced by default.
            # NOTE(review): a hard-coded FEMALE gender can contradict a named
            # male voice; confirm against the VoiceSelectionParams reference.
            voice_params = {
                "language_code": kwargs.get("language_code", "tr-TR"),
                "name": voice_id or "tr-TR-Wavenet-B",
            }
            ssml_gender = kwargs.get("ssml_gender")
            if ssml_gender is not None:
                voice_params["ssml_gender"] = ssml_gender
            voice = texttospeech.VoiceSelectionParams(**voice_params)

            # Audio config
            audio_config = texttospeech.AudioConfig(
                audio_encoding=texttospeech.AudioEncoding.MP3,
                speaking_rate=kwargs.get("speaking_rate", 1.0),
                pitch=kwargs.get("pitch", 0.0),
                volume_gain_db=kwargs.get("volume_gain_db", 0.0),
            )

            # ``synthesize_speech`` is a synchronous call; run it in the
            # default executor so this coroutine does not block the event
            # loop while waiting for the response.
            loop = asyncio.get_running_loop()
            response = await loop.run_in_executor(
                None,
                functools.partial(
                    self.client.synthesize_speech,
                    input=input_text,
                    voice=voice,
                    audio_config=audio_config,
                ),
            )

            log_info(f"✅ Google TTS returned {len(response.audio_content)} bytes")
            return response.audio_content

        except Exception as e:
            log_error("❌ Google TTS error", e)
            raise