Spaces:

UcsTurkey
/

flare

Building

File size: 2,519 Bytes

# tts_google.py
import asyncio
import functools
import os
from typing import Optional

from google.cloud import texttospeech

from utils.logger import log_info, log_error, log_debug, log_warning
from .ssml_converter import SSMLConverter
# NOTE(review): TTSInterface is used as the base class below but is not
# imported anywhere in this file - add the import from its defining module.

class GoogleCloudTTS(TTSInterface):
    """Google Cloud Text-to-Speech implementation.

    Wraps ``texttospeech.TextToSpeechClient``. By default, plain text is
    converted to SSML with ``SSMLConverter`` before synthesis, and the
    audio is returned MP3-encoded.
    """

    # Fallbacks used when the caller passes no ``language_code`` / ``voice_id``.
    _DEFAULT_LANGUAGE_CODE = "tr-TR"
    _DEFAULT_VOICE_NAME = "tr-TR-Wavenet-B"

    def __init__(self, credentials_path: str):
        """Create the Google client and the SSML converter.

        Args:
            credentials_path: Path to the service-account credentials file.
                It is exported through ``GOOGLE_APPLICATION_CREDENTIALS``
                before the client is constructed.
        """
        super().__init__()
        self.supports_ssml = True
        self.credentials_path = credentials_path

        # Google TTS doesn't need preprocessing with SSML
        self.preprocessing_flags = set()

        # Initialize client. NOTE: this mutates the process-wide environment,
        # so the most recently constructed instance decides the credentials.
        os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = credentials_path
        self.client = texttospeech.TextToSpeechClient()

        # SSML converter
        self.ssml_converter = SSMLConverter(language=self._DEFAULT_LANGUAGE_CODE)

    async def synthesize(self, text: str, voice_id: Optional[str] = None, **kwargs) -> bytes:
        """Convert text to speech using Google Cloud TTS.

        Args:
            text: Plain text, or an SSML document starting with ``<speak``.
            voice_id: Google voice name; defaults to ``tr-TR-Wavenet-B``.
            **kwargs: Optional settings:
                use_ssml (bool, default True): send the input as SSML,
                    converting plain text first when needed.
                language_code (str, default "tr-TR").
                ssml_gender: a ``texttospeech.SsmlVoiceGender`` value; only
                    sent when explicitly provided.
                speaking_rate (float, default 1.0).
                pitch (float, default 0.0).
                volume_gain_db (float, default 0.0).

        Returns:
            The synthesized audio as MP3 bytes.

        Raises:
            Exception: Any error from conversion or the Google client is
                logged and re-raised unchanged.
        """
        try:
            use_ssml = kwargs.get("use_ssml", True)

            if use_ssml:
                # Input that is already SSML must still be submitted as SSML;
                # sending it as plain text would not interpret the markup.
                if not text.lstrip().startswith("<speak"):
                    text = self.ssml_converter.convert_to_ssml(text)
                    log_info(f"📝 Converted to SSML: {text[:200]}...")
                input_text = texttospeech.SynthesisInput(ssml=text)
            else:
                input_text = texttospeech.SynthesisInput(text=text)

            # Voice selection. The voice is always chosen by name, so a gender
            # is only forwarded when the caller asks for one: a hard-coded
            # gender can contradict the named voice.
            voice_params = {
                "language_code": kwargs.get("language_code", self._DEFAULT_LANGUAGE_CODE),
                "name": voice_id or self._DEFAULT_VOICE_NAME,
            }
            gender = kwargs.get("ssml_gender")
            if gender is not None:
                voice_params["ssml_gender"] = gender
            voice = texttospeech.VoiceSelectionParams(**voice_params)

            # Audio config
            audio_config = texttospeech.AudioConfig(
                audio_encoding=texttospeech.AudioEncoding.MP3,
                speaking_rate=kwargs.get("speaking_rate", 1.0),
                pitch=kwargs.get("pitch", 0.0),
                volume_gain_db=kwargs.get("volume_gain_db", 0.0),
            )

            # The client call is synchronous; run it in the default executor
            # so the event loop is not blocked during the network round trip.
            request = functools.partial(
                self.client.synthesize_speech,
                input=input_text,
                voice=voice,
                audio_config=audio_config,
            )
            response = await asyncio.get_running_loop().run_in_executor(None, request)

            log_info(f"✅ Google TTS returned {len(response.audio_content)} bytes")
            return response.audio_content

        except Exception as e:
            log_error("❌ Google TTS error", e)
            raise