File size: 2,519 Bytes
edec17e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9874d4d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
# tts_google.py
import asyncio
import os
from typing import Optional

from google.cloud import texttospeech

from utils.logger import log_info, log_error, log_debug, log_warning
from .ssml_converter import SSMLConverter
# NOTE(review): TTSInterface is used as the base class below but is not
# imported in the visible part of this file - import it from its defining module.

class GoogleCloudTTS(TTSInterface):
    """Google Cloud Text-to-Speech implementation.

    Wraps ``texttospeech.TextToSpeechClient``. By default plain text is
    converted to SSML with ``SSMLConverter`` before synthesis, and the audio
    is requested in MP3 encoding.
    """

    # Fallbacks used when the caller does not supply a value.
    _DEFAULT_LANGUAGE_CODE = "tr-TR"
    _DEFAULT_VOICE_NAME = "tr-TR-Wavenet-B"

    def __init__(self, credentials_path: str):
        """Create the Google client and the SSML converter.

        Args:
            credentials_path: Value exported as the
                ``GOOGLE_APPLICATION_CREDENTIALS`` environment variable
                before the client is constructed.
        """
        super().__init__()
        self.supports_ssml = True
        self.credentials_path = credentials_path

        # Google TTS doesn't need preprocessing with SSML
        self.preprocessing_flags = set()

        # Initialize client. Note this sets a process-wide environment
        # variable, so it affects every Google client created afterwards.
        os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = credentials_path
        self.client = texttospeech.TextToSpeechClient()

        # SSML converter
        self.ssml_converter = SSMLConverter(language="tr-TR")

    @staticmethod
    def _is_ssml(text: str) -> bool:
        """Return True when *text* already starts with a ``<speak>`` root tag."""
        # Tolerate leading whitespace and a root tag carrying attributes.
        return text.lstrip().startswith(("<speak>", "<speak "))

    async def synthesize(self, text: str, voice_id: Optional[str] = None, **kwargs) -> bytes:
        """Convert text to speech using Google Cloud TTS.

        Args:
            text: Plain text, or an SSML document starting with ``<speak>``.
            voice_id: Google voice name; ``tr-TR-Wavenet-B`` when omitted.
            **kwargs: Optional overrides:
                ``use_ssml`` (default ``True``) - send the input as SSML,
                converting plain text first;
                ``language_code`` (default ``"tr-TR"``);
                ``speaking_rate`` (default ``1.0``);
                ``pitch`` (default ``0.0``);
                ``volume_gain_db`` (default ``0.0``).

        Returns:
            The ``audio_content`` bytes of the API response (MP3 encoding is
            requested).

        Raises:
            Exception: Any error raised during conversion or by the API call
                is logged and re-raised unchanged.
        """
        try:
            # Check if SSML should be used
            use_ssml = kwargs.get("use_ssml", True)

            if use_ssml:
                if not self._is_ssml(text):
                    # Convert to SSML
                    text = self.ssml_converter.convert_to_ssml(text)
                    log_info(f"📝 Converted to SSML: {text[:200]}...")
                # Input that is already SSML must also go through the ssml
                # field; sending it as plain text would read the tags aloud.
                input_text = texttospeech.SynthesisInput(ssml=text)
            else:
                input_text = texttospeech.SynthesisInput(text=text)

            # Voice selection
            # NOTE(review): the gender is fixed to FEMALE regardless of
            # voice_id - confirm it matches the selected voice.
            voice = texttospeech.VoiceSelectionParams(
                language_code=kwargs.get("language_code", self._DEFAULT_LANGUAGE_CODE),
                name=voice_id or self._DEFAULT_VOICE_NAME,
                ssml_gender=texttospeech.SsmlVoiceGender.FEMALE
            )

            # Audio config
            audio_config = texttospeech.AudioConfig(
                audio_encoding=texttospeech.AudioEncoding.MP3,
                speaking_rate=kwargs.get("speaking_rate", 1.0),
                pitch=kwargs.get("pitch", 0.0),
                volume_gain_db=kwargs.get("volume_gain_db", 0.0)
            )

            # Perform synthesis. The client call is blocking, so run it in
            # the default executor to keep the event loop responsive.
            loop = asyncio.get_running_loop()
            response = await loop.run_in_executor(
                None,
                lambda: self.client.synthesize_speech(
                    input=input_text,
                    voice=voice,
                    audio_config=audio_config
                ),
            )

            log_info(f"✅ Google TTS returned {len(response.audio_content)} bytes")
            return response.audio_content

        except Exception as e:
            log_error("❌ Google TTS error", e)
            raise