ciyidogan committed on
Commit
31c83a9
·
verified ·
1 Parent(s): dac0e8a

Update tts/tts_google.py

Browse files
Files changed (1) hide show
  1. tts/tts_google.py +64 -64
tts/tts_google.py CHANGED
@@ -1,65 +1,65 @@
1
- # tts_google.py
2
- from google.cloud import texttospeech
3
- from ssml_converter import SSMLConverter
4
- from utils.logger import log_info, log_error, log_debug, log_warning
5
-
6
- class GoogleCloudTTS(TTSInterface):
7
- """Google Cloud Text-to-Speech implementation"""
8
-
9
- def __init__(self, credentials_path: str):
10
- super().__init__()
11
- self.supports_ssml = True
12
- self.credentials_path = credentials_path
13
-
14
- # Google TTS doesn't need preprocessing with SSML
15
- self.preprocessing_flags = set()
16
-
17
- # Initialize client
18
- os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = credentials_path
19
- self.client = texttospeech.TextToSpeechClient()
20
-
21
- # SSML converter
22
- self.ssml_converter = SSMLConverter(language="tr-TR")
23
-
24
- async def synthesize(self, text: str, voice_id: Optional[str] = None, **kwargs) -> bytes:
25
- """Convert text to speech using Google Cloud TTS"""
26
- try:
27
- # Check if SSML should be used
28
- use_ssml = kwargs.get("use_ssml", True)
29
-
30
- if use_ssml and not text.startswith("<speak>"):
31
- # Convert to SSML
32
- text = self.ssml_converter.convert_to_ssml(text)
33
- log_info(f"📝 Converted to SSML: {text[:200]}...")
34
- input_text = texttospeech.SynthesisInput(ssml=text)
35
- else:
36
- input_text = texttospeech.SynthesisInput(text=text)
37
-
38
- # Voice selection
39
- voice = texttospeech.VoiceSelectionParams(
40
- language_code=kwargs.get("language_code", "tr-TR"),
41
- name=voice_id or "tr-TR-Wavenet-B",
42
- ssml_gender=texttospeech.SsmlVoiceGender.FEMALE
43
- )
44
-
45
- # Audio config
46
- audio_config = texttospeech.AudioConfig(
47
- audio_encoding=texttospeech.AudioEncoding.MP3,
48
- speaking_rate=kwargs.get("speaking_rate", 1.0),
49
- pitch=kwargs.get("pitch", 0.0),
50
- volume_gain_db=kwargs.get("volume_gain_db", 0.0)
51
- )
52
-
53
- # Perform synthesis
54
- response = self.client.synthesize_speech(
55
- input=input_text,
56
- voice=voice,
57
- audio_config=audio_config
58
- )
59
-
60
- log_info(f"✅ Google TTS returned {len(response.audio_content)} bytes")
61
- return response.audio_content
62
-
63
- except Exception as e:
64
- log_error("❌ Google TTS error", e)
65
  raise
 
1
+ # tts_google.py
2
import asyncio
import functools
import os
from typing import Optional

from google.cloud import texttospeech

from .ssml_converter import SSMLConverter
from utils.logger import log_info, log_error, log_debug, log_warning
5
+
6
class GoogleCloudTTS(TTSInterface):
    """Google Cloud Text-to-Speech implementation of ``TTSInterface``.

    By default plain text is converted to SSML with ``SSMLConverter`` before
    it is sent to the API; input that already is an SSML document is passed
    through unchanged.

    NOTE(review): ``TTSInterface`` is not imported anywhere in the visible
    file. It has to be brought into scope from the project module that
    defines it -- confirm the module path.
    """

    # Defaults used when the caller does not override them.
    DEFAULT_LANGUAGE = "tr-TR"
    DEFAULT_VOICE = "tr-TR-Wavenet-B"

    def __init__(self, credentials_path: str):
        """Create the Text-to-Speech client and the SSML converter.

        Args:
            credentials_path: Path to the Google service-account key file.
                It is exported as ``GOOGLE_APPLICATION_CREDENTIALS`` before
                the client is constructed.
        """
        super().__init__()
        self.supports_ssml = True
        self.credentials_path = credentials_path

        # Google TTS doesn't need preprocessing with SSML
        self.preprocessing_flags = set()

        # The environment variable must be in place before the client is
        # built. An empty/None path is skipped instead of raising TypeError
        # from os.environ, leaving credential discovery to the client.
        if credentials_path:
            os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = credentials_path
        self.client = texttospeech.TextToSpeechClient()

        # SSML converter
        self.ssml_converter = SSMLConverter(language=self.DEFAULT_LANGUAGE)

    async def synthesize(self, text: str, voice_id: Optional[str] = None, **kwargs) -> bytes:
        """Convert text to speech using Google Cloud TTS.

        Args:
            text: Plain text, or a complete SSML document starting with a
                ``<speak>`` element.
            voice_id: Google voice name. Defaults to ``tr-TR-Wavenet-B``.
            **kwargs: Optional settings:
                ``use_ssml`` (default ``True``) -- send the input as SSML,
                converting plain text first;
                ``language_code`` (default ``"tr-TR"``);
                ``ssml_gender`` -- a ``texttospeech.SsmlVoiceGender`` value,
                only sent when given;
                ``speaking_rate`` (default ``1.0``), ``pitch`` (default
                ``0.0``), ``volume_gain_db`` (default ``0.0``).

        Returns:
            The synthesized audio as MP3-encoded bytes.

        Raises:
            Exception: Any error raised while building or sending the
                request is logged and re-raised unchanged.
        """
        try:
            use_ssml = kwargs.get("use_ssml", True)

            if use_ssml:
                # Input that is already an SSML document must still be sent
                # through the ``ssml`` field; only plain text is converted.
                is_ssml_document = text.lstrip().startswith(("<speak>", "<speak "))
                if not is_ssml_document:
                    text = self.ssml_converter.convert_to_ssml(text)
                    log_info(f"📝 Converted to SSML: {text[:200]}...")
                input_text = texttospeech.SynthesisInput(ssml=text)
            else:
                input_text = texttospeech.SynthesisInput(text=text)

            # Voice selection. A voice name is always sent, so the gender is
            # left unspecified unless the caller asks for one.
            # NOTE(review): a fixed FEMALE gender appears to conflict with
            # the default voice -- confirm against the API reference.
            voice_params = {
                "language_code": kwargs.get("language_code", self.DEFAULT_LANGUAGE),
                "name": voice_id or self.DEFAULT_VOICE,
            }
            requested_gender = kwargs.get("ssml_gender")
            if requested_gender is not None:
                voice_params["ssml_gender"] = requested_gender
            voice = texttospeech.VoiceSelectionParams(**voice_params)

            # Audio config
            audio_config = texttospeech.AudioConfig(
                audio_encoding=texttospeech.AudioEncoding.MP3,
                speaking_rate=kwargs.get("speaking_rate", 1.0),
                pitch=kwargs.get("pitch", 0.0),
                volume_gain_db=kwargs.get("volume_gain_db", 0.0),
            )

            # ``synthesize_speech`` is a blocking call; run it in the default
            # executor so the event loop stays responsive while waiting.
            loop = asyncio.get_running_loop()
            response = await loop.run_in_executor(
                None,
                functools.partial(
                    self.client.synthesize_speech,
                    input=input_text,
                    voice=voice,
                    audio_config=audio_config,
                ),
            )

            log_info(f"✅ Google TTS returned {len(response.audio_content)} bytes")
            return response.audio_content

        except Exception as e:
            log_error("❌ Google TTS error", e)
            raise