Spaces:

Adipta
/

voice-ai

Sleeping

File size: 1,508 Bytes

import io
import json
import os
import tempfile
import time
import wave

import google.cloud.texttospeech as tts
import simpleaudio as sa

# process of getting credentials
def get_credentials() -> str:
    """Write the credentials JSON from the environment to a temporary file.

    The JSON text is read from the ``GOOGLE`` environment variable and
    written verbatim (it is not parsed or validated here) to a new
    ``.json`` temporary file.

    Returns:
        The path of the temporary file. The file is created with
        ``delete=False``, so it is not removed when it is closed; cleaning
        it up is left to the caller.

    Raises:
        ValueError: If ``GOOGLE`` is unset or empty.
    """
    creds_json_str = os.getenv("GOOGLE")
    # An empty value is as unusable as a missing one: it would only produce
    # an empty credentials file that fails later with a less obvious error.
    if not creds_json_str:
        raise ValueError(
            'Environment variable "GOOGLE" is not set or is empty; '
            "it must contain the service-account credentials JSON"
        )

    # delete=False keeps the file on disk after the ``with`` block closes it,
    # so that it can still be opened by path afterwards.
    with tempfile.NamedTemporaryFile(
        mode="w", delete=False, suffix=".json", encoding="utf-8"
    ) as temp:
        temp.write(creds_json_str)
        temp_filename = temp.name

    return temp_filename
    
# Runs at import time: export the path of the credentials file so that it is
# already in the environment when a TextToSpeechClient is created later.
os.environ.update({"GOOGLE_APPLICATION_CREDENTIALS": get_credentials()})

class TextToSpeech:
    """Synthesize text with Google Cloud Text-to-Speech and play it locally.

    The voice and audio settings are fixed at construction time; each call
    to :meth:`text_to_speech` sends one synthesis request and blocks until
    the resulting audio has finished playing.
    """

    # Playback format used when the returned audio carries no WAV header.
    _FALLBACK_CHANNELS = 1
    _FALLBACK_SAMPLE_WIDTH = 2  # bytes per sample (16-bit PCM)
    _FALLBACK_SAMPLE_RATE = 24000

    def __init__(
        self,
        language_code: str = "id-ID",
        voice_name: str = "id-ID-Standard-A",
        speaking_rate: float = 1.25,
    ):
        """Create the API client and the request settings.

        Args:
            language_code: Language code passed to ``VoiceSelectionParams``.
            voice_name: Voice name passed to ``VoiceSelectionParams``.
            speaking_rate: Speaking rate passed to ``AudioConfig``.
        """
        self.voice_params = tts.VoiceSelectionParams(
            language_code=language_code, name=voice_name
        )
        self.audio_config = tts.AudioConfig(
            audio_encoding=tts.AudioEncoding.LINEAR16,
            speaking_rate=speaking_rate,
        )
        self.client = tts.TextToSpeechClient()

    @staticmethod
    def _decode_linear16(audio_content: bytes) -> tuple:
        """Split synthesized audio into raw PCM frames and playback format.

        Args:
            audio_content: Audio bytes as returned by the synthesis call.

        Returns:
            A ``(pcm, num_channels, bytes_per_sample, sample_rate)`` tuple.
            When ``audio_content`` starts with a RIFF/WAV header, the format
            is read from that header and ``pcm`` excludes it; otherwise the
            bytes are returned unchanged with the fallback format
            (mono, 2 bytes per sample, 24000 Hz).

        Raises:
            wave.Error: If the data starts with ``RIFF`` but is not a WAV
                file the ``wave`` module can parse.
        """
        if audio_content[:4] == b"RIFF":
            with wave.open(io.BytesIO(audio_content), "rb") as wav:
                return (
                    wav.readframes(wav.getnframes()),
                    wav.getnchannels(),
                    wav.getsampwidth(),
                    wav.getframerate(),
                )
        return (
            audio_content,
            TextToSpeech._FALLBACK_CHANNELS,
            TextToSpeech._FALLBACK_SAMPLE_WIDTH,
            TextToSpeech._FALLBACK_SAMPLE_RATE,
        )

    def text_to_speech(self, text: str) -> None:
        """Synthesize ``text``, print the request latency, and play the audio.

        Blocks until playback has finished.

        Args:
            text: The plain text to speak.
        """
        start = time.perf_counter()
        text_input = tts.SynthesisInput(text=text)
        response = self.client.synthesize_speech(
            input=text_input,
            voice=self.voice_params,
            audio_config=self.audio_config,
        )
        end = time.perf_counter()
        print(f"Time taken to synthesize speech: {end-start:.2f}s")

        # The Cloud TTS API documents that LINEAR16 audio content includes a
        # WAV header. play_buffer expects bare PCM, so strip the header and
        # take the playback format from it instead of hard-coding it.
        pcm, num_channels, bytes_per_sample, sample_rate = self._decode_linear16(
            response.audio_content
        )
        play_obj = sa.play_buffer(
            pcm,
            num_channels=num_channels,
            bytes_per_sample=bytes_per_sample,
            sample_rate=sample_rate,
        )
        play_obj.wait_done()