API_MC_AI / VietTTS /text_to_speech.py
duyv's picture
Upload text_to_speech.py
17cf0ef verified
import sys
import os
import base64
from tts import TTS
from utils.file_utils import load_prompt_speech_from_file, load_voices
from app.config import settings
from datetime import datetime
def load_model(
speed,
voice,
text,
output_path,
output_format="wav",
):
print("Loading TTS model...", settings.DIR_ROOT)
tts_obj = TTS(model_dir=os.path.join(settings.DIR_ROOT, "VietTTS", "models"))
VOICE_MAP = load_voices(os.path.join(settings.DIR_ROOT, "VietTTS", "samples"))
speed = float(speed)
if voice.isdigit():
voice_file = list(VOICE_MAP.values())[int(voice)]
else:
voice_file = VOICE_MAP.get(voice)
if not voice_file or not os.path.exists(voice_file):
raise ValueError("Voice file not found")
print(f"Output path: {output_path}")
prompt_speech_16k = load_prompt_speech_from_file(filepath=voice_file, min_duration=3, max_duration=10)
tts_obj.tts_to_file(text=text, prompt_speech_16k=prompt_speech_16k, output_path=output_path, speed=speed)
print("END TTS worker")
if __name__ == "__main__":
speed = sys.argv[1]
voice = sys.argv[2]
text = sys.argv[3]
output_path = sys.argv[4]
output_format = sys.argv[5] if len(sys.argv) > 5 else "wav"
load_model(speed, voice, text, output_path, output_format)