Spaces:

aletrn
/

ai-pronunciation-trainer

Running

alessandro trinca tornidor

feat: port whisper and faster-whisper support from https://github.com/Thiagohgl/ai-pronunciation-trainer

85b7206 4 months ago

2.06 kB

	import base64
	import json
	import os
	import tempfile
	from pathlib import Path

	import soundfile as sf

	import AIModels
	import models
	import utilsFileIO
	from constants import app_logger, sample_rate_resample


	def get_tts(text: str, language: str, tmp_prefix="audio_", tmp_suffix=".wav") -> str:
	    """
	    Generate text-to-speech (TTS) audio for the given text and language.

	    The model loader ``models.__silero_tts`` is called once, with the language and
	    the system temporary directory as output folder. Its result is accepted in two
	    shapes: a 4-item ``(model, _, speaker, sample_rate)`` sequence or a 6-item
	    ``(model, _, sample_rate, _, _, speaker)`` sequence.

	    Args:
	        text (str): The text to be converted to speech.
	        language (str): The language of the text. Supported languages are "en" (English) and "de" (German).
	        tmp_prefix (str, optional): Filename prefix of the temporary audio file. Defaults to "audio_".
	        tmp_suffix (str, optional): Filename suffix (extension) of the temporary audio file. Defaults to ".wav".

	    Returns:
	        str: The value returned by ``model.save_wav`` (expected to be the path to the
	        generated audio file). The file is created with ``delete=False``, so removing
	        it after use is up to the caller.

	    Raises:
	        ValueError: If the text is empty/None, or if the model loader result has
	            neither 4 nor 6 items.
	        NotImplementedError: If the language is empty/None (the model loader may
	            presumably raise it too for unsupported languages - not visible here).

	    Notes:
	        This function uses the Silero TTS model to generate the audio. The model and speaker are selected based on the provided language.
	    """

	    if not text:
	        raise ValueError(f"cannot read an empty/None text: '{text}'...")
	    if not language:
	        raise NotImplementedError(f"Not tested/supported with '{language}' language...")

	    tmp_dir = Path(tempfile.gettempdir())
	    # Load the model only once and pick the unpacking that fits the result shape,
	    # instead of re-running the whole loader after a failed 4-item unpack.
	    loaded = tuple(models.__silero_tts(language, output_folder=tmp_dir))
	    if len(loaded) == 4:
	        model, _, speaker, sample_rate = loaded
	    else:
	        # Any length other than 6 raises ValueError here, like the original fallback.
	        model, _, sample_rate, _, _, speaker = loaded
	    app_logger.info(f"model speaker #0: {speaker} ...")

	    # Only reserve a unique file name here: the handle is closed before the model
	    # writes to that path, so the file is never open twice at the same time.
	    with tempfile.NamedTemporaryFile(prefix=tmp_prefix, suffix=tmp_suffix, delete=False) as tmp_audio_file:
	        tmp_audio_path = str(tmp_audio_file.name)
	    app_logger.info(f"tmp_audio_file output: {tmp_audio_path} ...")

	    try:
	        audio_paths = model.save_wav(
	            text=text, speaker=speaker, sample_rate=sample_rate, audio_path=tmp_audio_path
	        )
	    except Exception:
	        # delete=False means nobody else removes the placeholder file: drop it so a
	        # failed synthesis does not leave orphan files in the temporary directory.
	        try:
	            os.remove(tmp_audio_path)
	        except OSError:
	            # Best-effort cleanup: the synthesis error re-raised below is what matters.
	            pass
	        raise
	    app_logger.info(f"audio_paths output: {audio_paths} ...")
	    return audio_paths