# Spaces: ixxan / uyghur-pronunciation-checker (Running)
# Abdurahman - app - f0e249a, 15 days ago - 1.19 kB

	from transformers import VitsModel, AutoTokenizer
	import torch
	import scipy.io.wavfile
	import util

	# Checkpoint used for text-to-speech (MMS TTS, Uyghur in Arabic script).
	model_id = "facebook/mms-tts-uig-script_arabic"

	# Pick the compute device up front: CUDA when available, otherwise CPU.
	if torch.cuda.is_available():
	    device = torch.device("cuda")
	else:
	    device = torch.device("cpu")

	# Load the tokenizer and the VITS model, placing the model on the chosen device.
	tts_tokenizer = AutoTokenizer.from_pretrained(model_id)
	tts_model = VitsModel.from_pretrained(model_id).to(device)

	def generate_audio(input_text, script):
	    """Synthesize speech for ``input_text`` and save it as a WAV file.

	    Args:
	        input_text: The text to speak.
	        script: Name of the script ``input_text`` is written in. Any value
	            other than ``"Uyghur Arabic"`` causes the text to be passed
	            through ``util.ug_latn_to_arab`` before synthesis.

	    Returns:
	        The path of the written WAV file. The path is fixed
	        (``"tts_output.wav"``), so each call overwrites the previous output.
	    """
	    # Convert text to Uyghur Arabic if needed
	    if script != "Uyghur Arabic":
	        input_text = util.ug_latn_to_arab(input_text)

	    # Tokenize and move inputs to the same device as the model
	    tts_inputs = tts_tokenizer(input_text, return_tensors="pt").to(device)

	    # Run inference without tracking gradients; move the result back to the
	    # CPU so it can be converted to a NumPy array for saving.
	    with torch.no_grad():
	        waveform = tts_model(**tts_inputs).waveform.cpu()

	    output_path = "tts_output.wav"
	    # Take the sample rate from the loaded model's config instead of
	    # hard-coding it, so the WAV header stays correct if the checkpoint changes.
	    sample_rate = tts_model.config.sampling_rate
	    # Index 0 selects the first entry of the model output
	    # (presumably the batch dimension - a single text is passed in).
	    scipy.io.wavfile.write(output_path, rate=sample_rate, data=waveform.numpy()[0])

	    # Return the audio file path
	    return output_path