Spaces:

peechapp
/

PeechTTSv22050

Sleeping

App Files Files Community

PeechTTSv22050 / demo /delightful_univnet.py

nickovchinnikov

Fix missed file

bbd9e13 about 1 year ago

raw

history blame contribute delete

2.4 kB

	import tempfile

	from gradio import Checkbox, Dropdown, Interface, Textbox
	import soundfile as sf
	import torch
	from voicefixer import VoiceFixer

	from models.delightful_univnet import DelightfulUnivnet
	from training.datasets.hifi_libri_dataset import speakers_hifi_ids

	from .config import speakers_delightful_22050

	# Checkpoint file handed to the model constructor below.
	delightful_checkpoint_path = "epoch=5816-step=390418.ckpt"

	# All tensors and the model itself live on the CPU in this demo.
	device = torch.device("cpu")

	# Build the model from the checkpoint first, then place it on the demo device.
	delightfulunivnet_22050 = DelightfulUnivnet(
	    delightful_checkpoint_path=delightful_checkpoint_path,
	)
	delightfulunivnet_22050 = delightfulunivnet_22050.to(device)

	# Module-level VoiceFixer instance; generate_audio uses it when voice fixing
	# is requested.
	voicefixer = VoiceFixer()


	def generate_audio(text: str, speaker_name: str, fix_voice: bool):
	    """Synthesize speech for ``text`` and optionally restore it with VoiceFixer.

	    Args:
	        text: Text to synthesize.
	        speaker_name: Key into ``speakers_delightful_22050`` selecting the
	            speaker id that is passed to the model.
	        fix_voice: When true, the generated waveform is run through
	            ``voicefixer.restore`` and the restored audio is returned instead.

	    Returns:
	        A ``(sampling_rate, waveform)`` pair. Without voice fixing the rate is
	        ``delightfulunivnet_22050.sampling_rate`` and the waveform is the
	        model output as a NumPy array; with voice fixing both values are
	        whatever ``sf.read`` reports for the restored file.

	    Raises:
	        KeyError: If ``speaker_name`` is not a key of
	            ``speakers_delightful_22050``.
	    """
	    # Local import keeps this block self-contained; only needed for path joins.
	    import os

	    speaker = torch.tensor(
	        [speakers_delightful_22050[speaker_name]],
	        device=device,
	    )
	    # Inference only: no autograd bookkeeping is needed.
	    with torch.no_grad():
	        wav = delightfulunivnet_22050.forward(text, speaker)
	        wav = wav.squeeze().detach().cpu().numpy()

	    if not fix_voice:
	        return delightfulunivnet_22050.sampling_rate, wav

	    # voicefixer.restore takes an input path and an output path. Use plain
	    # paths inside a temporary directory rather than NamedTemporaryFile
	    # handles: reopening a still-open NamedTemporaryFile by name is
	    # platform-dependent (it fails on Windows). The directory and both files
	    # are removed when the ``with`` block exits.
	    with tempfile.TemporaryDirectory() as tmp_dir:
	        input_path = os.path.join(tmp_dir, "input.wav")
	        output_path = os.path.join(tmp_dir, "output.wav")

	        # Write the generated waveform to disk for VoiceFixer to consume.
	        sf.write(input_path, wav, delightfulunivnet_22050.sampling_rate)

	        voicefixer.restore(
	            input=input_path,  # low quality .wav/.flac file
	            output=output_path,  # save file path
	            cuda=False,  # GPU acceleration off
	            mode=0,
	        )

	        # Read the restored audio back before the directory is cleaned up.
	        wav_vf, sampling_rate = sf.read(output_path)

	    return sampling_rate, wav_vf


	# Speaker names offered in the dropdown are the keys of the speaker map that
	# generate_audio indexes. The preferred default is the first entry of
	# speakers_hifi_ids; if that name is not one of the choices, fall back to the
	# first available choice so the preselected value can always be looked up.
	_speaker_choices = list(speakers_delightful_22050.keys())
	_default_speaker = speakers_hifi_ids[0]
	if _default_speaker not in _speaker_choices:
	    _default_speaker = _speaker_choices[0] if _speaker_choices else None

	# Gradio interface for the demo: text, speaker and the VoiceFixer toggle are
	# passed to generate_audio in that order; the result is rendered as audio.
	interfaceDelightfulUnuvnet22050 = Interface(
	    generate_audio,
	    [
	        Textbox(
	            label="Text",
	            value="As the snake shook its head, a deafening shout behind Harry made both of them jump.",
	        ),
	        Dropdown(
	            label="Speaker",
	            choices=_speaker_choices,
	            value=_default_speaker,
	        ),
	        Checkbox(
	            label="Fix voice (Voicefixer)",
	            value=False,
	        ),
	    ],
	    outputs="audio",
	    title=(
	        f"Delightful UnivNet, Sampling Rate: {delightfulunivnet_22050.sampling_rate}. "
	        "When Voicefixer is enabled, the Sampling Rate is 44100."
	    ),
	)