PeechTTSv22050 / demo /delightful_univnet.py
nickovchinnikov's picture
Fix missed file
bbd9e13
import os
import tempfile

from gradio import Checkbox, Dropdown, Interface, Textbox
import soundfile as sf
import torch
from voicefixer import VoiceFixer

from models.delightful_univnet import DelightfulUnivnet
from training.datasets.hifi_libri_dataset import speakers_hifi_ids

from .config import speakers_delightful_22050
# Checkpoint file for the model. The path is relative, so it is resolved
# against the working directory of the process that imports this module.
delightful_checkpoint_path = "epoch=5816-step=390418.ckpt"

# The demo runs everything on CPU.
device = torch.device("cpu")

# Built once at import time and shared by every call to the demo handler.
delightfulunivnet_22050 = DelightfulUnivnet(
    delightful_checkpoint_path=delightful_checkpoint_path,
)
delightfulunivnet_22050 = delightfulunivnet_22050.to(device)

# Optional post-processing step, also created once at import time.
voicefixer = VoiceFixer()
def generate_audio(text: str, speaker_name: str, fix_voice: bool):
    """Synthesize ``text`` with the selected speaker, optionally via VoiceFixer.

    Args:
        text: Text handed to the model's ``forward``.
        speaker_name: Key into ``speakers_delightful_22050``; the mapped value
            is wrapped in a one-element tensor and passed as the speaker.
        fix_voice: When true, the synthesized waveform is written to disk,
            restored by VoiceFixer (``mode=0``, CPU) and the restored audio is
            returned instead of the raw model output.

    Returns:
        A ``(sampling_rate, waveform)`` tuple. Without ``fix_voice`` the rate
        is ``delightfulunivnet_22050.sampling_rate`` and the waveform is the
        squeezed model output as a NumPy array. With ``fix_voice`` both values
        are whatever ``soundfile`` reads back from VoiceFixer's output file.

    Raises:
        KeyError: If ``speaker_name`` is not a key of
            ``speakers_delightful_22050``.
    """
    speaker = torch.tensor(
        [speakers_delightful_22050[speaker_name]],
        device=device,
    )

    with torch.no_grad():
        wav = delightfulunivnet_22050.forward(text, speaker)
        wav = wav.squeeze().detach().cpu().numpy()

    if not fix_voice:
        return delightfulunivnet_22050.sampling_rate, wav

    # VoiceFixer is driven through file paths here. Use plain paths inside a
    # temporary directory rather than the names of still-open
    # NamedTemporaryFile handles: re-opening such a file by name is
    # platform-dependent and fails on Windows. The directory and both files
    # are removed when the ``with`` block exits.
    with tempfile.TemporaryDirectory() as tmp_dir:
        input_path = os.path.join(tmp_dir, "input.wav")
        output_path = os.path.join(tmp_dir, "output.wav")

        sf.write(input_path, wav, delightfulunivnet_22050.sampling_rate)

        voicefixer.restore(
            input=input_path,  # low quality .wav/.flac file
            output=output_path,  # save file path
            cuda=False,  # GPU acceleration off
            mode=0,
        )

        # Read the restored audio fully into memory before the directory is
        # cleaned up.
        wav_vf, sampling_rate = sf.read(output_path)

    return sampling_rate, wav_vf
# The dropdown choices (and the lookup in generate_audio) come from
# speakers_delightful_22050, while the preferred default comes from
# speakers_hifi_ids. Keep that default when it is a valid choice; otherwise
# fall back to the first offered speaker so the pre-selected value can always
# be resolved.
_default_speaker = speakers_hifi_ids[0]
if _default_speaker not in speakers_delightful_22050:
    _default_speaker = next(iter(speakers_delightful_22050), None)

# Gradio UI for the demo: text + speaker + optional VoiceFixer toggle in,
# audio out, served by generate_audio.
interfaceDelightfulUnuvnet22050 = Interface(
    generate_audio,
    [
        Textbox(
            label="Text",
            value="As the snake shook its head, a deafening shout behind Harry made both of them jump.",
        ),
        Dropdown(
            label="Speaker",
            choices=list(speakers_delightful_22050.keys()),
            value=_default_speaker,
        ),
        Checkbox(
            label="Fix voice (Voicefixer)",
            value=False,
        ),
    ],
    outputs="audio",
    title=f"Delightful UnivNet, Sampling Rate: {delightfulunivnet_22050.sampling_rate}. When Voicefixer is enabled, the Sampling Rate is 44100.",
)