PeechTTSv22050 / demo /delightful_univnet.py
nickovchinnikov's picture
Fix missed file
bbd9e13
raw
history blame
2.4 kB
import tempfile
from pathlib import Path

import soundfile as sf
import torch
from gradio import Checkbox, Dropdown, Interface, Textbox
from voicefixer import VoiceFixer

from models.delightful_univnet import DelightfulUnivnet
from training.datasets.hifi_libri_dataset import speakers_hifi_ids

from .config import speakers_delightful_22050
# Module-level setup for the demo: everything below runs once at import time.

# Checkpoint file handed to DelightfulUnivnet below.
# NOTE(review): the path is relative — presumably resolved against the working
# directory of the process; confirm where the checkpoint is expected to live.
delightful_checkpoint_path = "epoch=5816-step=390418.ckpt"
# All tensors and the model are placed on the CPU.
device = torch.device("cpu")
# Model instance built from the checkpoint and moved to `device`;
# generate_audio() calls its forward() and reads its `sampling_rate`.
delightfulunivnet_22050 = DelightfulUnivnet(
delightful_checkpoint_path=delightful_checkpoint_path,
).to(device)
# Shared VoiceFixer instance used by generate_audio() when `fix_voice` is set.
voicefixer = VoiceFixer()
def generate_audio(text: str, speaker_name: str, fix_voice: bool):
    """Synthesize speech for ``text`` in the voice of ``speaker_name``.

    Args:
        text: Text to synthesize.
        speaker_name: Key into ``speakers_delightful_22050``; the mapped value
            is wrapped in a one-element tensor and passed to the model.
        fix_voice: When true, the generated waveform is post-processed with
            VoiceFixer before being returned.

    Returns:
        A ``(sampling_rate, waveform)`` tuple. Without VoiceFixer the rate is
        the model's ``sampling_rate`` and the waveform is the squeezed model
        output as a NumPy array. With VoiceFixer both values are whatever
        ``soundfile.read`` reports for the restored file.

    Raises:
        KeyError: If ``speaker_name`` is not in ``speakers_delightful_22050``.
    """
    speaker = torch.tensor(
        [speakers_delightful_22050[speaker_name]],
        device=device,
    )

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        wav = delightfulunivnet_22050.forward(text, speaker)
        wav = wav.squeeze().detach().cpu().numpy()

    if not fix_voice:
        return delightfulunivnet_22050.sampling_rate, wav

    # VoiceFixer.restore() works on file paths, so the waveform is round-tripped
    # through temporary .wav files. A temporary directory is used rather than
    # open NamedTemporaryFile handles because a named temp file that is still
    # open cannot be reopened by name on every platform (e.g. Windows).
    with tempfile.TemporaryDirectory() as tmp_dir:
        input_path = str(Path(tmp_dir) / "input.wav")
        output_path = str(Path(tmp_dir) / "output.wav")

        sf.write(input_path, wav, delightfulunivnet_22050.sampling_rate)

        voicefixer.restore(
            input=input_path,  # low quality .wav/.flac file
            output=output_path,  # save file path
            cuda=False,  # GPU acceleration off
            mode=0,
        )

        # Read the restored audio back before the directory is cleaned up.
        wav_vf, sampling_rate = sf.read(output_path)

    return sampling_rate, wav_vf
# Speaker names offered in the dropdown: the keys of the 22050 Hz speaker map,
# which is also what generate_audio() looks the selection up in.
_speaker_choices = list(speakers_delightful_22050.keys())

# The preferred default comes from a different collection (speakers_hifi_ids)
# than the dropdown choices, so fall back to the first available choice when it
# is not one of them; otherwise the initial selection would not be a valid key.
_default_speaker = (
    speakers_hifi_ids[0]
    if speakers_hifi_ids[0] in speakers_delightful_22050
    else (_speaker_choices[0] if _speaker_choices else None)
)

# Gradio interface wiring the text box, speaker dropdown and VoiceFixer toggle
# to generate_audio(); the result is rendered by an audio output component.
interfaceDelightfulUnuvnet22050 = Interface(
    generate_audio,
    [
        Textbox(
            label="Text",
            value="As the snake shook its head, a deafening shout behind Harry made both of them jump.",
        ),
        Dropdown(
            label="Speaker",
            choices=_speaker_choices,
            value=_default_speaker,
        ),
        Checkbox(
            label="Fix voice (Voicefixer)",
            value=False,
        ),
    ],
    outputs="audio",
    title=f"Delightful UnivNet, Sampling Rate: {delightfulunivnet_22050.sampling_rate}. When Voicefixer is enabled, the Sampling Rate is 44100.",
)