Spaces:

peechapp
/

PeechTTSv22050

Sleeping

App Files Files Community

nickovchinnikov committed on Jun 7, 2024

Commit

bbd9e13

1 Parent(s): 52e3665

Fix missed file

Browse files

Files changed (1) hide show

demo/delightful_univnet.py +74 -0

demo/delightful_univnet.py ADDED Viewed

	@@ -0,0 +1,74 @@

+import tempfile
+from gradio import Checkbox, Dropdown, Interface, Textbox
+import soundfile as sf
+import torch
+from voicefixer import VoiceFixer
+from models.delightful_univnet import DelightfulUnivnet
+from training.datasets.hifi_libri_dataset import speakers_hifi_ids
+from .config import speakers_delightful_22050
+delightful_checkpoint_path = "epoch=5816-step=390418.ckpt"
+device = torch.device("cpu")
+delightfulunivnet_22050 = DelightfulUnivnet(
+    delightful_checkpoint_path=delightful_checkpoint_path,
+).to(device)
+voicefixer = VoiceFixer()
+def generate_audio(text: str, speaker_name: str, fix_voice: bool):
+    speaker = torch.tensor(
+        [speakers_delightful_22050[speaker_name]],
+        device=device,
+    )
+    with torch.no_grad():
+        wav = delightfulunivnet_22050.forward(text, speaker)
+        wav = wav.squeeze().detach().cpu().numpy()
+    if fix_voice:
+        # Save the numpy array to a temporary wav file
+        with tempfile.NamedTemporaryFile(suffix=".wav", delete=True) as input_file:
+            # Write to the temp wav file
+            sf.write(input_file.name, wav, delightfulunivnet_22050.sampling_rate)
+            with tempfile.NamedTemporaryFile(suffix=".wav", delete=True) as output_file:
+                voicefixer.restore(
+                    input=input_file.name,  # low quality .wav/.flac file
+                    output=output_file.name,  # save file path
+                    cuda=False,  # GPU acceleration off
+                    mode=0,
+                )
+                # Read the wav file back into a numpy array
+                wav_vf, sampling_rate = sf.read(output_file.name)
+                return sampling_rate, wav_vf
+    return delightfulunivnet_22050.sampling_rate, wav
+interfaceDelightfulUnuvnet22050 = Interface(
+    generate_audio,
+    [
+        Textbox(
+            label="Text",
+            value="As the snake shook its head, a deafening shout behind Harry made both of them jump.",
+        ),
+        Dropdown(
+            label="Speaker",
+            choices=list(speakers_delightful_22050.keys()),
+            value=speakers_hifi_ids[0],
+        ),
+        Checkbox(
+            label="Fix voice (Voicefixer)",
+            value=False,
+        ),
+    ],
+    outputs="audio",
+    title=f"Delightful UnivNet, Sampling Rate: {delightfulunivnet_22050.sampling_rate}. When Voicefixer is enabled, the Simpling Rate is 44100.",
+)