Spaces:

fffiloni
/

instant-TTS-Bark-cloning

Paused

fffiloni committed on Aug 22, 2023

Commit

0435c60

1 Parent(s): 0c32eee

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -6,7 +6,6 @@ from huggingface_hub import snapshot_download
 import numpy as np
 from scipy.io import wavfile
 model_ids = [
     'suno/bark',
 ]
@@ -49,12 +48,22 @@ def infer(prompt, input_wav_file):
     # cloning a speaker.
     # It assumes that you have a speaker file in `bark_voices/speaker_n/speaker.wav` or `bark_voices/speaker_n/speaker.npz`
-    output_dict = model.synthesize(text, config, speaker_id=f"{file_name}", voice_dirs="bark_voices/")
     print(output_dict)
     sample_rate = 24000  # Replace with the actual sample rate
-    wavfile.write('output.wav', sample_rate, output_dict['wav'])
     # List all the files and subdirectories in the given directory
     contents = os.listdir(f"bark_voices/{file_name}")
@@ -63,13 +72,27 @@ def infer(prompt, input_wav_file):
     for item in contents:
         print(item)
-    return "output.wav"
-gr.Interface(fn=infer,
-             inputs=[gr.Textbox(label="Text to speech prompt"),
-                     gr.Audio(
-                         label="WAV voice to clone",
-                         type="filepath",
-                         source="upload")],
-             outputs=[gr.Audio()],
-             title="Instant Voice Cloning").launch()

 import numpy as np
 from scipy.io import wavfile
 model_ids = [
     'suno/bark',
 ]
     # cloning a speaker.
     # It assumes that you have a speaker file in `bark_voices/speaker_n/speaker.wav` or `bark_voices/speaker_n/speaker.npz`
+    output_dict = model.synthesize(
+        text,
+        config,
+        speaker_id=f"{file_name}",
+        voice_dirs="bark_voices/"
+    )
     print(output_dict)
     sample_rate = 24000  # Replace with the actual sample rate
+    wavfile.write(
+        'output.wav',
+        sample_rate,
+        output_dict['wav']
+    )
     # List all the files and subdirectories in the given directory
     contents = os.listdir(f"bark_voices/{file_name}")
     for item in contents:
         print(item)
+    return "output.wav", f"bark_voices/{file_name}/{content[1]}"
+gr.Interface(
+    fn=infer,
+    inputs=[
+        gr.Textbox(
+            label="Text to speech prompt"
+        ),
+        gr.Audio(
+            label="WAV voice to clone",
+            type="filepath",
+            source="upload"
+        )
+    ],
+    outputs=[
+        gr.Audio(
+            label="Text to speech output"
+        ),
+        gr.File(
+            label=".npz file"
+        )
+    ],
+    title="Instant Voice Cloning"
+).launch()