Update app.py
app.py
CHANGED
@@ -1,5 +1,6 @@
 import gradio as gr
-
+import os
+import shutil
 
 
 from huggingface_hub import snapshot_download
@@ -26,7 +27,26 @@ config = BarkConfig()
 model = Bark.init_from_config(config)
 model.load_checkpoint(config, checkpoint_dir="checkpoints/bark", eval=True)
 
-def infer(prompt):
+def infer(prompt, input_wav_file):
+
+    # Path to your WAV file
+    source_path = input_wav_file
+
+    # Destination directory
+    destination_directory = "bark_voices"
+
+    # Extract the file name without the extension
+    file_name = os.path.splitext(os.path.basename(source_path))[0]
+
+    # Construct the full destination directory path
+    destination_path = os.path.join(destination_directory, file_name)
+
+    # Create the new directory
+    os.makedirs(destination_path, exist_ok=True)
+
+    # Move the WAV file to the new directory
+    shutil.move(source_path, os.path.join(destination_path, f"{file_name}.wav"))
+
 
     text = "Hello, my name is Manmay , how are you?"
 
@@ -35,7 +55,7 @@ def infer(prompt):
 
     # cloning a speaker.
     # It assumes that you have a speaker file in `bark_voices/speaker_n/speaker.wav` or `bark_voices/speaker_n/speaker.npz`
-    output_dict = model.synthesize(text, config, speaker_id="
+    output_dict = model.synthesize(text, config, speaker_id=f"{file_name}", voice_dirs="bark_voices/")
     print(output_dict)
 
     sample_rate = 24000 # Replace with the actual sample rate
@@ -45,4 +65,4 @@ def infer(prompt):
 
     return "output.wav"
 
-gr.Interface(fn=infer, inputs=[gr.Textbox()], outputs=[gr.Audio()]).launch()
+gr.Interface(fn=infer, inputs=[gr.Textbox(), gr.Audio(type="filepath", source="upload")], outputs=[gr.Audio()]).launch()
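The core of this change is the staging step: the WAV uploaded through the new `gr.Audio` input is moved into the layout Bark's voice cloning expects, `bark_voices/<name>/<name>.wav`, and `<name>` is then reused as the `speaker_id` passed to `model.synthesize`. A minimal standalone sketch of just that step, using the same stdlib calls as the commit; the example path `/tmp/my_voice.wav` is only an illustrative placeholder:

import os
import shutil

def stage_speaker_wav(input_wav_file: str, voice_dirs: str = "bark_voices") -> str:
    """Move an uploaded WAV to voice_dirs/<name>/<name>.wav and return <name>.

    Sketch of the staging logic in this commit; <name> is the file's base name
    without extension and is what gets passed as speaker_id.
    """
    file_name = os.path.splitext(os.path.basename(input_wav_file))[0]
    destination_path = os.path.join(voice_dirs, file_name)
    os.makedirs(destination_path, exist_ok=True)
    # Mirrors the commit's shutil.move call (the temporary upload is consumed).
    shutil.move(input_wav_file, os.path.join(destination_path, f"{file_name}.wav"))
    return file_name

# Hypothetical example: an upload Gradio saved as /tmp/my_voice.wav would be
# staged as bark_voices/my_voice/my_voice.wav, and "my_voice" would then be
# used as model.synthesize(..., speaker_id="my_voice", voice_dirs="bark_voices/").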