File size: 2,077 Bytes
9659078 5f924a4 9659078 5a84593 f523090 5a84593 9659078 5a84593 9659078 5f924a4 9659078 e236784 9659078 a4b64d4 5f924a4 8e00ffa 9659078 3c31edb f523090 e236784 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 |
import gradio as gr
import os
import shutil
from huggingface_hub import snapshot_download
import numpy as np
from scipy.io import wavfile
# Hugging Face Hub repositories to fetch before the model is constructed.
model_ids = ["suno/bark"]

# Download each repository into checkpoints/<name>, where <name> is the part
# of the repository id after its last "/".
for model_id in model_ids:
    model_name = model_id.rpartition("/")[2]
    snapshot_download(model_id, local_dir=f"checkpoints/{model_name}")
from TTS.tts.configs.bark_config import BarkConfig
from TTS.tts.models.bark import Bark
# Build a Bark model from a default BarkConfig, then load the checkpoint found
# in checkpoints/bark (the local_dir the 'suno/bark' snapshot is downloaded to
# earlier in this file) with eval=True.
# `config` and `model` are module-level globals read by infer().
config = BarkConfig()
model = Bark.init_from_config(config)
model.load_checkpoint(config, checkpoint_dir="checkpoints/bark", eval=True)
def infer(prompt, input_wav_file):
    """Speak ``prompt`` with Bark, cloning the voice in ``input_wav_file``.

    The reference recording is moved to ``bark_voices/<name>/<name>.wav``,
    where ``<name>`` is the recording's file name without its extension, and
    ``<name>`` is then passed to the model as the speaker id.

    Args:
        prompt: Text to synthesize.
        input_wav_file: Filesystem path of the reference recording (the UI's
            audio input is declared with ``type="filepath"``).

    Returns:
        The string ``"output.wav"``, the file the generated audio is written to.

    Raises:
        gr.Error: If no reference recording was supplied.
    """
    # Without an upload the path is None/empty; report that in the UI instead
    # of letting os.path.basename fail with a bare TypeError.
    if not input_wav_file:
        raise gr.Error("Please upload a WAV voice sample to clone.")

    destination_directory = "bark_voices"
    # The upload's base name (extension removed) doubles as the speaker id.
    file_name = os.path.splitext(os.path.basename(input_wav_file))[0]

    # Each speaker gets its own sub-directory holding the reference recording.
    # NOTE(review): uploads sharing a base name reuse this directory and
    # overwrite the earlier recording; anything the model may cache next to it
    # could then be stale — confirm against the Bark voice-loading code.
    destination_path = os.path.join(destination_directory, file_name)
    os.makedirs(destination_path, exist_ok=True)
    shutil.move(input_wav_file, os.path.join(destination_path, f"{file_name}.wav"))

    # Clone the speaker whose recording was just stored under bark_voices/.
    output_dict = model.synthesize(
        prompt,
        config,
        speaker_id=file_name,
        voice_dirs="bark_voices/",
    )
    print(output_dict)

    # NOTE(review): hard-coded rate; presumably Bark's output rate — confirm
    # against the model configuration.
    sample_rate = 24000
    # NOTE(review): every call writes the same path, so overlapping requests
    # would overwrite each other's result — confirm how requests are served.
    wavfile.write('output.wav', sample_rate, output_dict['wav'])
    return "output.wav"
# Web UI: a text prompt and an uploaded reference recording go to infer(),
# and the audio file it returns is shown as the output. launch() starts serving.
demo = gr.Interface(
    fn=infer,
    inputs=[
        gr.Textbox(label="Text to speech prompt"),
        gr.Audio(
            label="WAV voice to clone",
            type="filepath",
            source="upload",
        ),
    ],
    outputs=[gr.Audio()],
    title="Instant Voice Cloning",
)
demo.launch()