|
import gradio as gr |
|
import os |
|
import shutil |
|
|
|
from huggingface_hub import snapshot_download |
|
import numpy as np |
|
from scipy.io import wavfile |
|
|
|
|
|
# Hugging Face Hub repositories to fetch before the model is constructed.
model_ids = ['suno/bark']

for model_id in model_ids:
    # Use the part after the last '/' (the repo name) as the local folder
    # name, e.g. 'suno/bark' is stored under 'checkpoints/bark'.
    model_name = model_id.rsplit('/', 1)[-1]
    snapshot_download(repo_id=model_id, local_dir=f'checkpoints/{model_name}')
|
|
|
from TTS.tts.configs.bark_config import BarkConfig |
|
from TTS.tts.models.bark import Bark |
|
|
|
# Create a Bark model from a default BarkConfig, then load its weights from
# the directory the download step above writes to for 'suno/bark'.
# `config` and `model` are module-level on purpose: `infer` reads both.
config = BarkConfig()
model = Bark.init_from_config(config)
model.load_checkpoint(
    config,
    checkpoint_dir="checkpoints/bark",
    eval=True,
)
|
|
|
def infer(prompt, input_wav_file):
    """Speak ``prompt`` with Bark, using the uploaded clip as the voice.

    The uploaded file is moved to ``bark_voices/<name>/<name>.wav``, where
    ``<name>`` is the upload's base name without its extension, and
    ``<name>`` is passed to the model as ``speaker_id``.

    Args:
        prompt: Text to synthesize.
        input_wav_file: Filesystem path of the reference audio (the Audio
            input of the interface is declared with ``type="filepath"``).

    Returns:
        Path of the written audio file, always ``"output.wav"``.

    Raises:
        gr.Error: If no reference audio was supplied.
    """
    # With no upload the path is None/empty; fail with an explicit message
    # instead of an opaque TypeError from os.path.basename further down.
    if not input_wav_file:
        raise gr.Error("Please upload a WAV voice sample to clone.")

    voice_name = os.path.splitext(os.path.basename(input_wav_file))[0]
    voice_dir = os.path.join("bark_voices", voice_name)
    os.makedirs(voice_dir, exist_ok=True)

    # The file is always stored with a ".wav" suffix, whatever its original
    # extension was; no audio conversion happens here.
    # NOTE(review): uploads that share a base name share this directory, so
    # a later upload replaces the earlier clip -- confirm that is intended.
    shutil.move(input_wav_file, os.path.join(voice_dir, f"{voice_name}.wav"))

    output_dict = model.synthesize(
        prompt,
        config,
        speaker_id=voice_name,
        voice_dirs="bark_voices/",
    )
    print(output_dict)

    # NOTE(review): rate is hard-coded; presumably Bark's native output
    # rate -- confirm against the model configuration.
    sample_rate = 24000

    # NOTE(review): every call writes the same path, so overlapping
    # requests would overwrite each other's result.
    wavfile.write('output.wav', sample_rate, output_dict['wav'])

    return "output.wav"
|
|
|
# Web UI: a text prompt plus an uploaded voice sample go into `infer`, and
# the file path it returns is rendered by an audio output component.
demo = gr.Interface(
    title="Instant Voice Cloning",
    fn=infer,
    inputs=[
        gr.Textbox(label="Text to speech prompt"),
        # type="filepath" hands `infer` a path on disk rather than samples.
        gr.Audio(source="upload", type="filepath", label="WAV voice to clone"),
    ],
    outputs=[gr.Audio()],
)
demo.launch()