Spaces:

fffiloni
/

instant-TTS-Bark-cloning

Paused

App Files Files Community

instant-TTS-Bark-cloning / app.py

fffiloni

Update app.py

e236784 over 1 year ago

raw

history blame

2.08 kB

	import gradio as gr
	import os
	import shutil

	from huggingface_hub import snapshot_download
	import numpy as np
	from scipy.io import wavfile


	# Hugging Face Hub repositories whose snapshots the app needs on local disk.
	model_ids = ['suno/bark']

	# Download each repository into checkpoints/<repo name>, where <repo name>
	# is the part of the repository id after the last '/'.
	for model_id in model_ids:
	    model_name = model_id.rpartition('/')[2]
	    target_dir = f'checkpoints/{model_name}'
	    snapshot_download(model_id, local_dir=target_dir)

	from TTS.tts.configs.bark_config import BarkConfig
	from TTS.tts.models.bark import Bark

	# Build the Bark model from a default BarkConfig, then load its weights from
	# the local "checkpoints/bark" directory (the directory the snapshot download
	# above writes 'suno/bark' into). `config` and `model` are module-level
	# globals that `infer` reads on every request.
	config = BarkConfig()
	model = Bark.init_from_config(config)
	# eval=True is forwarded to load_checkpoint; presumably it puts the model in
	# inference mode -- confirm against the TTS library documentation.
	model.load_checkpoint(config, checkpoint_dir="checkpoints/bark", eval=True)

	def infer(prompt, input_wav_file):
	    """Synthesize `prompt` in the voice of the uploaded reference recording.

	    The uploaded file is moved to ``bark_voices/<name>/<name>.wav``, where
	    ``<name>`` is the upload's base name without its extension. ``<name>`` is
	    then passed to ``model.synthesize`` as the speaker id, with
	    ``bark_voices/`` as the voice directory.

	    Args:
	        prompt: Text to be spoken.
	        input_wav_file: Filesystem path of the uploaded reference audio, or
	            a falsy value (e.g. ``None``) when nothing was uploaded.

	    Returns:
	        The string ``"output.wav"``, the path the synthesized audio is
	        written to.

	    Raises:
	        gr.Error: If no reference audio was provided.
	    """
	    if not input_wav_file:
	        # Without this guard os.path.basename(None) fails with an opaque
	        # TypeError; gr.Error gives the user an actionable message instead.
	        raise gr.Error("Please upload a WAV file with the voice to clone.")

	    voices_root = "bark_voices"

	    # The speaker id is the upload's base name without its extension.
	    file_name = os.path.splitext(os.path.basename(input_wav_file))[0]

	    # Each speaker gets its own sub-directory under the voices root.
	    speaker_dir = os.path.join(voices_root, file_name)
	    os.makedirs(speaker_dir, exist_ok=True)

	    # Move (not copy) the upload to <voices_root>/<speaker>/<speaker>.wav.
	    # The ".wav" suffix is forced regardless of the upload's own extension.
	    shutil.move(input_wav_file, os.path.join(speaker_dir, f"{file_name}.wav"))

	    # Clone the speaker. This assumes a speaker file exists at
	    # bark_voices/<speaker>/<speaker>.wav or bark_voices/<speaker>/<speaker>.npz.
	    # NOTE(review): if the library reuses a cached .npz, a different upload
	    # with the same file name may get the earlier voice -- confirm.
	    output_dict = model.synthesize(prompt, config, speaker_id=f"{file_name}", voice_dirs="bark_voices/")
	    print(output_dict)

	    # NOTE(review): hard-coded rate; confirm it matches the model's output.
	    sample_rate = 24000

	    # NOTE(review): fixed output path is shared by all requests.
	    wavfile.write('output.wav', sample_rate, output_dict['wav'])

	    return "output.wav"

	# Assemble the web UI: a text prompt plus an uploaded reference recording go
	# in, one audio clip comes out. Components are created in the same order as
	# they appear in the interface (inputs first, then outputs).
	prompt_box = gr.Textbox(label="Text to speech prompt")
	voice_upload = gr.Audio(
	    label="WAV voice to clone",
	    type="filepath",
	    source="upload",
	)
	result_audio = gr.Audio()

	demo = gr.Interface(
	    fn=infer,
	    inputs=[prompt_box, voice_upload],
	    outputs=[result_audio],
	    title="Instant Voice Cloning",
	)

	# Start serving the interface.
	demo.launch()