Spaces:
Running
Running
File size: 3,061 Bytes
f94a020 bdeb120 17855f6 3251e7e 156316e fef87f0 58b2f84 fef87f0 156316e fef87f0 d3ac099 bdeb120 17855f6 d250b27 81e5784 d250b27 81e5784 d250b27 5f762c2 b37c4b7 a42bf65 17855f6 a42bf65 3251e7e fef87f0 b37c4b7 122c9ef 81e5784 f82b319 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 |
import gradio as gr
import os
import requests
import torch
import zipfile
from TTS.api import TTS
from pydub import AudioSegment
# Coqui reads this variable to decide whether the model terms of service
# have been accepted.
os.environ["COQUI_TOS_AGREED"] = "1"

# Identifier of the XTTS v2 model handed to the TTS constructor below.
MODEL_PATH = "tts_models/multilingual/multi-dataset/xtts_v2"

# Language codes offered in the UI dropdown.
LANGUAGES = [
    "en", "es", "fr", "de", "it", "pt", "pl", "tr", "ru",
    "nl", "cs", "ar", "zh-cn", "ja", "hu", "ko", "hi",
]

# File extensions treated as audio, both for uploads and inside zip archives.
AUDIO_FORMATS = [".wav", ".mp3", ".flac", ".mp4"]

# Use the first CUDA device when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

# Load the model once at import time and move it to the selected device.
tts = TTS(MODEL_PATH).to(device)
def download_audio_file(url, timeout=30):
    """Download the file at *url* into the current working directory.

    The payload is stored as ``temp<ext>``, where ``<ext>`` is the
    lower-cased extension of the URL's *path* component. The query string
    and fragment are ignored, so ``clip.mp3?token=abc`` is saved as
    ``temp.mp3`` rather than ``temp.mp3?token=abc``.

    Args:
        url: Address of the file to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The name of the file that was written, or ``None`` when the request
        failed (invalid URL, connection error, timeout or HTTP error status).
    """
    # Imported locally so the function carries its own stdlib dependency.
    from urllib.parse import urlparse

    try:
        response = requests.get(url, timeout=timeout)
        # Treat 4xx/5xx responses as failures instead of saving the error
        # page to disk as if it were audio.
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error downloading audio file: {e}")
        return None

    file_extension = os.path.splitext(urlparse(url).path)[-1].lower()
    file_name = f"temp{file_extension}"
    with open(file_name, "wb") as f:
        f.write(response.content)
    return file_name
def extract_zip_file(zip_file):
    """Extract every member of *zip_file* into the current working directory.

    Args:
        zip_file: Path to a zip archive, or a file-like object containing one.

    Returns:
        ``True`` when the archive was extracted, ``False`` when it is not a
        valid zip file (the error is printed).
    """
    try:
        with zipfile.ZipFile(zip_file, "r") as zip_ref:
            zip_ref.extractall()
    except zipfile.BadZipFile as e:
        # BadZipFile is the current name; BadZipfile is a deprecated alias.
        print(f"Error extracting zip file: {e}")
        return False
    return True
def convert_to_wav(input_audio_file):
    """Return the path of a WAV version of *input_audio_file*.

    A file whose extension is already ``.wav`` (compared case-insensitively)
    is returned unchanged. Any other file is loaded with pydub's
    ``AudioSegment``, exported to ``temp.wav`` in the working directory, and
    the original file is then deleted.
    """
    _, suffix = os.path.splitext(input_audio_file)
    if suffix.lower() == ".wav":
        return input_audio_file

    wav_path = "temp.wav"
    AudioSegment.from_file(input_audio_file).export(wav_path, format="wav")
    # The source file is no longer needed once the WAV copy exists.
    os.remove(input_audio_file)
    return wav_path
def synthesize_text(text, input_audio_file, language):
    """Speak *text* in *language* using *input_audio_file* as the reference voice.

    The reference file is first passed through ``convert_to_wav``; the
    synthesized speech is written to ``./output.wav`` and that path is
    returned.
    """
    reference_wav = convert_to_wav(input_audio_file)
    output_path = "./output.wav"
    tts.tts_to_file(
        text=text,
        speaker_wav=reference_wav,
        language=language,
        file_path=output_path,
    )
    return output_path
def clone(text, input_file, language, url=None):
    """Synthesize *text* in the voice of a reference recording.

    The reference comes from *url* when one is given, otherwise from the
    uploaded *input_file*. It may be a single audio file or a zip archive
    containing exactly one audio file.

    Args:
        text: Text to speak.
        input_file: Uploaded file. Either an object exposing its path as
            ``.name`` or a plain path string is accepted; may be ``None``
            when *url* is used.
        language: Language code passed to the synthesizer.
        url: Optional address of the reference file. Empty or
            whitespace-only values are treated as "not provided".

    Returns:
        Path of the synthesized audio file, or ``None`` when the download
        from *url* failed.

    Raises:
        gr.Error: No reference was supplied, the zip archive could not be
            extracted, or it does not contain exactly one audio file.
    """
    # An empty textbox yields "" rather than None, so test for real content;
    # otherwise an uploaded file would never be used.
    cleaned_url = url.strip() if url else ""
    if cleaned_url:
        # download_audio_file returns a plain path string (or None).
        input_path = download_audio_file(cleaned_url)
        if input_path is None:
            return None
    elif input_file is not None:
        # Normalize uploads to a path string whether they arrive as an
        # object with a ``.name`` attribute or already as a string.
        input_path = getattr(input_file, "name", input_file)
    else:
        raise gr.Error("Please upload an audio or zip file, or provide a URL.")

    if input_path.lower().endswith(".zip"):
        if not extract_zip_file(input_path):
            raise gr.Error("The zip archive could not be extracted.")
        # Only look at what this archive contained. Scanning the whole
        # working directory would also pick up temp.wav / output.wav left
        # behind by earlier runs.
        with zipfile.ZipFile(input_path) as archive:
            members = archive.namelist()
        audio_suffixes = tuple(AUDIO_FORMATS)
        audio_files = [
            member
            for member in members
            if os.path.isfile(member) and member.lower().endswith(audio_suffixes)
        ]
        if len(audio_files) != 1:
            raise gr.Error("Error: Please select a single audio file from the extracted files.")
        input_audio_file = audio_files[0]
    else:
        input_audio_file = input_path

    return synthesize_text(text, input_audio_file, language)
# Web UI: text, reference file (audio or zip), language and an optional URL
# go in; the synthesized audio file comes out.
iface = gr.Interface(
    fn=clone,
    inputs=[
        "text",
        gr.File(label="Input File", file_types=[".zip", *AUDIO_FORMATS]),
        gr.Dropdown(choices=LANGUAGES, label="Language"),
        # gr.Textbox replaces the deprecated gr.inputs.Textbox namespace.
        gr.Textbox(label="URL", lines=1),
    ],
    outputs=gr.Audio(type='filepath'),
    title='Voice Clone',
    description="""
by [Angetyde](https://youtube.com/@Angetyde) and [Tony Assi](https://www.tonyassi.com/)
use this colab with caution <3.
""",
    theme=gr.themes.Base(primary_hue="teal", secondary_hue="teal", neutral_hue="slate")
)

# Start the app and request a public share link.
iface.launch(share=True)