File size: 3,061 Bytes
f94a020
 
 
bdeb120
17855f6
3251e7e
156316e
fef87f0
58b2f84
fef87f0
 
 
156316e
fef87f0
 
 
 
 
d3ac099
bdeb120
 
 
 
 
 
 
 
 
 
 
 
17855f6
 
 
 
 
 
 
 
 
d250b27
 
 
 
81e5784
d250b27
 
 
81e5784
d250b27
 
 
 
5f762c2
b37c4b7
 
 
a42bf65
 
17855f6
 
 
 
 
 
 
 
 
a42bf65
 
3251e7e
fef87f0
 
b37c4b7
122c9ef
81e5784
f82b319
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import gradio as gr
import os
import requests
import torch
import zipfile
from TTS.api import TTS
from pydub import AudioSegment

# Set before the model is constructed below; presumably this lets the Coqui
# model load without the interactive terms-of-service prompt (confirm against
# the TTS package documentation).
os.environ["COQUI_TOS_AGREED"] = "1"

# Model identifier handed to the TTS constructor.
MODEL_PATH = "tts_models/multilingual/multi-dataset/xtts_v2"

# Language codes offered in the UI dropdown.
LANGUAGES = [
    "en",
    "es",
    "fr",
    "de",
    "it",
    "pt",
    "pl",
    "tr",
    "ru",
    "nl",
    "cs",
    "ar",
    "zh-cn",
    "ja",
    "hu",
    "ko",
    "hi",
]

# File extensions accepted as reference audio.
AUDIO_FORMATS = [
    ".wav",
    ".mp3",
    ".flac",
    ".mp4",
]

# Use the first CUDA device when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

# Load the model once at import time and move it to the selected device.
tts = TTS(MODEL_PATH).to(device)

def download_audio_file(url):
    """Download the file at *url* into the working directory.

    The content is saved as ``temp<ext>``, where ``<ext>`` is the lower-cased
    extension of the URL's path component (empty when the path has none).

    Args:
        url: Address of the file to fetch.

    Returns:
        The local file name on success, or ``None`` when the request fails,
        times out, or the server answers with an HTTP error status.
    """
    # Function-scope import so this block does not depend on other edits.
    from urllib.parse import urlparse

    try:
        # A timeout keeps a stalled server from blocking the request forever.
        response = requests.get(url, timeout=30)
        # Without this check a 404/500 error page would be saved as "audio".
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error downloading audio file: {e}")
        return None

    # Derive the extension from the path only, so a "?query" or "#fragment"
    # suffix does not end up in the local file name.
    file_extension = os.path.splitext(urlparse(url).path)[-1].lower()
    file_name = f"temp{file_extension}"
    with open(file_name, "wb") as f:
        f.write(response.content)
    return file_name

def extract_zip_file(zip_file, extract_dir=None):
    """Extract every member of a zip archive.

    Args:
        zip_file: Path to the archive, or a file-like object containing it.
        extract_dir: Directory to extract into. ``None`` (the default)
            extracts into the current working directory.

    Returns:
        ``True`` when extraction succeeded, ``False`` when the archive is
        missing, unreadable, or not a valid zip file.
    """
    try:
        with zipfile.ZipFile(zip_file, "r") as zip_ref:
            zip_ref.extractall(path=extract_dir)
    # BadZipFile is the current name of the deprecated BadZipfile alias;
    # OSError covers a missing or unreadable archive so the caller always
    # gets a boolean instead of an unexpected exception.
    except (zipfile.BadZipFile, OSError) as e:
        print(f"Error extracting zip file: {e}")
        return False
    return True

def convert_to_wav(input_audio_file):
    """Return the path of a WAV version of *input_audio_file*.

    A path whose extension is already ``.wav`` (compared case-insensitively)
    is returned unchanged. Any other file is loaded with ``AudioSegment``,
    exported as ``temp.wav`` in the working directory, and the source file is
    then deleted.
    """
    _, suffix = os.path.splitext(input_audio_file)
    if suffix.lower() == ".wav":
        return input_audio_file

    wav_path = "temp.wav"
    AudioSegment.from_file(input_audio_file).export(wav_path, format="wav")
    # The original recording is no longer needed once the WAV copy exists.
    os.remove(input_audio_file)
    return wav_path

def synthesize_text(text, input_audio_file, language):
    """Synthesize *text* using *input_audio_file* as the speaker reference.

    The reference is first passed through ``convert_to_wav``; the result is
    written to ``./output.wav``, and that path is returned.
    """
    output_path = "./output.wav"
    speaker_wav = convert_to_wav(input_audio_file)
    tts.tts_to_file(
        text=text,
        speaker_wav=speaker_wav,
        language=language,
        file_path=output_path,
    )
    return output_path

def clone(text, input_file, language, url=None):
    """Speak *text* in the voice of a reference recording.

    The reference comes from *url* when one is given, otherwise from the
    uploaded *input_file*. A ``.zip`` reference is extracted into the working
    directory and must contain exactly one top-level audio file.

    Args:
        text: Text to synthesize.
        input_file: Uploaded file (an object with a ``.name`` path, or a
            plain path string), or ``None``.
        language: Language code passed on to the synthesizer.
        url: Optional address of a reference file; takes precedence over
            *input_file* when non-empty.

    Returns:
        Path of the generated audio file, or ``None`` when no usable
        reference was supplied (nothing uploaded, or the download failed).

    Raises:
        gr.Error: If a zip reference cannot be extracted or does not contain
            exactly one top-level audio file.
    """
    # An empty textbox arrives as "" rather than None, so test truthiness;
    # otherwise the upload would always be replaced by a failed download.
    url = url.strip() if url else ""
    if url:
        input_file = download_audio_file(url)
    if input_file is None:
        return None

    # download_audio_file returns a plain path string, whereas an upload is
    # an object exposing its path as .name.
    input_path = input_file if isinstance(input_file, str) else input_file.name

    if not input_path.lower().endswith(".zip"):
        return synthesize_text(text, input_path, language)

    if not extract_zip_file(input_path):
        raise gr.Error("Error: The zip file could not be extracted.")

    # Look only at this archive's own top-level members. Scanning the whole
    # working directory would also pick up stale files such as temp.wav or
    # output.wav left behind by earlier requests.
    audio_suffixes = tuple(AUDIO_FORMATS)
    with zipfile.ZipFile(input_path, "r") as zip_ref:
        audio_members = [
            name
            for name in zip_ref.namelist()
            if "/" not in name and name.lower().endswith(audio_suffixes)
        ]
    if len(audio_members) != 1:
        # Raised rather than returned: the output component expects a file
        # path, so a returned message would be treated as one.
        raise gr.Error("Error: Please select a single audio file from the extracted files.")
    return synthesize_text(text, audio_members[0], language)

# Web UI: text, reference file, language and optional URL in; audio file out.
iface = gr.Interface(
    fn=clone,
    inputs=[
        "text",
        gr.File(label="Input File", file_types=[".zip", *AUDIO_FORMATS]),
        gr.Dropdown(choices=LANGUAGES, label="Language"),
        # gr.Textbox replaces the deprecated gr.inputs.Textbox, which no
        # longer exists in Gradio 4.
        gr.Textbox(label="URL", lines=1),
    ],
    outputs=gr.Audio(type='filepath'),
    title='Voice Clone',
    description="""
    by [Angetyde](https://youtube.com/@Angetyde) and [Tony Assi](https://www.tonyassi.com/)
    use this colab with caution <3.
    """,
    theme=gr.themes.Base(primary_hue="teal", secondary_hue="teal", neutral_hue="slate")
)

# share=True additionally requests a public share link for the app.
iface.launch(share=True)