# SimpleRVC / app.py
# File-viewer residue from the hosting page (not part of the program):
# uploader "nevreal", commit 106b914 ("english", verified), size 8.42 kB.
import gradio as gr
import os
from constants import VOICE_METHODS, BARK_VOICES, EDGE_VOICES
import platform
from models.model import *
from tts.conversion import COQUI_LANGUAGES
import pytube
import os
import traceback
from pydub import AudioSegment
# from audio_enhance.functions import audio_enhance
def convert_yt_to_wav(url):
    """Download the audio track of a YouTube video and convert it to WAV.

    Args:
        url: Link to the YouTube video. Empty or whitespace-only values are
            rejected without contacting the network.

    Returns:
        A ``(message, audio_path)`` tuple. On success ``message`` is
        ``"Success"`` and ``audio_path`` is the path of the WAV file written
        under ``audios/``. On any failure ``message`` describes the problem
        and ``audio_path`` is ``None``; no exception is propagated, because
        the result is rendered directly in the UI.
    """
    url = (url or "").strip()
    if not url:
        return "Please enter the video link", None

    video_output_folder = "yt_videos"  # Destination folder for the raw download
    audio_output_folder = "audios"
    video_file_path = None
    try:
        print(f"Converting video {url}...")
        # Download the audio-only stream using pytube
        video = pytube.YouTube(url)
        stream = video.streams.filter(only_audio=True).first()
        if stream is None:
            return "No audio stream found for this video", None

        print("Downloading video")
        video_file_path = stream.download(output_path=video_output_folder)
        print(video_file_path)

        # Swap the extension via splitext: the audio-only stream is not
        # guaranteed to be ".mp4", and str.replace would also touch ".mp4"
        # occurring elsewhere in the title.
        base_name, extension = os.path.splitext(os.path.basename(video_file_path))
        os.makedirs(audio_output_folder, exist_ok=True)
        audio_file_path = os.path.join(audio_output_folder, base_name + ".wav")

        # Convert the downloaded container to wav, decoding with the format
        # that matches the file actually downloaded.
        print("Converting to wav")
        source_format = extension.lstrip(".") or "mp4"
        sound = AudioSegment.from_file(video_file_path, format=source_format)
        sound.export(audio_file_path, format="wav")
        return "Success", audio_file_path
    except ConnectionResetError:
        return "Connection lost, please refresh or try again later.", None
    except Exception as e:
        # UI boundary: report the error to the user, but keep the traceback
        # in the server log so failures remain diagnosable.
        traceback.print_exc()
        return str(e), None
    finally:
        # Remove the intermediate download on success and on failure alike.
        if video_file_path and os.path.exists(video_file_path):
            os.remove(video_file_path)
# Gradio UI definition: builds the Blocks app with four tabs (Inference, TTS,
# Youtube, Models), wires each button to its handler, then launches the app.
# NOTE(review): the nesting of the layout containers below was reconstructed
# from a copy of this file whose indentation was lost -- confirm the
# Row/Column grouping against the intended layout.
# NOTE(review): infer, tts_infer, update_tts_methods_voice, search_model and
# post_model are not defined in this file; presumably they come from the star
# import of models.model -- confirm.
with gr.Blocks() as app:
    # Page header, usage notice and external links.
    gr.HTML("<h1> Simple RVC Inference - by Juuxn 💻 </h1>")
    gr.HTML("<h4> The current space uses only CPU, so it's only for inference. It is recommended to duplicate the space to avoid issues with processing queues. </h4>")
    gr.Markdown("Simple RVC GPU Inference on Colab: [![Open In Colab](https://img.shields.io/badge/Colab-F9AB00?style=for-the-badge&logo=googlecolab&color=525252)](https://colab.research.google.com/drive/1NKqqTR04HujeBxzwe7jbYEvNi8LbxD_N?usp=sharing)")
    gr.Markdown(
        "[![Duplicate this Space](https://huggingface.co/datasets/huggingface/badges/raw/main/duplicate-this-space-sm-dark.svg)](https://huggingface.co/spaces/juuxn/SimpleRVC?duplicate=true)\n\n"
    )
    gr.Markdown("Collection of models you can use: RVC + AI Kits. **[RVC Community Models](https://docs.google.com/spreadsheets/d/1owfUtQuLW9ReiIwg6U9UkkDmPOTkuNHf0OKQtWu1iaI)**")

    # --- Inference tab: model URL + input audio + conversion settings ---
    with gr.Tab("Inference"):
        model_url = gr.Textbox(placeholder="https://huggingface.co/AIVER-SE/BillieEilish/resolve/main/BillieEilish.zip", label="Model URL", show_label=True)
        with gr.Row():
            with gr.Column():
                # type="filepath": the handler receives a path to the audio file.
                audio_path = gr.Audio(label="Audio File", show_label=True, type="filepath")
                index_rate = gr.Slider(minimum=0, maximum=1, label="Search feature ratio:", value=0.75, interactive=True)
                filter_radius1 = gr.Slider(minimum=0, maximum=7, label="Filter (breathing roughness reduction)", value=3, step=1, interactive=True)
            with gr.Column():
                f0_method = gr.Dropdown(choices=["harvest", "pm", "crepe", "crepe-tiny", "mangio-crepe", "mangio-crepe-tiny", "rmvpe"],
                                        value="rmvpe",
                                        label="Algorithm", show_label=True)
                vc_transform0 = gr.Slider(minimum=-12, label="Number of semitones, up an octave: 12, down an octave: -12", value=0, maximum=12, step=1)
                protect0 = gr.Slider(
                    minimum=0, maximum=0.5, label="Protect voiceless consonants and breathing sounds. 0.5 to disable.", value=0.33,
                    step=0.01,
                    interactive=True,
                )
                resample_sr1 = gr.Slider(
                    minimum=0,
                    maximum=48000,
                    label="Resample the output audio to the final sampling rate. 0 for no resampling.",
                    value=0,
                    step=1,
                    interactive=True,
                )
        # Output: status text and the converted audio
        with gr.Row():
            vc_output1 = gr.Textbox(label="Output")
            vc_output2 = gr.Audio(label="Output Audio")
        btn = gr.Button(value="Convert")
        # The order of `inputs` is the positional argument order passed to infer.
        btn.click(infer, inputs=[model_url, f0_method, audio_path, index_rate, vc_transform0, protect0, resample_sr1, filter_radius1], outputs=[vc_output1, vc_output2])

    # --- TTS tab: text-to-speech followed by conversion with an RVC model ---
    with gr.TabItem("TTS"):
        with gr.Row():
            tts_text = gr.Textbox(
                label="Text:",
                placeholder="Text you want to convert to speech...",
                lines=6,
            )
        with gr.Column():
            with gr.Row():
                tts_model_url = gr.Textbox(placeholder="https://huggingface.co/AIVER-SE/BillieEilish/resolve/main/BillieEilish.zip", label="RVC Model URL", show_label=True)
            with gr.Row():
                tts_method = gr.Dropdown(choices=VOICE_METHODS, value="Edge-tts", label="TTS Method:", visible=True)
                tts_model = gr.Dropdown(choices=EDGE_VOICES, label="TTS Model:", visible=True, interactive=True)
                # Created hidden (visible=False); it is one of the outputs of
                # the tts_method.change handler below.
                tts_api_key = gr.Textbox(label="ElevenLabs API Key", show_label=True, placeholder="4a4afce72349680c8e8b6fdcfaf2b65a", interactive=True, visible=False)
            # Created hidden (visible=False); also an output of the
            # tts_method.change handler below.
            tts_coqui_languages = gr.Radio(
                label="Language",
                choices=COQUI_LANGUAGES,
                value="en",
                visible=False
            )
        tts_btn = gr.Button(value="Convert")
        with gr.Row():
            tts_vc_output1 = gr.Textbox(label="Output")
            tts_vc_output2 = gr.Audio(label="Output Audio")
        tts_btn.click(fn=tts_infer, inputs=[tts_text, tts_model_url, tts_method, tts_model, tts_api_key, tts_coqui_languages], outputs=[tts_vc_output1, tts_vc_output2])
        # Hint about the ElevenLabs API key; created hidden (visible=False).
        # The string body is kept at column 0 because it is runtime Markdown.
        tts_msg = gr.Markdown("""**I recommend creating an Eleven Labs account and entering your API key; it's free and you have a limit of 10k characters per month.** <br/>
![Imgur](https://imgur.com/HH6YTu0.png)
""", visible=False)
        # Changing the TTS method calls update_tts_methods_voice with the
        # selected method; its return values update the four listed components.
        tts_method.change(fn=update_tts_methods_voice, inputs=[tts_method], outputs=[tts_model, tts_msg, tts_api_key, tts_coqui_languages])

    # --- Youtube tab: passes the URL to convert_yt_to_wav (status, audio) ---
    with gr.TabItem("Youtube"):
        gr.Markdown("## Convert YouTube video to audio")
        with gr.Row():
            yt_url = gr.Textbox(
                label="Video URL:",
                placeholder="https://www.youtube.com/watch?v=3vEiqil5d3Q"
            )
        yt_btn = gr.Button(value="Convert")
        with gr.Row():
            yt_output1 = gr.Textbox(label="Output")
            yt_output2 = gr.Audio(label="Output Audio")
        yt_btn.click(fn=convert_yt_to_wav, inputs=[yt_url], outputs=[yt_output1, yt_output2])

    # --- Models tab: search form and submission form ---
    with gr.Tab("Models"):
        gr.HTML("<h4>Search models</h4>")
        search_name = gr.Textbox(placeholder="Billie Eilish (RVC v2 - 100 epoch)", label="Name", show_label=True)
        # Output: search_model's result is rendered as Markdown
        with gr.Row():
            search_output = gr.Markdown(label="Output")
        btn_search_model = gr.Button(value="Search")
        btn_search_model.click(fn=search_model, inputs=[search_name], outputs=[search_output])
        gr.HTML("<h4>Submit your model</h4>")
        post_name = gr.Textbox(placeholder="Billie Eilish (RVC v2 - 100 epoch)", label="Name", show_label=True)
        post_model_url = gr.Textbox(placeholder="https://huggingface.co/AIVER-SE/BillieEilish/resolve/main/BillieEilish.zip", label="Model URL", show_label=True)
        post_creator = gr.Textbox(placeholder="Discord ID or link to creator's profile", label="Creator", show_label=True)
        post_version = gr.Dropdown(choices=["RVC v1", "RVC v2"], value="RVC v1", label="Version", show_label=True)
        # Output: post_model's result is rendered as Markdown
        with gr.Row():
            post_output = gr.Markdown(label="Output")
        btn_post_model = gr.Button(value="Post")
        # Note the argument order: version is passed before creator.
        btn_post_model.click(fn=post_model, inputs=[post_name, post_model_url, post_version, post_creator], outputs=[post_output])

# Enable the request queue with the given limits, then start the server.
# NOTE(review): `concurrency_count` is a Gradio 3.x queue() argument --
# confirm the pinned Gradio version before upgrading.
app.queue(concurrency_count=200, max_size=1022).launch()
#share=True