HRVC / src /ultimate_rvc /web /tabs /one_click_generation.py
SpyC0der77's picture
Upload folder using huggingface_hub
1a942eb verified
"""Module which defines the code for the "One-click generation" tab."""
from collections.abc import Sequence
from functools import partial
import gradio as gr
from ultimate_rvc.core.generate.song_cover import run_pipeline
from ultimate_rvc.typing_extra import AudioExt, F0Method, SampleRate
from ultimate_rvc.web.common import (
PROGRESS_BAR,
exception_harness,
toggle_visible_component,
update_cached_songs,
update_output_audio,
update_song_cover_name,
update_value,
)
from ultimate_rvc.web.typing_extra import ConcurrencyId, SourceType
def _toggle_intermediate_audio(
    visible: bool,
) -> list[gr.Accordion]:
    """
    Show or hide the intermediate audio section, collapsing everything.

    Parameters
    ----------
    visible : bool
        Whether the outer intermediate audio accordion should be
        visible.

    Returns
    -------
    list[gr.Accordion]
        Eight accordion updates. The first carries the requested
        visibility and is closed; the remaining seven are only closed.

    """
    # Seven closed updates for the remaining accordions.
    step_updates = [gr.Accordion(open=False) for _ in range(7)]
    outer_update = gr.Accordion(visible=visible, open=False)
    return [outer_update] + step_updates
def render(
    song_dirs: Sequence[gr.Dropdown],
    cached_song_1click: gr.Dropdown,
    cached_song_multi: gr.Dropdown,
    model_1click: gr.Dropdown,
    intermediate_audio: gr.Dropdown,
    output_audio: gr.Dropdown,
) -> None:
    """
    Build the "One-click generation" tab and wire up its events.

    Parameters
    ----------
    song_dirs : Sequence[gr.Dropdown]
        Dropdowns for selecting song directories in the
        "Multi-step generation" tab. They are outputs of the cached
        song update that runs after a successful generation.
    cached_song_1click : gr.Dropdown
        Dropdown for selecting a cached song in the
        "One-click generation" tab. It is rendered inside this tab.
    cached_song_multi : gr.Dropdown
        Dropdown for selecting a cached song in the
        "Multi-step generation" tab.
    model_1click : gr.Dropdown
        Dropdown for selecting a voice model in the
        "One-click generation" tab. It is rendered inside this tab.
    intermediate_audio : gr.Dropdown
        Dropdown for selecting intermediate audio files to delete in
        the "Delete audio" tab.
    output_audio : gr.Dropdown
        Dropdown for selecting output audio files to delete in the
        "Delete audio" tab.

    """
    with gr.Tab("One-click generation"):
        # --- Main options: song source, voice model and pitch shifts ---
        with gr.Accordion("Main options"):
            with gr.Row():
                with gr.Column():
                    source_type = gr.Dropdown(
                        choices=list(SourceType),
                        value=SourceType.PATH,
                        label="Source type",
                        type="index",
                        info="The type of source to retrieve a song from.",
                    )
                with gr.Column():
                    source = gr.Textbox(
                        label="Source",
                        info=(
                            "Link to a song on YouTube or the full path of a local"
                            " audio file."
                        ),
                    )
                    local_file = gr.Audio(
                        label="Source",
                        type="filepath",
                        visible=False,
                    )
                    cached_song_1click.render()

                # The selected source type index drives the three
                # alternative source widgets.
                source_widgets = [source, local_file, cached_song_1click]
                source_type.input(
                    partial(toggle_visible_component, 3),
                    inputs=source_type,
                    outputs=source_widgets,
                    show_progress="hidden",
                )
                # The upload widget and the cached song dropdown both feed
                # the source textbox, which is the only pipeline input.
                local_file.change(
                    update_value,
                    inputs=local_file,
                    outputs=source,
                    show_progress="hidden",
                )
                cached_song_1click.input(
                    update_value,
                    inputs=cached_song_1click,
                    outputs=source,
                    show_progress="hidden",
                )
            with gr.Row():
                model_1click.render()
                n_octaves = gr.Slider(
                    minimum=-3,
                    maximum=3,
                    value=0,
                    step=1,
                    label="Vocal pitch shift",
                    info=(
                        "The number of octaves to pitch-shift converted vocals by."
                        " Use 1 for male-to-female and -1 for vice-versa."
                    ),
                )
                n_semitones = gr.Slider(
                    minimum=-12,
                    maximum=12,
                    value=0,
                    step=1,
                    label="Overall pitch shift",
                    info=(
                        "The number of semi-tones to pitch-shift converted vocals,"
                        " instrumentals, and backup vocals by."
                    ),
                )

        # --- Vocal conversion options ---
        with gr.Accordion("Vocal conversion options", open=False):
            with gr.Row():
                index_rate = gr.Slider(
                    minimum=0,
                    maximum=1,
                    value=0.5,
                    label="Index rate",
                    info=(
                        "How much of the accent in the voice model to keep in the"
                        " converted vocals. Increase to bias the conversion towards the"
                        " accent of the voice model."
                    ),
                )
                filter_radius = gr.Slider(
                    minimum=0,
                    maximum=7,
                    value=3,
                    step=1,
                    label="Filter radius",
                    info=(
                        "If >=3: apply median filtering to harvested pitch results."
                        " Can help reduce breathiness in the converted vocals."
                    ),
                )
                rms_mix_rate = gr.Slider(
                    minimum=0,
                    maximum=1,
                    value=0.25,
                    label="RMS mix rate",
                    info=(
                        "How much to mimic the loudness (0) of the input vocals or a"
                        " fixed loudness (1)."
                        "<br><br>"
                    ),
                )
            with gr.Row():
                protect = gr.Slider(
                    minimum=0,
                    maximum=0.5,
                    value=0.33,
                    label="Protect rate",
                    info=(
                        "Protection of voiceless consonants and breath sounds. Decrease"
                        " to increase protection at the cost of indexing accuracy. Set"
                        " to 0.5 to disable."
                        "<br><br>"
                    ),
                )
                f0_method = gr.Dropdown(
                    choices=list(F0Method),
                    value=F0Method.RMVPE,
                    label="Pitch detection algorithm",
                    info=(
                        "The method to use for pitch detection. Best option is RMVPE"
                        " (clarity in vocals), then Mangio-CREPE (smoother vocals)."
                        "<br><br>"
                    ),
                )
                hop_length = gr.Slider(
                    minimum=32,
                    maximum=320,
                    value=128,
                    step=1,
                    label="Hop length",
                    info=(
                        "How often the CREPE-based pitch detection algorithm checks for"
                        " pitch changes. Measured in milliseconds. Lower values lead to"
                        " longer conversion times and a higher risk of voice cracks,"
                        " but better pitch accuracy."
                    ),
                )

        # --- Audio mixing options: reverb and per-track gain ---
        with gr.Accordion("Audio mixing options", open=False):
            gr.Markdown("")
            gr.Markdown("**Reverb control on converted vocals**")
            with gr.Row():
                room_size = gr.Slider(
                    minimum=0,
                    maximum=1,
                    value=0.15,
                    label="Room size",
                    info=(
                        "Size of the room which reverb effect simulates. Increase for"
                        " longer reverb time."
                    ),
                )
            with gr.Row():
                wet_level = gr.Slider(
                    minimum=0,
                    maximum=1,
                    value=0.2,
                    label="Wetness level",
                    info="Loudness of converted vocals with reverb effect applied.",
                )
                dry_level = gr.Slider(
                    minimum=0,
                    maximum=1,
                    value=0.8,
                    label="Dryness level",
                    info="Loudness of converted vocals without reverb effect applied.",
                )
                damping = gr.Slider(
                    minimum=0,
                    maximum=1,
                    value=0.7,
                    label="Damping level",
                    info="Absorption of high frequencies in reverb effect.",
                )

            gr.Markdown("")
            gr.Markdown("**Volume controls (dB)**")
            with gr.Row():
                # Three identical gain sliders that differ only in label.
                main_gain, inst_gain, backup_gain = [
                    gr.Slider(
                        minimum=-20,
                        maximum=20,
                        value=0,
                        step=1,
                        label=gain_label,
                    )
                    for gain_label in (
                        "Main vocals",
                        "Instrumentals",
                        "Backup vocals",
                    )
                ]

        # --- Audio output options ---
        with gr.Accordion("Audio output options", open=False):
            with gr.Row():
                output_name = gr.Textbox(
                    value=partial(
                        update_song_cover_name,
                        None,
                        update_placeholder=True,
                    ),
                    inputs=[cached_song_1click, model_1click],
                    label="Output name",
                    info=(
                        "If no name is provided, a suitable name will be generated"
                        " automatically."
                    ),
                    placeholder="Ultimate RVC song cover",
                )
                output_sr = gr.Dropdown(
                    choices=list(SampleRate),
                    value=SampleRate.HZ_44100,
                    label="Output sample rate",
                    info="The sample rate to save the generated song cover in.",
                )
                output_format = gr.Dropdown(
                    choices=list(AudioExt),
                    value=AudioExt.MP3,
                    label="Output format",
                    info="The format to save the generated song cover in.",
                )
            with gr.Row():
                show_intermediate_audio = gr.Checkbox(
                    label="Show intermediate audio",
                    value=False,
                    info=(
                        "Show intermediate audio tracks generated during song cover"
                        " generation."
                    ),
                )

        # --- Intermediate audio tracks ---
        # Each pipeline step is paired with the labels of the tracks shown
        # under it. All accordions are created first, then all audio
        # players, both unrendered so they can be placed explicitly below.
        step_layout = [
            ("Step 0: song retrieval", ["Song"]),
            (
                "Step 1a: vocals/instrumentals separation",
                ["Vocals", "Instrumentals"],
            ),
            (
                "Step 1b: main vocals/ backup vocals separation",
                ["Main vocals", "Backup vocals"],
            ),
            (
                "Step 1c: main vocals cleanup",
                ["De-reverbed main vocals", "Main vocals reverb"],
            ),
            ("Step 2: conversion of main vocals", ["Converted vocals"]),
            (
                "Step 3: post-processing of converted vocals",
                ["Post-processed vocals"],
            ),
            (
                "Step 4: pitch shift of background tracks",
                ["Pitch-shifted instrumentals", "Pitch-shifted backup vocals"],
            ),
        ]
        step_accordions = [
            gr.Accordion(step_label, open=False, render=False)
            for step_label, _ in step_layout
        ]
        step_tracks = [
            [
                gr.Audio(
                    label=track_label,
                    type="filepath",
                    interactive=False,
                    render=False,
                )
                for track_label in track_labels
            ]
            for _, track_labels in step_layout
        ]
        # Flat list in pipeline order; used as outputs of the generate event.
        intermediate_audio_tracks = [
            track for tracks in step_tracks for track in tracks
        ]

        with gr.Accordion(
            "Intermediate audio tracks",
            open=False,
            visible=False,
        ) as intermediate_panel:
            for step_accordion, tracks in zip(step_accordions, step_tracks):
                step_accordion.render()
                with step_accordion:
                    if len(tracks) == 1:
                        tracks[0].render()
                    else:
                        # Steps with several tracks show them side by side.
                        with gr.Row():
                            for track in tracks:
                                track.render()

        show_intermediate_audio.change(
            _toggle_intermediate_audio,
            inputs=show_intermediate_audio,
            outputs=[intermediate_panel, *step_accordions],
            show_progress="hidden",
        )

        # --- Action buttons and final output ---
        with gr.Row(equal_height=True):
            reset_btn = gr.Button(value="Reset settings", scale=2)
            generate_btn = gr.Button("Generate", scale=2, variant="primary")
            song_cover = gr.Audio(label="Song cover", scale=3)

        generate_fn = partial(
            exception_harness(
                run_pipeline,
                info_msg="Song cover generated successfully!",
            ),
            progress_bar=PROGRESS_BAR,
        )
        pipeline_inputs = [
            source,
            model_1click,
            n_octaves,
            n_semitones,
            f0_method,
            index_rate,
            filter_radius,
            rms_mix_rate,
            protect,
            hop_length,
            room_size,
            wet_level,
            dry_level,
            damping,
            main_gain,
            inst_gain,
            backup_gain,
            output_sr,
            output_format,
            output_name,
        ]
        cached_song_outputs = [
            cached_song_1click,
            cached_song_multi,
            intermediate_audio,
            *song_dirs,
        ]

        generation = generate_btn.click(
            generate_fn,
            inputs=pipeline_inputs,
            outputs=[song_cover, *intermediate_audio_tracks],
            concurrency_limit=1,
            concurrency_id=ConcurrencyId.GPU,
        )
        # Runs only after a successful generation.
        cache_refresh = generation.success(
            partial(
                update_cached_songs,
                3 + len(song_dirs),
                [],
                [2],
            ),
            outputs=cached_song_outputs,
            show_progress="hidden",
        )
        cache_refresh.then(
            partial(update_output_audio, 1, [], [0]),
            outputs=[output_audio],
            show_progress="hidden",
        )

        # Each resettable component paired with the value it is reset to.
        reset_defaults = [
            (n_octaves, 0),
            (n_semitones, 0),
            (index_rate, 0.5),
            (filter_radius, 3),
            (rms_mix_rate, 0.25),
            (protect, 0.33),
            (f0_method, F0Method.RMVPE),
            (hop_length, 128),
            (room_size, 0.15),
            (wet_level, 0.2),
            (dry_level, 0.8),
            (damping, 0.7),
            (main_gain, 0),
            (inst_gain, 0),
            (backup_gain, 0),
            (output_sr, SampleRate.HZ_44100),
            (output_format, AudioExt.MP3),
            (show_intermediate_audio, False),
        ]
        reset_btn.click(
            lambda: [default for _, default in reset_defaults],
            outputs=[component for component, _ in reset_defaults],
            show_progress="hidden",
        )