Spaces:

SpyC0der77
/

HRVC

Runtime error

App Files Files Community

HRVC / src /ultimate_rvc /web /tabs /one_click_generation.py

SpyC0der77

Upload folder using huggingface_hub

1a942eb verified 8 months ago

raw

history blame contribute delete

17.3 kB

	"""Module which defines the code for the "One-click generation" tab."""

	from collections.abc import Sequence
	from functools import partial

	import gradio as gr

	from ultimate_rvc.core.generate.song_cover import run_pipeline
	from ultimate_rvc.typing_extra import AudioExt, F0Method, SampleRate
	from ultimate_rvc.web.common import (
	PROGRESS_BAR,
	exception_harness,
	toggle_visible_component,
	update_cached_songs,
	update_output_audio,
	update_song_cover_name,
	update_value,
	)
	from ultimate_rvc.web.typing_extra import ConcurrencyId, SourceType


	def _toggle_intermediate_audio(visible: bool) -> list[gr.Accordion]:
	    """
	    Build the accordion updates used when intermediate audio is toggled.

	    Every returned accordion is collapsed (``open=False``). Only the
	    first one additionally carries the requested visibility.

	    Parameters
	    ----------
	    visible : bool
	        Visibility to apply to the first (enclosing) accordion.

	    Returns
	    -------
	    list[gr.Accordion]
	        Eight accordion updates: the enclosing accordion followed by
	        one update per intermediate step accordion.

	    """
	    # One collapsed update per intermediate step accordion.
	    n_step_accordions = 7
	    updates = [gr.Accordion(open=False) for _ in range(n_step_accordions)]
	    # The enclosing accordion goes first and is the only one whose
	    # visibility is changed.
	    updates.insert(0, gr.Accordion(visible=visible, open=False))
	    return updates


	def render(
	song_dirs: Sequence[gr.Dropdown],
	cached_song_1click: gr.Dropdown,
	cached_song_multi: gr.Dropdown,
	model_1click: gr.Dropdown,
	intermediate_audio: gr.Dropdown,
	output_audio: gr.Dropdown,
	) -> None:
	"""
	Render "One-click generation" tab.

	Parameters
	----------
	song_dirs : Sequence[gr.Dropdown]
	Dropdowns for selecting song directories in the
	"Multi-step generation" tab.
	cached_song_1click : gr.Dropdown
	Dropdown for selecting a cached song in the
	"One-click generation" tab
	cached_song_multi : gr.Dropdown
	Dropdown for selecting a cached song in the
	"Multi-step generation" tab
	model_1click : gr.Dropdown
	Dropdown for selecting voice model in the
	"One-click generation" tab.
	intermediate_audio : gr.Dropdown
	Dropdown for selecting intermediate audio files to delete in the
	"Delete audio" tab.
	output_audio : gr.Dropdown
	Dropdown for selecting output audio files to delete in the
	"Delete audio" tab.

	"""
	# Layout note: Gradio places components in the order in which they are
	# created or ``.render()``-ed, so the statements below should not be
	# reordered.
	with gr.Tab("One-click generation"):
	# --- Main options: song source, voice model and pitch shifts ---
	with gr.Accordion("Main options"):
	with gr.Row():
	with gr.Column():
	# type="index": event handlers receive the position of the selected
	# option rather than the SourceType member itself.
	source_type = gr.Dropdown(
	list(SourceType),
	value=SourceType.PATH,
	label="Source type",
	type="index",
	info="The type of source to retrieve a song from.",
	)
	with gr.Column():
	# Three alternative source widgets; the file picker starts hidden.
	source = gr.Textbox(
	label="Source",
	info=(
	"Link to a song on YouTube or the full path of a local"
	" audio file."
	),
	)
	local_file = gr.Audio(
	label="Source",
	type="filepath",
	visible=False,
	)
	# Created by the caller; only placed into the layout here.
	cached_song_1click.render()
	# On user selection, toggle_visible_component (pre-bound with 3, the
	# number of outputs) updates the three source widgets.
	# NOTE(review): presumably only the widget at the selected index stays
	# visible -- confirm in ultimate_rvc.web.common.
	source_type.input(
	partial(toggle_visible_component, 3),
	inputs=source_type,
	outputs=[source, local_file, cached_song_1click],
	show_progress="hidden",
	)

	# Both an uploaded file and a cached-song selection feed the `source`
	# textbox through update_value; `source` is the only one of the three
	# widgets that generate_btn.click passes on as an input.
	local_file.change(
	update_value,
	inputs=local_file,
	outputs=source,
	show_progress="hidden",
	)
	cached_song_1click.input(
	update_value,
	inputs=cached_song_1click,
	outputs=source,
	show_progress="hidden",
	)
	# Voice model selection and pitch-shift controls.
	with gr.Row():
	model_1click.render()
	n_octaves = gr.Slider(
	-3,
	3,
	value=0,
	step=1,
	label="Vocal pitch shift",
	info=(
	"The number of octaves to pitch-shift converted vocals by."
	" Use 1 for male-to-female and -1 for vice-versa."
	),
	)
	n_semitones = gr.Slider(
	-12,
	12,
	value=0,
	step=1,
	label="Overall pitch shift",
	info=(
	"The number of semi-tones to pitch-shift converted vocals,"
	" instrumentals, and backup vocals by."
	),
	)

	# --- Vocal conversion options (accordion collapsed by default) ---
	with gr.Accordion("Vocal conversion options", open=False):
	with gr.Row():
	index_rate = gr.Slider(
	0,
	1,
	value=0.5,
	label="Index rate",
	info=(
	"How much of the accent in the voice model to keep in the"
	" converted vocals. Increase to bias the conversion towards the"
	" accent of the voice model."
	),
	)
	filter_radius = gr.Slider(
	0,
	7,
	value=3,
	step=1,
	label="Filter radius",
	info=(
	"If >=3: apply median filtering to harvested pitch results."
	" Can help reduce breathiness in the converted vocals."
	),
	)
	rms_mix_rate = gr.Slider(
	0,
	1,
	value=0.25,
	label="RMS mix rate",
	info=(
	"How much to mimic the loudness (0) of the input vocals or a"
	" fixed loudness (1)."
	"<br><br>"
	),
	)
	with gr.Row():
	protect = gr.Slider(
	0,
	0.5,
	value=0.33,
	label="Protect rate",
	info=(
	"Protection of voiceless consonants and breath sounds. Decrease"
	" to increase protection at the cost of indexing accuracy. Set"
	" to 0.5 to disable."
	"<br><br>"
	),
	)
	f0_method = gr.Dropdown(
	list(F0Method),
	value=F0Method.RMVPE,
	label="Pitch detection algorithm",
	info=(
	"The method to use for pitch detection. Best option is RMVPE"
	" (clarity in vocals), then Mangio-CREPE (smoother vocals)."
	"<br><br>"
	),
	)
	hop_length = gr.Slider(
	32,
	320,
	value=128,
	step=1,
	label="Hop length",
	info=(
	"How often the CREPE-based pitch detection algorithm checks for"
	" pitch changes. Measured in milliseconds. Lower values lead to"
	" longer conversion times and a higher risk of voice cracks,"
	" but better pitch accuracy."
	),
	)
	# --- Audio mixing options: reverb and per-track gain ---
	with gr.Accordion("Audio mixing options", open=False):
	# NOTE(review): the empty Markdown blocks look like vertical spacers.
	gr.Markdown("")
	gr.Markdown("Reverb control on converted vocals")
	with gr.Row():
	room_size = gr.Slider(
	0,
	1,
	value=0.15,
	label="Room size",
	info=(
	"Size of the room which reverb effect simulates. Increase for"
	" longer reverb time."
	),
	)
	with gr.Row():
	wet_level = gr.Slider(
	0,
	1,
	value=0.2,
	label="Wetness level",
	info="Loudness of converted vocals with reverb effect applied.",
	)
	dry_level = gr.Slider(
	0,
	1,
	value=0.8,
	label="Dryness level",
	info="Loudness of converted vocals without reverb effect applied.",
	)
	damping = gr.Slider(
	0,
	1,
	value=0.7,
	label="Damping level",
	info="Absorption of high frequencies in reverb effect.",
	)

	gr.Markdown("")
	gr.Markdown("Volume controls (dB)")
	with gr.Row():
	main_gain = gr.Slider(-20, 20, value=0, step=1, label="Main vocals")
	inst_gain = gr.Slider(-20, 20, value=0, step=1, label="Instrumentals")
	backup_gain = gr.Slider(-20, 20, value=0, step=1, label="Backup vocals")
	# --- Audio output options: name, sample rate and file format ---
	with gr.Accordion("Audio output options", open=False):
	with gr.Row():
	# `value` is a callable with `inputs`, so Gradio re-evaluates it when
	# the cached song or the voice model selection changes.
	output_name = gr.Textbox(
	value=partial(
	update_song_cover_name,
	None,
	update_placeholder=True,
	),
	inputs=[cached_song_1click, model_1click],
	label="Output name",
	info=(
	"If no name is provided, a suitable name will be generated"
	" automatically."
	),
	placeholder="Ultimate RVC song cover",
	)
	output_sr = gr.Dropdown(
	choices=list(SampleRate),
	value=SampleRate.HZ_44100,
	label="Output sample rate",
	info="The sample rate to save the generated song cover in.",
	)
	output_format = gr.Dropdown(
	list(AudioExt),
	value=AudioExt.MP3,
	label="Output format",
	info="The format to save the generated song cover in.",
	)
	with gr.Row():
	show_intermediate_audio = gr.Checkbox(
	label="Show intermediate audio",
	value=False,
	info=(
	"Show intermediate audio tracks generated during song cover"
	" generation."
	),
	)

	# The step accordions and their audio players are created with
	# render=False so that they exist as objects (usable as event outputs)
	# before being placed into the layout with .render() further down.
	intermediate_audio_accordions = [
	gr.Accordion(label, open=False, render=False)
	for label in [
	"Step 0: song retrieval",
	"Step 1a: vocals/instrumentals separation",
	"Step 1b: main vocals/ backup vocals separation",
	"Step 1c: main vocals cleanup",
	"Step 2: conversion of main vocals",
	"Step 3: post-processing of converted vocals",
	"Step 4: pitch shift of background tracks",
	]
	]
	# Unpacking fails loudly if the number of labels above ever changes
	# without the names below being updated (7 accordions expected).
	(
	song_retrieval_accordion,
	vocals_separation_accordion,
	main_vocals_separation_accordion,
	vocal_cleanup_accordion,
	vocal_conversion_accordion,
	vocals_postprocessing_accordion,
	pitch_shift_accordion,
	) = intermediate_audio_accordions
	# This list is later spliced into the outputs of generate_btn.click right
	# after `song_cover`, so its order has to match the order of the values
	# produced by the generation handler (defined outside this file).
	intermediate_audio_tracks = [
	gr.Audio(label=label, type="filepath", interactive=False, render=False)
	for label in [
	"Song",
	"Vocals",
	"Instrumentals",
	"Main vocals",
	"Backup vocals",
	"De-reverbed main vocals",
	"Main vocals reverb",
	"Converted vocals",
	"Post-processed vocals",
	"Pitch-shifted instrumentals",
	"Pitch-shifted backup vocals",
	]
	]
	# 11 tracks expected; same unpacking safeguard as for the accordions.
	(
	song,
	vocals_track,
	instrumentals_track,
	main_vocals_track,
	backup_vocals_track,
	main_vocals_dereverbed_track,
	main_vocals_reverb_track,
	converted_vocals_track,
	postprocessed_vocals_track,
	instrumentals_shifted_track,
	backup_vocals_shifted_track,
	) = intermediate_audio_tracks
	# Enclosing accordion: hidden until "Show intermediate audio" is ticked.
	# Each step accordion is rendered and then re-entered as a context so
	# its audio players are placed inside it.
	with gr.Accordion(
	"Intermediate audio tracks",
	open=False,
	visible=False,
	) as intermediate_audio_accordion:
	song_retrieval_accordion.render()
	with song_retrieval_accordion:
	song.render()
	vocals_separation_accordion.render()
	with vocals_separation_accordion, gr.Row():
	vocals_track.render()
	instrumentals_track.render()
	main_vocals_separation_accordion.render()
	with main_vocals_separation_accordion, gr.Row():
	main_vocals_track.render()
	backup_vocals_track.render()
	vocal_cleanup_accordion.render()
	with vocal_cleanup_accordion, gr.Row():
	main_vocals_dereverbed_track.render()
	main_vocals_reverb_track.render()
	vocal_conversion_accordion.render()
	with vocal_conversion_accordion:
	converted_vocals_track.render()
	vocals_postprocessing_accordion.render()
	with vocals_postprocessing_accordion:
	postprocessed_vocals_track.render()
	pitch_shift_accordion.render()
	with pitch_shift_accordion, gr.Row():
	instrumentals_shifted_track.render()
	backup_vocals_shifted_track.render()

	# Eight outputs (enclosing accordion + 7 step accordions), matching the
	# eight updates returned by _toggle_intermediate_audio.
	show_intermediate_audio.change(
	_toggle_intermediate_audio,
	inputs=show_intermediate_audio,
	outputs=[
	intermediate_audio_accordion,
	*intermediate_audio_accordions,
	],
	show_progress="hidden",
	)

	# --- Action buttons and the resulting song cover player ---
	with gr.Row(equal_height=True):
	reset_btn = gr.Button(value="Reset settings", scale=2)
	generate_btn = gr.Button("Generate", scale=2, variant="primary")
	song_cover = gr.Audio(label="Song cover", scale=3)

	# run_pipeline is wrapped by exception_harness and progress_bar is bound
	# by keyword, so Gradio supplies the `inputs` below positionally, in the
	# listed order.
	# NOTE(review): that order must agree with run_pipeline's positional
	# parameters, which are defined outside this file -- verify on change.
	generate_btn.click(
	partial(
	exception_harness(
	run_pipeline,
	info_msg="Song cover generated successfully!",
	),
	progress_bar=PROGRESS_BAR,
	),
	inputs=[
	source,
	model_1click,
	n_octaves,
	n_semitones,
	f0_method,
	index_rate,
	filter_radius,
	rms_mix_rate,
	protect,
	hop_length,
	room_size,
	wet_level,
	dry_level,
	damping,
	main_gain,
	inst_gain,
	backup_gain,
	output_sr,
	output_format,
	output_name,
	],
	outputs=[song_cover, *intermediate_audio_tracks],
	# Shares the GPU concurrency group; at most one such job at a time.
	concurrency_limit=1,
	concurrency_id=ConcurrencyId.GPU,
	# .success: runs only if generation finished without raising.
	).success(
	# First bound argument equals the number of outputs below
	# (3 fixed dropdowns + one per entry of song_dirs).
	# NOTE(review): the meaning of the `[]` and `[2]` arguments is defined
	# by update_cached_songs; index 2 of the outputs is
	# `intermediate_audio` -- confirm in ultimate_rvc.web.common.
	partial(
	update_cached_songs,
	3 + len(song_dirs),
	[],
	[2],
	),
	outputs=[
	cached_song_1click,
	cached_song_multi,
	intermediate_audio,
	*song_dirs,
	],
	show_progress="hidden",
	# Chained after the cached-song refresh: refresh the output-audio
	# dropdown of the "Delete audio" tab (1 output, bound as 1).
	).then(
	partial(update_output_audio, 1, [], [0]),
	outputs=[output_audio],
	show_progress="hidden",
	)
	# Reset: the literals are matched to `outputs` by position and repeat the
	# `value=` defaults declared on the components above -- keep both in sync.
	# Source, voice model and output name are not part of the reset.
	reset_btn.click(
	lambda: [
	0,  # n_octaves
	0,  # n_semitones
	0.5,  # index_rate
	3,  # filter_radius
	0.25,  # rms_mix_rate
	0.33,  # protect
	F0Method.RMVPE,  # f0_method
	128,  # hop_length
	0.15,  # room_size
	0.2,  # wet_level
	0.8,  # dry_level
	0.7,  # damping
	0,  # main_gain
	0,  # inst_gain
	0,  # backup_gain
	SampleRate.HZ_44100,  # output_sr
	AudioExt.MP3,  # output_format
	False,  # show_intermediate_audio
	],
	outputs=[
	n_octaves,
	n_semitones,
	index_rate,
	filter_radius,
	rms_mix_rate,
	protect,
	f0_method,
	hop_length,
	room_size,
	wet_level,
	dry_level,
	damping,
	main_gain,
	inst_gain,
	backup_gain,
	output_sr,
	output_format,
	show_intermediate_audio,
	],
	show_progress="hidden",
	)