Spaces:

SpyC0der77
/

HRVC

Runtime error

App Files Files Community

HRVC / src /ultimate_rvc /cli /generate /song_cover.py

SpyC0der77

Upload folder using huggingface_hub

1a942eb verified 5 months ago

raw

history blame contribute delete

12.3 kB

	"""
	Module which defines the command-line interface for generating a song
	cover.
	"""

	from typing import Annotated

	from pathlib import Path

	import typer
	from rich import print as rprint
	from rich.panel import Panel
	from rich.table import Table

	from ultimate_rvc.core.generate.song_cover import run_pipeline as _run_pipeline
	from ultimate_rvc.core.generate.song_cover import to_wav as _to_wav
	from ultimate_rvc.typing_extra import AudioExt, F0Method

	app = typer.Typer(
	name="song-cover",
	no_args_is_help=True,
	help="Generate song covers",
	rich_markup_mode="markdown",
	)


	def complete_name(incomplete: str, enumeration: list[str]) -> list[str]:
	"""
	Return a list of names that start with the incomplete string.

	Parameters
	----------
	incomplete : str
	The incomplete string to complete.
	enumeration : list[str]
	The list of names to complete from.

	Returns
	-------
	list[str]
	The list of names that start with the incomplete string.

	"""
	return [name for name in list(enumeration) if name.startswith(incomplete)]


	def complete_audio_ext(incomplete: str) -> list[str]:
	"""
	Return a list of audio extensions that start with the incomplete
	string.

	Parameters
	----------
	incomplete : str
	The incomplete string to complete.

	Returns
	-------
	list[str]
	The list of audio extensions that start with the incomplete
	string.

	"""
	return complete_name(incomplete, list(AudioExt))


	def complete_f0_method(incomplete: str) -> list[str]:
	"""
	Return a list of F0 methods that start with the incomplete string.

	Parameters
	----------
	incomplete : str
	The incomplete string to complete.

	Returns
	-------
	list[str]
	The list of F0 methods that start with the incomplete string.

	"""
	return complete_name(incomplete, list(F0Method))


	@app.command(no_args_is_help=True)
	def to_wav(
	audio_track: Annotated[
	Path,
	typer.Argument(
	help="The path to the audio track to convert.",
	exists=True,
	file_okay=True,
	dir_okay=False,
	resolve_path=True,
	),
	],
	song_dir: Annotated[
	Path,
	typer.Argument(
	help=(
	"The path to the song directory where the converted audio track will be"
	" saved."
	),
	exists=True,
	file_okay=False,
	dir_okay=True,
	resolve_path=True,
	),
	],
	prefix: Annotated[
	str,
	typer.Argument(
	help="The prefix to use for the name of the converted audio track.",
	),
	],
	accepted_format: Annotated[
	list[AudioExt] \| None,
	typer.Option(
	case_sensitive=False,
	autocompletion=complete_audio_ext,
	help=(
	"An audio format to accept for conversion. This option can be used"
	" multiple times to accept multiple formats. If not provided, the"
	" default accepted formats are mp3, ogg, flac, m4a and aac."
	),
	),
	] = None,
	) -> None:
	"""
	Convert a given audio track to wav format if its current format
	is an accepted format. See the --accepted-formats option for more
	information on accepted formats.

	"""
	rprint()
	wav_path = _to_wav(
	audio_track=audio_track,
	song_dir=song_dir,
	prefix=prefix,
	accepted_formats=set(accepted_format) if accepted_format else None,
	)
	if wav_path == audio_track:
	rprint(
	"[+] Audio track was not converted to WAV format. Presumably, "
	"its format is not in the given list of accepted formats.",
	)
	else:
	rprint("[+] Audio track succesfully converted to WAV format!")
	rprint(Panel(f"[green]{wav_path}", title="WAV Audio Track Path"))


	@app.command(no_args_is_help=True)
	def run_pipeline(
	source: Annotated[
	str,
	typer.Argument(
	help=(
	"A Youtube URL, the path to a local audio file or the path to a"
	" song directory."
	),
	),
	],
	model_name: Annotated[
	str,
	typer.Argument(help="The name of the voice model to use for vocal conversion."),
	],
	n_octaves: Annotated[
	int,
	typer.Option(
	rich_help_panel="Vocal Conversion Options",
	help=(
	"The number of octaves to pitch-shift the converted vocals by.Use 1 for"
	" male-to-female and -1 for vice-versa."
	),
	),
	] = 0,
	n_semitones: Annotated[
	int,
	typer.Option(
	rich_help_panel="Vocal Conversion Options",
	help=(
	"The number of semi-tones to pitch-shift the converted vocals,"
	" instrumentals, and backup vocals by. Altering this slightly reduces"
	" sound quality"
	),
	),
	] = 0,
	f0_method: Annotated[
	F0Method,
	typer.Option(
	case_sensitive=False,
	autocompletion=complete_f0_method,
	rich_help_panel="Vocal Conversion Options",
	help=(
	"The method to use for pitch detection during vocal conversion. Best"
	" option is RMVPE (clarity in vocals), then Mangio-Crepe (smoother"
	" vocals)."
	),
	),
	] = F0Method.RMVPE,
	index_rate: Annotated[
	float,
	typer.Option(
	min=0,
	max=1,
	rich_help_panel="Vocal Conversion Options",
	help=(
	"A decimal number e.g. 0.5, Controls how much of the accent in the"
	" voice model to keep in the converted vocals. Increase to bias the"
	" conversion towards the accent of the voice model."
	),
	),
	] = 0.5,
	filter_radius: Annotated[
	int,
	typer.Option(
	min=0,
	max=7,
	rich_help_panel="Vocal Conversion Options",
	help=(
	"A number between 0 and 7. If >=3: apply median filtering to the pitch"
	" results harvested during vocal conversion. Can help reduce"
	" breathiness in the converted vocals."
	),
	),
	] = 3,
	rms_mix_rate: Annotated[
	float,
	typer.Option(
	min=0,
	max=1,
	rich_help_panel="Vocal Conversion Options",
	help=(
	"A decimal number e.g. 0.25. Controls how much to mimic the loudness of"
	" the input vocals (0) or a fixed loudness (1) during vocal conversion."
	),
	),
	] = 0.25,
	protect: Annotated[
	float,
	typer.Option(
	min=0,
	max=0.5,
	rich_help_panel="Vocal Conversion Options",
	help=(
	"A decimal number e.g. 0.33. Controls protection of voiceless"
	" consonants and breath sounds during vocal conversion. Decrease to"
	" increase protection at the cost of indexing accuracy. Set to 0.5 to"
	" disable."
	),
	),
	] = 0.33,
	hop_length: Annotated[
	int,
	typer.Option(
	rich_help_panel="Vocal Conversion Options",
	help=(
	"Controls how often the CREPE-based pitch detection algorithm checks"
	" for pitch changes during vocal conversion. Measured in milliseconds."
	" Lower values lead to longer conversion times and a higher risk of"
	" voice cracks, but better pitch accuracy. Recommended value: 128."
	),
	),
	] = 128,
	room_size: Annotated[
	float,
	typer.Option(
	min=0,
	max=1,
	rich_help_panel="Vocal Post-processing Options",
	help=(
	"The room size of the reverb effect applied to the converted vocals."
	" Increase for longer reverb time. Should be a value between 0 and 1."
	),
	),
	] = 0.15,
	wet_level: Annotated[
	float,
	typer.Option(
	min=0,
	max=1,
	rich_help_panel="Vocal Post-processing Options",
	help=(
	"The loudness of the converted vocals with reverb effect applied."
	" Should be a value between 0 and 1"
	),
	),
	] = 0.2,
	dry_level: Annotated[
	float,
	typer.Option(
	min=0,
	max=1,
	rich_help_panel="Vocal Post-processing Options",
	help=(
	"The loudness of the converted vocals wihout reverb effect applied."
	" Should be a value between 0 and 1."
	),
	),
	] = 0.8,
	damping: Annotated[
	float,
	typer.Option(
	min=0,
	max=1,
	rich_help_panel="Vocal Post-processing Options",
	help=(
	"The absorption of high frequencies in the reverb effect applied to the"
	" converted vocals. Should be a value between 0 and 1."
	),
	),
	] = 0.7,
	main_gain: Annotated[
	int,
	typer.Option(
	rich_help_panel="Audio Mixing Options",
	help="The gain to apply to the post-processed vocals. Measured in dB.",
	),
	] = 0,
	inst_gain: Annotated[
	int,
	typer.Option(
	rich_help_panel="Audio Mixing Options",
	help=(
	"The gain to apply to the pitch-shifted instrumentals. Measured in dB."
	),
	),
	] = 0,
	backup_gain: Annotated[
	int,
	typer.Option(
	rich_help_panel="Audio Mixing Options",
	help=(
	"The gain to apply to the pitch-shifted backup vocals. Measured in dB."
	),
	),
	] = 0,
	output_sr: Annotated[
	int,
	typer.Option(
	rich_help_panel="Audio Mixing Options",
	help="The sample rate of the song cover.",
	),
	] = 44100,
	output_format: Annotated[
	AudioExt,
	typer.Option(
	case_sensitive=False,
	autocompletion=complete_audio_ext,
	rich_help_panel="Audio Mixing Options",
	help="The audio format of the song cover.",
	),
	] = AudioExt.MP3,
	output_name: Annotated[
	str \| None,
	typer.Option(
	rich_help_panel="Audio Mixing Options",
	help="The name of the song cover.",
	),
	] = None,
	) -> None:
	"""Run the song cover generation pipeline."""
	[song_cover_path, *intermediate_audio_file_paths] = _run_pipeline(
	source=source,
	model_name=model_name,
	n_octaves=n_octaves,
	n_semitones=n_semitones,
	f0_method=f0_method,
	index_rate=index_rate,
	filter_radius=filter_radius,
	rms_mix_rate=rms_mix_rate,
	protect=protect,
	hop_length=hop_length,
	room_size=room_size,
	wet_level=wet_level,
	dry_level=dry_level,
	damping=damping,
	main_gain=main_gain,
	inst_gain=inst_gain,
	backup_gain=backup_gain,
	output_sr=output_sr,
	output_format=output_format,
	output_name=output_name,
	progress_bar=None,
	)
	table = Table()
	table.add_column("Type")
	table.add_column("Path")
	for name, path in zip(
	[
	"Song",
	"Vocals",
	"Instrumentals",
	"Main vocals",
	"Backup vocals",
	"De-reverbed main vocals",
	"Main vocals reverb",
	"Converted vocals",
	"Post-processed vocals",
	"Pitch-shifted instrumentals",
	"Pitch-shifted backup vocals",
	],
	intermediate_audio_file_paths,
	strict=True,
	):
	table.add_row(name, f"[green]{path}")
	rprint("[+] Song cover succesfully generated!")
	rprint(Panel(f"[green]{song_cover_path}", title="Song Cover Path"))
	rprint(Panel(table, title="Intermediate Audio Files"))