Spaces:
Runtime error
Runtime error
"""
Module which defines the command-line interface for generating a song
cover.
"""
from typing import Annotated | |
from pathlib import Path | |
import typer | |
from rich import print as rprint | |
from rich.panel import Panel | |
from rich.table import Table | |
from ultimate_rvc.core.generate.song_cover import run_pipeline as _run_pipeline | |
from ultimate_rvc.core.generate.song_cover import to_wav as _to_wav | |
from ultimate_rvc.typing_extra import AudioExt, F0Method | |
# Typer application object for the "song-cover" CLI. Help text is rendered
# as Markdown, and invoking it without arguments shows the help screen.
app = typer.Typer(
    name="song-cover",
    help="Generate song covers",
    rich_markup_mode="markdown",
    no_args_is_help=True,
)
def complete_name(incomplete: str, enumeration: list[str]) -> list[str]:
    """
    Return a list of names that start with the incomplete string.

    Parameters
    ----------
    incomplete : str
        The incomplete string to complete.
    enumeration : list[str]
        The list of names to complete from.

    Returns
    -------
    list[str]
        The list of names that start with the incomplete string, in the
        order in which they appear in `enumeration`. The match is
        case-sensitive.

    """
    # Iterate the input directly: it is only read once, so copying it into
    # a throwaway list first is unnecessary.
    return [name for name in enumeration if name.startswith(incomplete)]
def complete_audio_ext(incomplete: str) -> list[str]:
    """
    Complete a partially typed audio extension.

    Parameters
    ----------
    incomplete : str
        The partially typed string to complete.

    Returns
    -------
    list[str]
        The members of `AudioExt` that start with the given string.

    """
    candidates = list(AudioExt)
    return complete_name(incomplete, candidates)
def complete_f0_method(incomplete: str) -> list[str]:
    """
    Complete a partially typed F0 method name.

    Parameters
    ----------
    incomplete : str
        The partially typed string to complete.

    Returns
    -------
    list[str]
        The members of `F0Method` that start with the given string.

    """
    candidates = list(F0Method)
    return complete_name(incomplete, candidates)
def to_wav(
    audio_track: Annotated[
        Path,
        typer.Argument(
            help="The path to the audio track to convert.",
            exists=True,
            file_okay=True,
            dir_okay=False,
            resolve_path=True,
        ),
    ],
    song_dir: Annotated[
        Path,
        typer.Argument(
            help=(
                "The path to the song directory where the converted audio track will be"
                " saved."
            ),
            exists=True,
            file_okay=False,
            dir_okay=True,
            resolve_path=True,
        ),
    ],
    prefix: Annotated[
        str,
        typer.Argument(
            help="The prefix to use for the name of the converted audio track.",
        ),
    ],
    accepted_format: Annotated[
        list[AudioExt] | None,
        typer.Option(
            case_sensitive=False,
            autocompletion=complete_audio_ext,
            help=(
                "An audio format to accept for conversion. This option can be used"
                " multiple times to accept multiple formats. If not provided, the"
                " default accepted formats are mp3, ogg, flac, m4a and aac."
            ),
        ),
    ] = None,
) -> None:
    """
    Convert a given audio track to wav format if its current format
    is an accepted format. See the --accepted-format option for more
    information on accepted formats.
    """
    # The docstring above doubles as the command's help text in Typer, so it
    # is kept free of developer-facing parameter documentation.
    # NOTE(review): no `@app.command()` decorator is visible on this function
    # in this view -- confirm that it is registered on `app` elsewhere.

    # Emit an empty line before any further output.
    rprint()
    wav_path = _to_wav(
        audio_track=audio_track,
        song_dir=song_dir,
        prefix=prefix,
        # Collapse repeated option values into a set; an absent or empty
        # option is forwarded as None.
        accepted_formats=set(accepted_format) if accepted_format else None,
    )
    # Getting the input path back unchanged is treated as "no conversion
    # took place".
    if wav_path == audio_track:
        rprint(
            "[+] Audio track was not converted to WAV format. Presumably, "
            "its format is not in the given list of accepted formats.",
        )
    else:
        rprint("[+] Audio track successfully converted to WAV format!")
        rprint(Panel(f"[green]{wav_path}", title="WAV Audio Track Path"))
def run_pipeline(
    source: Annotated[
        str,
        typer.Argument(
            help=(
                "A Youtube URL, the path to a local audio file or the path to a"
                " song directory."
            ),
        ),
    ],
    model_name: Annotated[
        str,
        typer.Argument(help="The name of the voice model to use for vocal conversion."),
    ],
    n_octaves: Annotated[
        int,
        typer.Option(
            rich_help_panel="Vocal Conversion Options",
            help=(
                "The number of octaves to pitch-shift the converted vocals by. Use 1"
                " for male-to-female and -1 for vice-versa."
            ),
        ),
    ] = 0,
    n_semitones: Annotated[
        int,
        typer.Option(
            rich_help_panel="Vocal Conversion Options",
            help=(
                "The number of semi-tones to pitch-shift the converted vocals,"
                " instrumentals, and backup vocals by. Altering this slightly reduces"
                " sound quality"
            ),
        ),
    ] = 0,
    f0_method: Annotated[
        F0Method,
        typer.Option(
            case_sensitive=False,
            autocompletion=complete_f0_method,
            rich_help_panel="Vocal Conversion Options",
            help=(
                "The method to use for pitch detection during vocal conversion. Best"
                " option is RMVPE (clarity in vocals), then Mangio-Crepe (smoother"
                " vocals)."
            ),
        ),
    ] = F0Method.RMVPE,
    index_rate: Annotated[
        float,
        typer.Option(
            min=0,
            max=1,
            rich_help_panel="Vocal Conversion Options",
            help=(
                "A decimal number e.g. 0.5. Controls how much of the accent in the"
                " voice model to keep in the converted vocals. Increase to bias the"
                " conversion towards the accent of the voice model."
            ),
        ),
    ] = 0.5,
    filter_radius: Annotated[
        int,
        typer.Option(
            min=0,
            max=7,
            rich_help_panel="Vocal Conversion Options",
            help=(
                "A number between 0 and 7. If >=3: apply median filtering to the pitch"
                " results harvested during vocal conversion. Can help reduce"
                " breathiness in the converted vocals."
            ),
        ),
    ] = 3,
    rms_mix_rate: Annotated[
        float,
        typer.Option(
            min=0,
            max=1,
            rich_help_panel="Vocal Conversion Options",
            help=(
                "A decimal number e.g. 0.25. Controls how much to mimic the loudness of"
                " the input vocals (0) or a fixed loudness (1) during vocal conversion."
            ),
        ),
    ] = 0.25,
    protect: Annotated[
        float,
        typer.Option(
            min=0,
            max=0.5,
            rich_help_panel="Vocal Conversion Options",
            help=(
                "A decimal number e.g. 0.33. Controls protection of voiceless"
                " consonants and breath sounds during vocal conversion. Decrease to"
                " increase protection at the cost of indexing accuracy. Set to 0.5 to"
                " disable."
            ),
        ),
    ] = 0.33,
    hop_length: Annotated[
        int,
        typer.Option(
            rich_help_panel="Vocal Conversion Options",
            help=(
                "Controls how often the CREPE-based pitch detection algorithm checks"
                " for pitch changes during vocal conversion. Measured in milliseconds."
                " Lower values lead to longer conversion times and a higher risk of"
                " voice cracks, but better pitch accuracy. Recommended value: 128."
            ),
        ),
    ] = 128,
    room_size: Annotated[
        float,
        typer.Option(
            min=0,
            max=1,
            rich_help_panel="Vocal Post-processing Options",
            help=(
                "The room size of the reverb effect applied to the converted vocals."
                " Increase for longer reverb time. Should be a value between 0 and 1."
            ),
        ),
    ] = 0.15,
    wet_level: Annotated[
        float,
        typer.Option(
            min=0,
            max=1,
            rich_help_panel="Vocal Post-processing Options",
            help=(
                "The loudness of the converted vocals with reverb effect applied."
                " Should be a value between 0 and 1"
            ),
        ),
    ] = 0.2,
    dry_level: Annotated[
        float,
        typer.Option(
            min=0,
            max=1,
            rich_help_panel="Vocal Post-processing Options",
            help=(
                "The loudness of the converted vocals without reverb effect applied."
                " Should be a value between 0 and 1."
            ),
        ),
    ] = 0.8,
    damping: Annotated[
        float,
        typer.Option(
            min=0,
            max=1,
            rich_help_panel="Vocal Post-processing Options",
            help=(
                "The absorption of high frequencies in the reverb effect applied to the"
                " converted vocals. Should be a value between 0 and 1."
            ),
        ),
    ] = 0.7,
    main_gain: Annotated[
        int,
        typer.Option(
            rich_help_panel="Audio Mixing Options",
            help="The gain to apply to the post-processed vocals. Measured in dB.",
        ),
    ] = 0,
    inst_gain: Annotated[
        int,
        typer.Option(
            rich_help_panel="Audio Mixing Options",
            help=(
                "The gain to apply to the pitch-shifted instrumentals. Measured in dB."
            ),
        ),
    ] = 0,
    backup_gain: Annotated[
        int,
        typer.Option(
            rich_help_panel="Audio Mixing Options",
            help=(
                "The gain to apply to the pitch-shifted backup vocals. Measured in dB."
            ),
        ),
    ] = 0,
    output_sr: Annotated[
        int,
        typer.Option(
            rich_help_panel="Audio Mixing Options",
            help="The sample rate of the song cover.",
        ),
    ] = 44100,
    output_format: Annotated[
        AudioExt,
        typer.Option(
            case_sensitive=False,
            autocompletion=complete_audio_ext,
            rich_help_panel="Audio Mixing Options",
            help="The audio format of the song cover.",
        ),
    ] = AudioExt.MP3,
    output_name: Annotated[
        str | None,
        typer.Option(
            rich_help_panel="Audio Mixing Options",
            help="The name of the song cover.",
        ),
    ] = None,
) -> None:
    """Run the song cover generation pipeline."""
    # The docstring above doubles as the command's help text in Typer, so it
    # is kept to a single summary line; each parameter is documented through
    # its own `help` string.
    # NOTE(review): no `@app.command()` decorator is visible on this function
    # in this view -- confirm that it is registered on `app` elsewhere.

    # The first returned path is the finished song cover; all remaining
    # paths are reported below as intermediate audio files.
    [song_cover_path, *intermediate_audio_file_paths] = _run_pipeline(
        source=source,
        model_name=model_name,
        n_octaves=n_octaves,
        n_semitones=n_semitones,
        f0_method=f0_method,
        index_rate=index_rate,
        filter_radius=filter_radius,
        rms_mix_rate=rms_mix_rate,
        protect=protect,
        hop_length=hop_length,
        room_size=room_size,
        wet_level=wet_level,
        dry_level=dry_level,
        damping=damping,
        main_gain=main_gain,
        inst_gain=inst_gain,
        backup_gain=backup_gain,
        output_sr=output_sr,
        output_format=output_format,
        output_name=output_name,
        progress_bar=None,
    )
    table = Table()
    table.add_column("Type")
    table.add_column("Path")
    # strict=True makes zip raise ValueError if the number of intermediate
    # paths differs from the number of labels, instead of silently
    # truncating the table.
    for name, path in zip(
        [
            "Song",
            "Vocals",
            "Instrumentals",
            "Main vocals",
            "Backup vocals",
            "De-reverbed main vocals",
            "Main vocals reverb",
            "Converted vocals",
            "Post-processed vocals",
            "Pitch-shifted instrumentals",
            "Pitch-shifted backup vocals",
        ],
        intermediate_audio_file_paths,
        strict=True,
    ):
        table.add_row(name, f"[green]{path}")
    rprint("[+] Song cover successfully generated!")
    rprint(Panel(f"[green]{song_cover_path}", title="Song Cover Path"))
    rprint(Panel(table, title="Intermediate Audio Files"))