"""
Module which defines the command-line interface for generating a song
cover.
"""

from typing import Annotated

from pathlib import Path

import typer
from rich import print as rprint
from rich.panel import Panel
from rich.table import Table

from ultimate_rvc.core.generate.song_cover import run_pipeline as _run_pipeline
from ultimate_rvc.core.generate.song_cover import to_wav as _to_wav
from ultimate_rvc.typing_extra import AudioExt, F0Method

# Typer sub-application that groups the song cover commands defined below.
app = typer.Typer(
    name="song-cover",
    no_args_is_help=True,
    help="Generate song covers",
    rich_markup_mode="markdown",
)


def complete_name(incomplete: str, enumeration: list[str]) -> list[str]:
    """
    Return a list of names that start with the incomplete string.

    Parameters
    ----------
    incomplete : str
        The incomplete string to complete.
    enumeration : list[str]
        The list of names to complete from.

    Returns
    -------
    list[str]
        The list of names that start with the incomplete string.

    """
    # The comprehension already builds a new list, so the input is iterated
    # directly rather than copied first.
    return [name for name in enumeration if name.startswith(incomplete)]


def complete_audio_ext(incomplete: str) -> list[str]:
    """
    Return a list of audio extensions that start with the incomplete
    string.

    Parameters
    ----------
    incomplete : str
        The incomplete string to complete.

    Returns
    -------
    list[str]
        The list of audio extensions that start with the incomplete
        string.

    """
    return complete_name(incomplete, list(AudioExt))


def complete_f0_method(incomplete: str) -> list[str]:
    """
    Return a list of F0 methods that start with the incomplete string.

    Parameters
    ----------
    incomplete : str
        The incomplete string to complete.

    Returns
    -------
    list[str]
        The list of F0 methods that start with the incomplete string.

    """
    return complete_name(incomplete, list(F0Method))


# NOTE: Typer renders a command's docstring as its help text, so the
# docstrings of the commands below are kept short and user-facing;
# implementation notes live in comments instead.
@app.command(no_args_is_help=True)
def to_wav(
    audio_track: Annotated[
        Path,
        typer.Argument(
            help="The path to the audio track to convert.",
            exists=True,
            file_okay=True,
            dir_okay=False,
            resolve_path=True,
        ),
    ],
    song_dir: Annotated[
        Path,
        typer.Argument(
            help=(
                "The path to the song directory where the converted audio track will be"
                " saved."
            ),
            exists=True,
            file_okay=False,
            dir_okay=True,
            resolve_path=True,
        ),
    ],
    prefix: Annotated[
        str,
        typer.Argument(
            help="The prefix to use for the name of the converted audio track.",
        ),
    ],
    accepted_format: Annotated[
        list[AudioExt] | None,
        typer.Option(
            case_sensitive=False,
            autocompletion=complete_audio_ext,
            help=(
                "An audio format to accept for conversion. This option can be used"
                " multiple times to accept multiple formats. If not provided, the"
                " default accepted formats are mp3, ogg, flac, m4a and aac."
            ),
        ),
    ] = None,
) -> None:
    """
    Convert a given audio track to wav format if its current format is
    an accepted format. See the --accepted-format option for more
    information on accepted formats.
    """
    # Emit a blank line before any further output.
    rprint()
    # An omitted (or empty) list of formats is forwarded as None; otherwise
    # the repeated option values are de-duplicated into a set.
    wav_path = _to_wav(
        audio_track=audio_track,
        song_dir=song_dir,
        prefix=prefix,
        accepted_formats=set(accepted_format) if accepted_format else None,
    )
    # Getting the input path back unchanged is treated as "no conversion
    # took place".
    if wav_path == audio_track:
        rprint(
            "[+] Audio track was not converted to WAV format. Presumably, "
            "its format is not in the given list of accepted formats.",
        )
    else:
        rprint("[+] Audio track successfully converted to WAV format!")
        rprint(Panel(f"[green]{wav_path}", title="WAV Audio Track Path"))


@app.command(no_args_is_help=True)
def run_pipeline(
    source: Annotated[
        str,
        typer.Argument(
            help=(
                "A Youtube URL, the path to a local audio file or the path to a"
                " song directory."
            ),
        ),
    ],
    model_name: Annotated[
        str,
        typer.Argument(help="The name of the voice model to use for vocal conversion."),
    ],
    n_octaves: Annotated[
        int,
        typer.Option(
            rich_help_panel="Vocal Conversion Options",
            help=(
                "The number of octaves to pitch-shift the converted vocals by. Use 1"
                " for male-to-female and -1 for vice-versa."
            ),
        ),
    ] = 0,
    n_semitones: Annotated[
        int,
        typer.Option(
            rich_help_panel="Vocal Conversion Options",
            help=(
                "The number of semi-tones to pitch-shift the converted vocals,"
                " instrumentals, and backup vocals by. Altering this slightly reduces"
                " sound quality"
            ),
        ),
    ] = 0,
    f0_method: Annotated[
        F0Method,
        typer.Option(
            case_sensitive=False,
            autocompletion=complete_f0_method,
            rich_help_panel="Vocal Conversion Options",
            help=(
                "The method to use for pitch detection during vocal conversion. Best"
                " option is RMVPE (clarity in vocals), then Mangio-Crepe (smoother"
                " vocals)."
            ),
        ),
    ] = F0Method.RMVPE,
    index_rate: Annotated[
        float,
        typer.Option(
            min=0,
            max=1,
            rich_help_panel="Vocal Conversion Options",
            help=(
                "A decimal number e.g. 0.5, Controls how much of the accent in the"
                " voice model to keep in the converted vocals. Increase to bias the"
                " conversion towards the accent of the voice model."
            ),
        ),
    ] = 0.5,
    filter_radius: Annotated[
        int,
        typer.Option(
            min=0,
            max=7,
            rich_help_panel="Vocal Conversion Options",
            help=(
                "A number between 0 and 7. If >=3: apply median filtering to the pitch"
                " results harvested during vocal conversion. Can help reduce"
                " breathiness in the converted vocals."
            ),
        ),
    ] = 3,
    rms_mix_rate: Annotated[
        float,
        typer.Option(
            min=0,
            max=1,
            rich_help_panel="Vocal Conversion Options",
            help=(
                "A decimal number e.g. 0.25. Controls how much to mimic the loudness of"
                " the input vocals (0) or a fixed loudness (1) during vocal conversion."
            ),
        ),
    ] = 0.25,
    protect: Annotated[
        float,
        typer.Option(
            min=0,
            max=0.5,
            rich_help_panel="Vocal Conversion Options",
            help=(
                "A decimal number e.g. 0.33. Controls protection of voiceless"
                " consonants and breath sounds during vocal conversion. Decrease to"
                " increase protection at the cost of indexing accuracy. Set to 0.5 to"
                " disable."
            ),
        ),
    ] = 0.33,
    hop_length: Annotated[
        int,
        typer.Option(
            rich_help_panel="Vocal Conversion Options",
            help=(
                "Controls how often the CREPE-based pitch detection algorithm checks"
                " for pitch changes during vocal conversion. Measured in milliseconds."
                " Lower values lead to longer conversion times and a higher risk of"
                " voice cracks, but better pitch accuracy. Recommended value: 128."
            ),
        ),
    ] = 128,
    room_size: Annotated[
        float,
        typer.Option(
            min=0,
            max=1,
            rich_help_panel="Vocal Post-processing Options",
            help=(
                "The room size of the reverb effect applied to the converted vocals."
                " Increase for longer reverb time. Should be a value between 0 and 1."
            ),
        ),
    ] = 0.15,
    wet_level: Annotated[
        float,
        typer.Option(
            min=0,
            max=1,
            rich_help_panel="Vocal Post-processing Options",
            help=(
                "The loudness of the converted vocals with reverb effect applied."
                " Should be a value between 0 and 1"
            ),
        ),
    ] = 0.2,
    dry_level: Annotated[
        float,
        typer.Option(
            min=0,
            max=1,
            rich_help_panel="Vocal Post-processing Options",
            help=(
                "The loudness of the converted vocals without reverb effect applied."
                " Should be a value between 0 and 1."
            ),
        ),
    ] = 0.8,
    damping: Annotated[
        float,
        typer.Option(
            min=0,
            max=1,
            rich_help_panel="Vocal Post-processing Options",
            help=(
                "The absorption of high frequencies in the reverb effect applied to the"
                " converted vocals. Should be a value between 0 and 1."
            ),
        ),
    ] = 0.7,
    main_gain: Annotated[
        int,
        typer.Option(
            rich_help_panel="Audio Mixing Options",
            help="The gain to apply to the post-processed vocals. Measured in dB.",
        ),
    ] = 0,
    inst_gain: Annotated[
        int,
        typer.Option(
            rich_help_panel="Audio Mixing Options",
            help=(
                "The gain to apply to the pitch-shifted instrumentals. Measured in dB."
            ),
        ),
    ] = 0,
    backup_gain: Annotated[
        int,
        typer.Option(
            rich_help_panel="Audio Mixing Options",
            help=(
                "The gain to apply to the pitch-shifted backup vocals. Measured in dB."
            ),
        ),
    ] = 0,
    output_sr: Annotated[
        int,
        typer.Option(
            rich_help_panel="Audio Mixing Options",
            help="The sample rate of the song cover.",
        ),
    ] = 44100,
    output_format: Annotated[
        AudioExt,
        typer.Option(
            case_sensitive=False,
            autocompletion=complete_audio_ext,
            rich_help_panel="Audio Mixing Options",
            help="The audio format of the song cover.",
        ),
    ] = AudioExt.MP3,
    output_name: Annotated[
        str | None,
        typer.Option(
            rich_help_panel="Audio Mixing Options",
            help="The name of the song cover.",
        ),
    ] = None,
) -> None:
    """Run the song cover generation pipeline."""
    # The first returned path is the finished song cover; the remaining
    # paths are the intermediate audio files.
    [song_cover_path, *intermediate_audio_file_paths] = _run_pipeline(
        source=source,
        model_name=model_name,
        n_octaves=n_octaves,
        n_semitones=n_semitones,
        f0_method=f0_method,
        index_rate=index_rate,
        filter_radius=filter_radius,
        rms_mix_rate=rms_mix_rate,
        protect=protect,
        hop_length=hop_length,
        room_size=room_size,
        wet_level=wet_level,
        dry_level=dry_level,
        damping=damping,
        main_gain=main_gain,
        inst_gain=inst_gain,
        backup_gain=backup_gain,
        output_sr=output_sr,
        output_format=output_format,
        output_name=output_name,
        progress_bar=None,
    )
    table = Table()
    table.add_column("Type")
    table.add_column("Path")
    # strict=True makes zip raise ValueError if the number of returned
    # intermediate paths does not match the number of labels below.
    for name, path in zip(
        [
            "Song",
            "Vocals",
            "Instrumentals",
            "Main vocals",
            "Backup vocals",
            "De-reverbed main vocals",
            "Main vocals reverb",
            "Converted vocals",
            "Post-processed vocals",
            "Pitch-shifted instrumentals",
            "Pitch-shifted backup vocals",
        ],
        intermediate_audio_file_paths,
        strict=True,
    ):
        table.add_row(name, f"[green]{path}")
    rprint("[+] Song cover successfully generated!")
    rprint(Panel(f"[green]{song_cover_path}", title="Song Cover Path"))
    rprint(Panel(table, title="Intermediate Audio Files"))