# Page-scrape residue (Hugging Face Space status banner, not program code): "Spaces: Runtime error"
import os | |
import binascii | |
import warnings | |
import json | |
import argparse | |
import copy | |
import numpy as np | |
import matplotlib.pyplot as plt | |
import torch | |
import tqdm | |
import librosa | |
import librosa.display | |
import soundfile as sf | |
import gradio as gr | |
import pytube as pt | |
from pytube.exceptions import VideoUnavailable | |
from inference.style_transfer import * | |
from inference.mastering_transfer import * | |
# Directory the YouTube download helpers below write "input.wav" /
# "reference.wav" into. Created at import time so those writes never hit a
# missing directory.
yt_video_dir = "./yt_dir/0"
os.makedirs(name=yt_video_dir, exist_ok=True)
def _fetch_trimmed_yt_audio(yt_link, target_name, start_point_in_second, duration_in_second):
    """Download a YouTube video's first audio-only stream and trim it in place.

    Args:
        yt_link: URL of the YouTube video.
        target_name: File name to store the download under, inside ``yt_video_dir``.
        start_point_in_second: Forwarded to ``trim_audio``.
        duration_in_second: Forwarded to ``trim_audio``.

    Returns:
        The path of the downloaded (and trimmed) file, or ``None`` when the
        video is unavailable or exposes no audio-only stream.
    """
    target_path = os.path.join(yt_video_dir, target_name)
    try:
        # .first() yields None instead of raising IndexError when the query is empty.
        audio_stream = pt.YouTube(yt_link).streams.filter(only_audio=True).first()
        if audio_stream is None:
            warnings.warn(f"No audio-only stream found at {yt_link}")
            return None
        # NOTE(review): the stream is stored under a ".wav" name regardless of its
        # actual container — presumably trim_audio copes with that; confirm.
        audio_stream.download(filename=target_path)
    except VideoUnavailable as e:
        warnings.warn(f"Video Not Found at {yt_link} ({e})")
        # Nothing was downloaded, so there is nothing to trim.
        return None

    # trim audio length - due to computation time on HuggingFace environment
    trim_audio(
        target_file_path=target_path,
        start_point_in_second=start_point_in_second,
        duration_in_second=duration_in_second,
    )
    return target_path


def get_audio_from_yt_video_input(yt_link: str, start_point_in_second=0, duration_in_second=30):
    """Fetch the *input* track from a YouTube link.

    The audio is saved as ``input.wav`` inside ``yt_video_dir`` and trimmed to
    the requested window.

    Returns:
        The same path twice (one value per Gradio output component this
        handler is wired to), or ``(None, None)`` if the download failed.
    """
    filename_in = _fetch_trimmed_yt_audio(
        yt_link, "input.wav", start_point_in_second, duration_in_second
    )
    return filename_in, filename_in


def get_audio_from_yt_video_ref(yt_link: str, start_point_in_second=0, duration_in_second=30):
    """Fetch the *reference* track from a YouTube link.

    The audio is saved as ``reference.wav`` inside ``yt_video_dir`` and trimmed
    to the requested window.

    Returns:
        The same path twice (one value per Gradio output component this
        handler is wired to), or ``(None, None)`` if the download failed.
    """
    filename_ref = _fetch_trimmed_yt_audio(
        yt_link, "reference.wav", start_point_in_second, duration_in_second
    )
    return filename_ref, filename_ref
def inference(file_uploaded_in, file_uploaded_ref):
    """Run music mixing style transfer and return the output wav path twice.

    NOTE(review): ``file_uploaded_in`` / ``file_uploaded_ref`` are accepted but
    not forwarded — the model is invoked with ``(None, None)``. Presumably it
    then falls back to files it locates itself; confirm against
    ``Mixing_Style_Transfer_Inference.inference``.
    """
    mixing_model = Mixing_Style_Transfer_Inference(set_up())
    result_path = mixing_model.inference(None, None)
    return (result_path,) * 2
def inference_mastering(file_uploaded_in, file_uploaded_ref):
    """Run music mastering style transfer and return the output wav path twice.

    Both uploaded file paths are handed straight to
    ``Mastering_Style_Transfer_Inference.inference``.
    """
    mastering_model = Mastering_Style_Transfer_Inference(set_up())
    result_path = mastering_model.inference(file_uploaded_in, file_uploaded_ref)
    return (result_path,) * 2
# Gradio UI: input/reference track selection (upload or YouTube), followed by
# one section each for mixing and mastering style transfer.
# NOTE(review): the nesting of the layout containers below was reconstructed
# from a whitespace-stripped source — confirm it matches the intended layout.
with gr.Blocks() as demo:
    gr.HTML(
        """
        <div style="text-align: center; max-width: 700px; margin: 0 auto;">
        <div
        style="
        display: inline-flex;
        align-items: center;
        gap: 0.8rem;
        font-size: 1.75rem;
        "
        >
        <h1 style="font-weight: 900; margin-bottom: 7px;">
        Music Mixing Style Transfer
        </h1>
        </div>
        <p style="margin-bottom: 10px; font-size: 94%">
        Hugging Face interactive demo of the paper "Music Mixing Style Transfer: A Contrastive Learning Approach to Disentangle Audio Effects" (ICASSP 2023).<br>
        a
        </div>
        """
    )
    with gr.Group():
        with gr.Column():
            # Input track: direct upload or extraction from a YouTube link.
            with gr.Blocks():
                with gr.Tab("Input Music"):
                    file_uploaded_in = gr.Audio(label="Input track (mix) to be mixing style transferred", type="filepath")
                with gr.Tab("YouTube url"):
                    with gr.Row():
                        yt_link_in = gr.Textbox(
                            label="Enter YouTube Link of the Video", autofocus=True, lines=3
                        )
                        yt_btn_in = gr.Button("Download Audio from YouTube Link", size="lg")
                    yt_audio_path_in = gr.Audio(
                        label="Input Audio Extracted from the YouTube Video", interactive=False
                    )
                    # The handler returns the path twice: once for the preview
                    # player, once to populate the upload component.
                    yt_btn_in.click(
                        get_audio_from_yt_video_input,
                        inputs=[yt_link_in],
                        outputs=[yt_audio_path_in, file_uploaded_in],
                    )
            # Reference track: same two options as the input track.
            with gr.Blocks():
                with gr.Tab("Reference Music"):
                    file_uploaded_ref = gr.Audio(label="Reference track (mix) to copy mixing style", type="filepath")
                with gr.Tab("YouTube url"):
                    with gr.Row():
                        yt_link_ref = gr.Textbox(
                            label="Enter YouTube Link of the Video", autofocus=True, lines=3
                        )
                        yt_btn_ref = gr.Button("Download Audio from YouTube Link", size="lg")
                    yt_audio_path_ref = gr.Audio(
                        label="Reference Audio Extracted from the YouTube Video", interactive=False
                    )
                    yt_btn_ref.click(
                        get_audio_from_yt_video_ref,
                        inputs=[yt_link_ref],
                        outputs=[yt_audio_path_ref, file_uploaded_ref],
                    )
    with gr.Group():
        gr.HTML(
            """
            <div> <h3> <center> Mixing Style Transfer. </h3> </div>
            """
        )
        with gr.Column():
            inference_btn = gr.Button("Perform Style Transfer")
            with gr.Row():
                output_mix = gr.Audio(label="mixing style transferred music track")
            # inference() returns (path, path) but only one output is wired here.
            # With a single output Gradio hands the whole tuple to gr.Audio, which
            # reads a tuple as (sample_rate, data) — so forward just the path.
            inference_btn.click(
                lambda src, ref: inference(src, ref)[0],
                inputs=[file_uploaded_in, file_uploaded_ref],
                outputs=[output_mix],
                api_name="inference",  # keep the endpoint name the bare function had
            )
    with gr.Group():
        gr.HTML(
            """
            <div> <h3> <center> Mastering Style Transfer. </h3> </div>
            """
        )
        with gr.Column():
            inference_mastering_btn = gr.Button("Perform Mastering Style Transfer")
            with gr.Row():
                output_master = gr.Audio(label="mastering style transferred music track")
            # Same single-output adaptation as for the mixing handler.
            inference_mastering_btn.click(
                lambda src, ref: inference_mastering(src, ref)[0],
                inputs=[file_uploaded_in, file_uploaded_ref],
                outputs=[output_master],
                api_name="inference_mastering",  # keep the endpoint name the bare function had
            )


if __name__ == "__main__":
    demo.launch(debug=True)