# Hugging Face Space: Music Mixing Style Transfer demo app (file size: 7,056 bytes).
# NOTE: the Space status banner, per-line commit hashes and gutter line numbers
# captured from the web file viewer are not part of the script.
import argparse
import binascii
import copy
import json
import os
import shutil
import warnings

import gradio as gr
import librosa
import matplotlib.pyplot as plt
import numpy as np
import pytube as pt
import soundfile as sf
import torch
import tqdm
from pytube.exceptions import VideoUnavailable

from inference.style_transfer import *
# Working directory that holds the downloaded / staged audio files.
# It is created at import time so the handlers below can write into it.
yt_video_dir = "./yt_dir/0"
os.makedirs(name=yt_video_dir, exist_ok=True)
def get_audio_from_yt_video_input(yt_link: str, start_point_in_second=0, duration_in_second=30):
    """Download the audio of a YouTube video to use as the input track.

    The first audio-only stream reported by pytube is saved as ``input.wav``
    inside ``yt_video_dir`` and then handed to ``trim_audio`` together with
    the requested start point and duration.

    Args:
        yt_link: URL of the YouTube video.
        start_point_in_second: Start offset forwarded to ``trim_audio``.
        duration_in_second: Duration forwarded to ``trim_audio``.

    Returns:
        The saved file path twice (one value per Gradio output component),
        or ``(None, None)`` when no link was given or the video is
        unavailable.
    """
    # An empty textbox would otherwise go straight to pytube and fail there.
    if not yt_link or not yt_link.strip():
        warnings.warn("No YouTube link was provided")
        return None, None

    filename_in = os.path.join(yt_video_dir, "input.wav")
    try:
        yt = pt.YouTube(yt_link)
        audio_streams = yt.streams.filter(only_audio=True)
        audio_streams[0].download(filename=filename_in)
    except VideoUnavailable as e:
        warnings.warn(f"Video Not Found at {yt_link} ({e})")
        # Nothing was downloaded, so there is no file to trim; previously the
        # code went on to call trim_audio with a None path.
        return None, None

    # trim audio length - due to computation time on HuggingFace environment
    # (trim_audio is not defined in this file; presumably it comes from the
    # star import of inference.style_transfer - TODO confirm)
    trim_audio(
        target_file_path=filename_in,
        start_point_in_second=start_point_in_second,
        duration_in_second=duration_in_second,
    )
    return filename_in, filename_in
def get_audio_from_yt_video_ref(yt_link: str, start_point_in_second=0, duration_in_second=30):
    """Download the audio of a YouTube video to use as the reference track.

    The first audio-only stream reported by pytube is saved as
    ``reference.wav`` inside ``yt_video_dir`` and then handed to
    ``trim_audio`` together with the requested start point and duration.

    Args:
        yt_link: URL of the YouTube video.
        start_point_in_second: Start offset forwarded to ``trim_audio``.
        duration_in_second: Duration forwarded to ``trim_audio``.

    Returns:
        The saved file path twice (one value per Gradio output component),
        or ``(None, None)`` when no link was given or the video is
        unavailable.
    """
    # An empty textbox would otherwise go straight to pytube and fail there.
    if not yt_link or not yt_link.strip():
        warnings.warn("No YouTube link was provided")
        return None, None

    filename_ref = os.path.join(yt_video_dir, "reference.wav")
    try:
        yt = pt.YouTube(yt_link)
        audio_streams = yt.streams.filter(only_audio=True)
        audio_streams[0].download(filename=filename_ref)
    except VideoUnavailable as e:
        warnings.warn(f"Video Not Found at {yt_link} ({e})")
        # Nothing was downloaded, so there is no file to trim; previously the
        # code went on to call trim_audio with a None path.
        return None, None

    # trim audio length - due to computation time on HuggingFace environment
    # (trim_audio is not defined in this file; presumably it comes from the
    # star import of inference.style_transfer - TODO confirm)
    trim_audio(
        target_file_path=filename_ref,
        start_point_in_second=start_point_in_second,
        duration_in_second=duration_in_second,
    )
    return filename_ref, filename_ref
def inference(file_uploaded_in, file_uploaded_ref):
    """Run mixing style transfer from the reference track onto the input track.

    Both files are first staged inside ``yt_video_dir`` as ``input.wav`` and
    ``reference.wav``, and any ``separated`` directory left there by a
    previous run is removed.

    Args:
        file_uploaded_in: File path of the input track (mix).
        file_uploaded_ref: File path of the reference track (mix).

    Returns:
        ``(44100, fin_data_out_mix)``, the pair handed to the output
        ``gr.Audio(type='numpy')`` component.

    Raises:
        ValueError: If either track is missing.
        OSError: If a track cannot be copied into ``yt_video_dir``.
    """
    if not file_uploaded_in or not file_uploaded_ref:
        raise ValueError("Both an input track and a reference track are required.")

    # clear out previously separated results
    # (ignore_errors: the directory does not exist on the first run)
    shutil.rmtree(os.path.join(yt_video_dir, "separated"), ignore_errors=True)

    # change file path name
    # Use shutil instead of shell strings so paths containing spaces or shell
    # metacharacters are handled literally.
    staged = (
        (file_uploaded_in, "input.wav"),
        (file_uploaded_ref, "reference.wav"),
    )
    for src_path, staged_name in staged:
        try:
            shutil.copy(src_path, os.path.join(yt_video_dir, staged_name))
        except shutil.SameFileError:
            # The track already is the staged file; nothing to copy.
            pass

    # Perform music mixing style transfer
    # NOTE(review): the model wrapper is rebuilt on every call; set_up and
    # Mixing_Style_Transfer_Inference are not defined in this file.
    args = set_up()
    inference_style_transfer = Mixing_Style_Transfer_Inference(args)
    _, fin_data_out_mix = inference_style_transfer.inference(file_uploaded_in, file_uploaded_ref)
    # NOTE(review): the sample rate is hard-coded; presumably the model
    # outputs 44.1 kHz audio - confirm.
    return (44100, fin_data_out_mix)
# Gradio UI definition: title banner, paper links, one picker each for the
# input and the reference track (file upload or YouTube download), and the
# button that runs the style transfer.
# NOTE(review): the source view had lost its indentation; the nesting below
# is reconstructed from the order of the `with` statements - confirm.
with gr.Blocks() as demo:
    # Title banner.
    # NOTE(review): this markup opens two <div> elements but closes only one.
    gr.HTML(
        """
<div style="text-align: center; max-width: 700px; margin: 0 auto;">
<div
style="
display: inline-flex;
align-items: center;
gap: 0.8rem;
font-size: 1.75rem;
"
>
<h1 style="font-weight: 900; margin-bottom: 7px;">
Music Mixing Style Transfer
</h1>
</div>
"""
    )
    # Short description with links to the paper, project page, code and
    # supplementary material.
    gr.Markdown(
        """
This page is a Hugging Face interactive demo of the paper ["Music Mixing Style Transfer: A Contrastive Learning Approach to Disentangle Audio Effects"](https://huggingface.co/papers/2211.02247) (ICASSP 2023).
- [project page](https://jhtonykoo.github.io/MixingStyleTransfer/)
- [GitHub](https://github.com/jhtonyKoo/music_mixing_style_transfer)
- [supplementary](https://pale-cicada-946.notion.site/Music-Mixing-Style-Transfer-A-Contrastive-Learning-Approach-to-Disentangle-Audio-Effects-Supplemen-e6eccd9a431a4a8fa4fdd5adb2d3f219)
"""
    )
    with gr.Group():
        with gr.Column():
            # --- Input track: upload a file or fetch it from YouTube ---
            with gr.Blocks():
                with gr.Tab("Input Music"):
                    # type='filepath': the handler receives a path string.
                    file_uploaded_in = gr.Audio(label="Input track (mix) to be mixing style transferred", type='filepath')
                with gr.Tab("YouTube url"):
                    with gr.Row():
                        # NOTE(review): autofocus=True is also set on the
                        # reference link textbox further down.
                        yt_link_in = gr.Textbox(
                            label="Enter YouTube Link of the Video", autofocus=True, lines=3
                        )
                        yt_in_start_sec = gr.Number(
                            value=0,
                            label="starting point of the song (in seconds)"
                        )
                        yt_in_duration_sec = gr.Number(
                            value=30,
                            label="duration of the song (in seconds)"
                        )
                    yt_btn_in = gr.Button("Download Audio from YouTube Link", size="lg")
                    # Read-only player for the downloaded audio.
                    yt_audio_path_in = gr.Audio(
                        label="Input Audio Extracted from the YouTube Video", interactive=False
                    )
                    # The handler's two return values fill this preview
                    # player and the upload component of the "Input Music" tab.
                    yt_btn_in.click(
                        get_audio_from_yt_video_input,
                        inputs=[yt_link_in, yt_in_start_sec, yt_in_duration_sec],
                        outputs=[yt_audio_path_in, file_uploaded_in],
                    )
            # --- Reference track: same layout as the input track ---
            with gr.Blocks():
                with gr.Tab("Reference Music"):
                    # type='filepath': the handler receives a path string.
                    file_uploaded_ref = gr.Audio(label="Reference track (mix) to copy mixing style", type='filepath')
                with gr.Tab("YouTube url"):
                    with gr.Row():
                        yt_link_ref = gr.Textbox(
                            label="Enter YouTube Link of the Video", autofocus=True, lines=3
                        )
                        yt_ref_start_sec = gr.Number(
                            value=0,
                            label="starting point of the song (in seconds)"
                        )
                        yt_ref_duration_sec = gr.Number(
                            value=30,
                            label="duration of the song (in seconds)"
                        )
                    yt_btn_ref = gr.Button("Download Audio from YouTube Link", size="lg")
                    # Read-only player for the downloaded audio.
                    yt_audio_path_ref = gr.Audio(
                        label="Reference Audio Extracted from the YouTube Video", interactive=False
                    )
                    # The handler's two return values fill this preview
                    # player and the upload component of the "Reference Music" tab.
                    yt_btn_ref.click(
                        get_audio_from_yt_video_ref,
                        inputs=[yt_link_ref, yt_ref_start_sec, yt_ref_duration_sec],
                        outputs=[yt_audio_path_ref, file_uploaded_ref],
                    )
    # --- Run section: explanatory text, run button and result player ---
    with gr.Group():
        # NOTE(review): the <center> tag in this markup is never closed.
        gr.HTML(
            """
<div> <h3> <center> Mixing Style Transfer. Perform stem-wise audio-effects style conversion by first source separating the input mix. The inference computation time takes longer as the input samples' duration. so plz be patient... </h3> </div>
"""
        )
        with gr.Column():
            inference_btn = gr.Button("Run Mixing Style Transfer")
            with gr.Row():
                # type='numpy' matches the (sample rate, data) tuple that
                # inference() returns.
                output_mix = gr.Audio(label="mixing style transferred music track", type='numpy')
            # Feed both selected tracks to inference() and play the result.
            inference_btn.click(
                inference,
                inputs=[file_uploaded_in, file_uploaded_ref],
                outputs=[output_mix],
            )
# Start the Gradio app only when this file is executed as a script.
# (The dangling "|" that followed the launch call was not valid Python and
# has been removed.)
if __name__ == "__main__":
    demo.launch(debug=True)