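"""Gradio demo for "Music Mixing Style Transfer: A Contrastive Learning Approach to
Disentangle Audio Effects" (ICASSP 2023)."""
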
import os
import binascii
import warnings
import json
import argparse
import copy
import numpy as np
import matplotlib.pyplot as plt
import torch
import tqdm
import librosa
import librosa.display
import soundfile as sf
import pyloudnorm as pyln
from dotmap import DotMap
import gradio as gr
import pytube as pt
from pytube.exceptions import VideoUnavailable

from utils import mp3_write, normalize, db2linear
from models import load_model_with_args
from separate_func import (
    conv_tasnet_separate,
)

# Directory for audio downloaded from YouTube links (path is an assumption; any writable dir works).
yt_video_dir = "./yt_dir"
os.makedirs(yt_video_dir, exist_ok=True)


def get_audio_from_yt_video(yt_link: str):
    try:
        yt = pt.YouTube(yt_link)
        t = yt.streams.filter(only_audio=True)
        filename = os.path.join(yt_video_dir, binascii.hexlify(os.urandom(8)).decode() + ".wav")
        t[0].download(filename=filename)
    except VideoUnavailable as e:
        warnings.warn(f"Video Not Found at {yt_link} ({e})")
        filename = None

    return filename, filename


def inference(file_uploaded_in, file_uploaded_ref):
    # Placeholder: the mixing style transfer itself is not performed here yet.
    output_wav = None
    return output_wav
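

# Gradio interface: upload the input and reference tracks (file or YouTube link) and download the result.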
with gr.Blocks() as demo:
    gr.HTML(
        """
        <div style="text-align: center; max-width: 700px; margin: 0 auto;">
            <div
                style="
                    display: inline-flex;
                    align-items: center;
                    gap: 0.8rem;
                    font-size: 1.75rem;
                "
            >
                <h1 style="font-weight: 900; margin-bottom: 7px;">
                    Music Mixing Style Transfer
                </h1>
            </div>
            <p style="margin-bottom: 10px; font-size: 94%">
                Hugging Face interactive demo of the paper "Music Mixing Style Transfer: A Contrastive Learning Approach to Disentangle Audio Effects" (ICASSP 2023).
            </p>
        </div>
        """
    )
    with gr.Group():
        with gr.Column():
            with gr.Blocks():
                with gr.Tab("Input Music"):
                    file_uploaded_in = gr.Audio(label="Input track (mix) to be mixing style transferred", type="filepath")
                with gr.Tab("YouTube url"):
                    with gr.Row():
                        yt_link_in = gr.Textbox(
                            label="Enter YouTube Link of the Video", autofocus=True, lines=3
                        )
                        yt_btn_in = gr.Button("Download Audio from YouTube Link", size="lg")
                    yt_audio_path_in = gr.Audio(
                        label="Input Audio Extracted from the YouTube Video", interactive=False
                    )
                    yt_btn_in.click(
                        get_audio_from_yt_video,
                        inputs=[yt_link_in],
                        outputs=[yt_audio_path_in, file_uploaded_in],
                    )
            with gr.Blocks():
                with gr.Tab("Reference Music"):
                    file_uploaded_ref = gr.Audio(label="Reference track (mix) to copy mixing style", type="filepath")
                with gr.Tab("YouTube url"):
                    with gr.Row():
                        yt_link_ref = gr.Textbox(
                            label="Enter YouTube Link of the Video", autofocus=True, lines=3
                        )
                        yt_btn_ref = gr.Button("Download Audio from YouTube Link", size="lg")
                    yt_audio_path_ref = gr.Audio(
                        label="Reference Audio Extracted from the YouTube Video", interactive=False
                    )
                    yt_btn_ref.click(
                        get_audio_from_yt_video,
                        inputs=[yt_link_ref],
                        outputs=[yt_audio_path_ref, file_uploaded_ref],
                    )
    with gr.Group():
        gr.HTML(
            """
            <div> <h3> <center> Mixing Style Transferred Output. </center> </h3> </div>
            """
        )
        # with gr.Row().style(mobile_collapse=False, equal_height=True):
        with gr.Row():
            generate_btn = gr.Button("Transfer Mixing Style", size="lg")
            output_mix = gr.File(label="Download style transferred music track")
        generate_btn.click(
            inference,
            inputs=[file_uploaded_in, file_uploaded_ref],
            outputs=[output_mix],
        )


if __name__ == "__main__":
    demo.launch(debug=True)