File size: 7,056 Bytes
45ea80f
 
 
 
 
 
 
 
 
 
2e66664
45ea80f
 
2e66664
45ea80f
 
2e66664
45ea80f
459a21c
45ea80f
6d70884
d48a45a
45ea80f
 
 
9697a6f
45ea80f
 
 
 
 
 
 
 
 
 
 
 
36d6869
45ea80f
36d6869
45ea80f
 
 
 
 
 
 
 
 
 
 
 
9697a6f
45ea80f
 
 
 
 
 
 
 
9697a6f
45ea80f
 
9697a6f
45ea80f
 
2e66664
45ea80f
b9bf35a
158585c
043c2d7
2e66664
45ea80f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8c9ee04
45ea80f
 
 
 
 
 
 
cac2c49
45ea80f
e3c9443
45ea80f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e3c9443
45ea80f
 
 
 
 
 
 
043c2d7
e182234
6d6c0d5
45ea80f
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
import os
import binascii
import warnings

import json
import argparse
import copy

import numpy as np
import matplotlib.pyplot as plt
import torch
import tqdm
import librosa
import soundfile as sf
import gradio as gr
import pytube as pt

from pytube.exceptions import VideoUnavailable

from inference.style_transfer import *


# Working directory for the audio files handled by this demo; created up front
# so that later downloads and copies have somewhere to land.
yt_video_dir = "./yt_dir/0"
os.makedirs(yt_video_dir, exist_ok=True)


def get_audio_from_yt_video_input(yt_link: str, start_point_in_second=0, duration_in_second=30):
    """Download the audio of a YouTube video as the input track and trim it.

    The first audio-only stream of the video is saved as ``input.wav`` inside
    ``yt_video_dir`` and then handed to ``trim_audio``.

    Args:
        yt_link: URL of the YouTube video.
        start_point_in_second: Start offset forwarded to ``trim_audio``.
        duration_in_second: Segment length forwarded to ``trim_audio``.

    Returns:
        The downloaded file path twice (one value per Gradio output component
        bound to this callback), or ``(None, None)`` when no audio could be
        fetched.
    """
    # Nothing to download: bail out before touching the network.
    if not yt_link or not yt_link.strip():
        warnings.warn("No YouTube link was provided")
        return None, None

    # NOTE(review): the stream is stored under a ".wav" name whatever its real
    # container is - confirm that trim_audio copes with that.
    filename_in = os.path.join(yt_video_dir, "input.wav")
    try:
        # first() yields None instead of raising IndexError on an empty query.
        audio_stream = pt.YouTube(yt_link).streams.filter(only_audio=True).first()
        if audio_stream is not None:
            audio_stream.download(filename=filename_in)
    except VideoUnavailable as e:
        warnings.warn(f"Video Not Found at {yt_link} ({e})")
        return None, None

    if audio_stream is None:
        warnings.warn(f"No audio stream found at {yt_link}")
        return None, None

    # trim audio length - due to computation time on HuggingFace environment
    trim_audio(
        target_file_path=filename_in,
        start_point_in_second=start_point_in_second,
        duration_in_second=duration_in_second,
    )

    return filename_in, filename_in

def get_audio_from_yt_video_ref(yt_link: str, start_point_in_second=0, duration_in_second=30):
    """Download the audio of a YouTube video as the reference track and trim it.

    The first audio-only stream of the video is saved as ``reference.wav``
    inside ``yt_video_dir`` and then handed to ``trim_audio``.

    Args:
        yt_link: URL of the YouTube video.
        start_point_in_second: Start offset forwarded to ``trim_audio``.
        duration_in_second: Segment length forwarded to ``trim_audio``.

    Returns:
        The downloaded file path twice (one value per Gradio output component
        bound to this callback), or ``(None, None)`` when no audio could be
        fetched.
    """
    # Nothing to download: bail out before touching the network.
    if not yt_link or not yt_link.strip():
        warnings.warn("No YouTube link was provided")
        return None, None

    # NOTE(review): the stream is stored under a ".wav" name whatever its real
    # container is - confirm that trim_audio copes with that.
    filename_ref = os.path.join(yt_video_dir, "reference.wav")
    try:
        # first() yields None instead of raising IndexError on an empty query.
        audio_stream = pt.YouTube(yt_link).streams.filter(only_audio=True).first()
        if audio_stream is not None:
            audio_stream.download(filename=filename_ref)
    except VideoUnavailable as e:
        warnings.warn(f"Video Not Found at {yt_link} ({e})")
        return None, None

    if audio_stream is None:
        warnings.warn(f"No audio stream found at {yt_link}")
        return None, None

    # trim audio length - due to computation time on HuggingFace environment
    trim_audio(
        target_file_path=filename_ref,
        start_point_in_second=start_point_in_second,
        duration_in_second=duration_in_second,
    )

    return filename_ref, filename_ref

def inference(file_uploaded_in, file_uploaded_ref):
    """Apply the mixing style of the reference track to the input track.

    Both files are first copied into ``yt_video_dir`` under the fixed names
    ``input.wav`` / ``reference.wav`` and any earlier ``separated`` directory
    there is removed, then ``Mixing_Style_Transfer_Inference`` is run.

    Args:
        file_uploaded_in: File path of the input mix.
        file_uploaded_ref: File path of the reference mix.

    Returns:
        ``(44100, audio)`` where ``audio`` is the second value returned by
        ``Mixing_Style_Transfer_Inference.inference``.

    Raises:
        ValueError: If either of the two file paths is missing.
    """
    # Function-scope imports: only this function needs them.
    import shutil
    from contextlib import suppress

    # Fail early with a clear message instead of deep inside the model code.
    if not file_uploaded_in or not file_uploaded_ref:
        raise ValueError("Both an input track and a reference track are required.")

    # clear out previously separated results
    shutil.rmtree(os.path.join(yt_video_dir, "separated"), ignore_errors=True)

    # change file path name
    # (no shell involved, so paths with spaces or shell metacharacters are safe)
    staged_copies = (
        (file_uploaded_in, "input.wav"),
        (file_uploaded_ref, "reference.wav"),
    )
    for src_path, dst_name in staged_copies:
        # The source may already be the staged file (e.g. a YouTube download).
        with suppress(shutil.SameFileError):
            shutil.copyfile(src_path, os.path.join(yt_video_dir, dst_name))

    # Perform music mixing style transfer
    args = set_up()

    inference_style_transfer = Mixing_Style_Transfer_Inference(args)
    _, fin_data_out_mix = inference_style_transfer.inference(file_uploaded_in, file_uploaded_ref)

    return (44100, fin_data_out_mix)



# Gradio UI definition. Layout: page header, an input-track picker and a
# reference-track picker (each with an upload tab and a YouTube tab), and
# finally the button that runs the style transfer.
with gr.Blocks() as demo:
    # Page title (both wrapper <div>s are closed explicitly).
    gr.HTML(
        """
            <div style="text-align: center; max-width: 700px; margin: 0 auto;">
              <div
                style="
                  display: inline-flex;
                  align-items: center;
                  gap: 0.8rem;
                  font-size: 1.75rem;
                "
              >
                <h1 style="font-weight: 900; margin-bottom: 7px;">
                  Music Mixing Style Transfer
                </h1>
              </div>
            </div>
        """
    )
    # Links to the paper and its companion resources.
    gr.Markdown(
        """
        This page is a Hugging Face interactive demo of the paper ["Music Mixing Style Transfer: A Contrastive Learning Approach to Disentangle Audio Effects"](https://huggingface.co/papers/2211.02247) (ICASSP 2023).
            - [project page](https://jhtonykoo.github.io/MixingStyleTransfer/)
            - [GitHub](https://github.com/jhtonyKoo/music_mixing_style_transfer)
            - [supplementary](https://pale-cicada-946.notion.site/Music-Mixing-Style-Transfer-A-Contrastive-Learning-Approach-to-Disentangle-Audio-Effects-Supplemen-e6eccd9a431a4a8fa4fdd5adb2d3f219)
        """
    )
    with gr.Group():
        with gr.Column():
            # --- Input track: direct upload or YouTube download ---
            with gr.Blocks():
                with gr.Tab("Input Music"):
                    file_uploaded_in = gr.Audio(label="Input track (mix) to be mixing style transferred", type='filepath')
                with gr.Tab("YouTube url"):
                    with gr.Row():
                        yt_link_in = gr.Textbox(
                            label="Enter YouTube Link of the Video", autofocus=True, lines=3
                        )
                        yt_in_start_sec = gr.Number(
                            value=0,
                            label="starting point of the song (in seconds)"
                        )
                        yt_in_duration_sec = gr.Number(
                            value=30,
                            label="duration of the song (in seconds)"
                        )
                        yt_btn_in = gr.Button("Download Audio from YouTube Link", size="lg")
                    yt_audio_path_in = gr.Audio(
                        label="Input Audio Extracted from the YouTube Video", interactive=False
                    )
                    # The callback result feeds both the preview player in this
                    # tab and the upload component of the "Input Music" tab.
                    yt_btn_in.click(
                        get_audio_from_yt_video_input,
                        inputs=[yt_link_in, yt_in_start_sec, yt_in_duration_sec],
                        outputs=[yt_audio_path_in, file_uploaded_in],
                    )
            # --- Reference track: direct upload or YouTube download ---
            with gr.Blocks():
                with gr.Tab("Reference Music"):
                    file_uploaded_ref = gr.Audio(label="Reference track (mix) to copy mixing style", type='filepath')
                with gr.Tab("YouTube url"):
                    with gr.Row():
                        yt_link_ref = gr.Textbox(
                            label="Enter YouTube Link of the Video", autofocus=True, lines=3
                        )
                        yt_ref_start_sec = gr.Number(
                            value=0,
                            label="starting point of the song (in seconds)"
                        )
                        yt_ref_duration_sec = gr.Number(
                            value=30,
                            label="duration of the song (in seconds)"
                        )
                        yt_btn_ref = gr.Button("Download Audio from YouTube Link", size="lg")
                    yt_audio_path_ref = gr.Audio(
                        label="Reference Audio Extracted from the YouTube Video", interactive=False
                    )
                    # The callback result feeds both the preview player in this
                    # tab and the upload component of the "Reference Music" tab.
                    yt_btn_ref.click(
                        get_audio_from_yt_video_ref,
                        inputs=[yt_link_ref, yt_ref_start_sec, yt_ref_duration_sec],
                        outputs=[yt_audio_path_ref, file_uploaded_ref],
                    )

    # --- Run the style transfer and play back the result ---
    with gr.Group():
        gr.HTML(
            """
            <div> <h3> <center> Mixing Style Transfer. Perform stem-wise audio-effects style conversion by first source separating the input mix. The inference computation time takes longer as the input samples' duration. so plz be patient...  </center> </h3> </div>
            """
        )
        with gr.Column():
            inference_btn = gr.Button("Run Mixing Style Transfer")
        with gr.Row():
            output_mix = gr.Audio(label="mixing style transferred music track", type='numpy')
            # `inference` returns a (sample_rate, data) tuple for this player.
            inference_btn.click(
                inference,
                inputs=[file_uploaded_in, file_uploaded_ref],
                outputs=[output_mix],
            )


    
# Launch the demo only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch(debug=True)