DataBassist committed on
Commit
b2b4fc2
·
1 Parent(s): 74309ac

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +156 -0
app.py ADDED
@@ -0,0 +1,156 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import os # ํŒŒ์ผ ๋ฐ ๋””๋ ‰ํ† ๋ฆฌ ์ž‘์—…์„ ์œ„ํ•œ ๋ชจ๋“ˆ
3
+ import torch # ๋”ฅ๋Ÿฌ๋‹ ํ”„๋ ˆ์ž„์›Œํฌ PyTorch
4
+ import librosa # ์˜ค๋””์˜ค ์ฒ˜๋ฆฌ๋ฅผ ์œ„ํ•œ ๋ชจ๋“ˆ
5
+ import binascii # ์ด์ง„ ๋ฐ์ดํ„ฐ๋ฅผ ๋‹ค๋ฃจ๋Š” ๋ชจ๋“ˆ
6
+ import warnings # ๊ฒฝ๊ณ  ๋ฉ”์‹œ์ง€๋ฅผ ์ถœ๋ ฅํ•˜๋Š” ๋ชจ๋“ˆ
7
+ import midi2audio # MIDI ํŒŒ์ผ์„ WAV ํŒŒ์ผ๋กœ ๋ณ€ํ™˜ํ•˜๋Š” ๋ชจ๋“ˆ
8
+ import numpy as np # ๋‹ค์ฐจ์› ๋ฐฐ์—ด์„ ๋‹ค๋ฃจ๋Š” ๋ชจ๋“ˆ
9
+ import pytube as pt # YouTube ๋™์˜์ƒ์„ ๋‹ค์šด๋กœ๋“œํ•˜๋Š” ๋ชจ๋“ˆ
10
+ import gradio as gr # ์ธํ„ฐ๋ž™ํ‹ฐ๋ธŒํ•œ UI๋ฅผ ๋งŒ๋“ค๊ธฐ ์œ„ํ•œ ๋ชจ๋“ˆ
11
+ import soundfile as sf # ์‚ฌ์šด๋“œ ํŒŒ์ผ์„ ๋‹ค๋ฃจ๋Š” ๋ชจ๋“ˆ
12
+ from transformers import Pop2PianoForConditionalGeneration, Pop2PianoProcessor # Pop2Piano ๋ชจ๋ธ๊ณผ ์ „์ฒ˜๋ฆฌ๊ธฐ
13
+
14
# Working directories: one for audio pulled from YouTube, one for the
# generated MIDI/WAV files.
yt_video_dir = "./yt_dir"
outputs_dir = "./midi_wav_outputs"
# exist_ok=True so that restarting the app with the directories already
# present does not raise.
os.makedirs(outputs_dir, exist_ok=True)
os.makedirs(yt_video_dir, exist_ok=True)

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
# Pretrained Pop2Piano model (moved to the selected device) and its processor.
model = Pop2PianoForConditionalGeneration.from_pretrained("sweetcocoa/pop2piano").to(device)
processor = Pop2PianoProcessor.from_pretrained("sweetcocoa/pop2piano")
# Composer names offered in the UI dropdown. Materialized as a list because
# the dropdown's `choices` is documented as a list, not a dict view.
composers = list(model.generation_config.composer_to_feature_token.keys())
23
+
24
def get_audio_from_yt_video(yt_link):
    """Download the first audio-only stream of a YouTube video.

    Args:
        yt_link: URL of the YouTube video.

    Returns:
        A ``(path, path)`` tuple holding the downloaded file path twice
        (the click handler wired to this function fills two output
        components). Both entries are ``None`` when the download fails.
    """
    try:
        audio_streams = pt.YouTube(yt_link).streams.filter(only_audio=True)
        # Random hex file name so repeated downloads do not overwrite each other.
        filename = os.path.join(yt_video_dir, binascii.hexlify(os.urandom(8)).decode() + ".mp4")
        audio_streams[0].download(filename=filename)
    except Exception as exc:
        # Best effort: report the failure (including its cause) and return
        # None instead of crashing. `Exception` rather than a bare `except`
        # so KeyboardInterrupt/SystemExit still propagate.
        warnings.warn(f"Video Not Found at {yt_link} ({exc!r})")
        filename = None

    return filename, filename
35
+
36
def inference(file_uploaded, composer):
    """Generate a piano cover from an audio file.

    Args:
        file_uploaded: Path of the input audio file.
        composer: Composer (arranger) name forwarded to ``model.generate``.

    Returns:
        Whatever ``prepare_output_file`` returns for the decoded MIDI:
        ``(wav_path, wav_path, midi_path)``.

    Raises:
        gr.Error: If no input audio was provided.
    """
    if not file_uploaded:
        # Without this guard librosa fails on None with an opaque error.
        raise gr.Error("Please upload an audio file or download one from a YouTube link first.")

    # sr=None keeps the file's native sampling rate instead of resampling.
    waveform, sr = librosa.load(file_uploaded, sr=None)

    inputs = processor(audio=waveform, sampling_rate=sr, return_tensors="pt").to(device)
    model_output = model.generate(input_features=inputs["input_features"], composer=composer)
    # Decoding is done on CPU tensors; the result's "pretty_midi_objects"
    # entry is what gets written to disk.
    tokenizer_output = processor.batch_decode(
        token_ids=model_output.to("cpu"),
        feature_extractor_output=inputs.to("cpu"),
    )["pretty_midi_objects"]

    return prepare_output_file(tokenizer_output, sr)
44
+
45
def prepare_output_file(tokenizer_output, sr):
    """Write the first decoded MIDI object to disk and render it to WAV.

    Args:
        tokenizer_output: Sequence whose first item provides ``write(path)``.
        sr: Not used by this function; kept so existing callers keep working.

    Returns:
        ``(wav_path, wav_path, midi_path)`` - the WAV path appears twice
        because the caller feeds it to two output components.
    """
    # Random hex suffix so successive generations get distinct file names.
    output_file_name = "output_" + binascii.hexlify(os.urandom(8)).decode()
    midi_output = os.path.join(outputs_dir, output_file_name + ".mid")

    tokenizer_output[0].write(midi_output)

    # Swap only the extension. A plain str.replace(".mid", ".wav") would also
    # rewrite any other ".mid" occurring elsewhere in the path.
    wav_output = os.path.splitext(midi_output)[0] + ".wav"
    midi2audio.FluidSynth().midi_to_audio(midi_output, wav_output)

    return wav_output, wav_output, midi_output
55
+
56
def get_stereo(pop_path, midi, pop_scale=0.5):
    """Mix the original audio and the rendered piano track into a 2-channel WAV.

    The first channel carries the piano rendering, the second the original
    audio multiplied by ``pop_scale``. The shorter track is zero-padded at
    the end so both channels have the same length.

    Args:
        pop_path: Path of the original audio file.
        midi: The rendered piano audio, either as an object exposing the
            file path in ``.name`` or as a plain path.
        pop_scale: Gain applied to the original audio channel.

    Returns:
        ``(stereo_mix_path, stereo_mix_path)`` - the written file's path twice,
        one entry per output component of the caller.

    Raises:
        gr.Error: If either input is missing.
    """
    if not pop_path or midi is None:
        raise gr.Error("Generate a piano cover before requesting the stereo mix.")

    # Accept both a file wrapper (path in `.name`) and a plain path string.
    midi_path = getattr(midi, "name", midi)

    pop_y, sr = librosa.load(pop_path, sr=None)
    # Load the piano track at the pop track's rate. Loading each file at its
    # own native rate and writing both at `sr` would play the piano at the
    # wrong speed/pitch whenever the two rates differ.
    midi_y, _ = librosa.load(midi_path, sr=sr)

    # Zero-pad the shorter signal at the end (a pad width of 0 is a no-op).
    target_len = max(len(pop_y), len(midi_y))
    pop_y = np.pad(pop_y, (0, target_len - len(pop_y)))
    midi_y = np.pad(midi_y, (0, target_len - len(midi_y)))
    stereo = np.stack((midi_y, pop_y * pop_scale))

    # Always write to a fresh file in the outputs directory. Deriving the name
    # from `pop_path` via str.replace("output", ...) returns `pop_path` itself
    # when it contains no "output", which would overwrite the source audio.
    stereo_mix_path = os.path.join(
        outputs_dir,
        "output_stereo_mix_" + binascii.hexlify(os.urandom(8)).decode() + ".wav",
    )
    # soundfile expects (frames, channels), hence the transpose.
    sf.write(file=stereo_mix_path, data=stereo.T, samplerate=sr, format="wav")

    return stereo_mix_path, stereo_mix_path
70
+
71
# Build the Gradio UI: input section (upload / YouTube), generation section,
# stereo-mix section, and footer links.
block = gr.Blocks(theme="Taithrah/Deep")

with block:
    # Page header.
    gr.HTML(
        """
        <div style="text-align: center; max-width: 800px; margin: 0 auto;">
        <div
        style="
        display: inline-flex;
        align-items: center;
        gap: 0.8rem;
        font-size: 1.75rem;
        "
        >
        <h1 style="font-weight: 900; margin-bottom: 12px;">
        ๐ŸŽน Pop2Piano : ํ”ผ์•„๋…ธ ์ปค๋ฒ„๊ณก ์ƒ์„ฑ๊ธฐ ๐ŸŽน
        </h1>
        </div>
        <p style="margin-bottom: 10px; font-size: 90%">
        A demo for Pop2Piano: Pop Audio-based Piano Cover Generation. <br>
        Please select the composer (Arranger) and upload the pop audio or enter the YouTube link and then click Generate.
        </p>
        </div>
        """
    )

    # Input section: direct upload on one side, YouTube download on the other.
    with gr.Group():
        with gr.Row(equal_height=True):
            with gr.Column():
                file_uploaded = gr.Audio(label="์˜ค๋””์˜ค ์—…๋กœ๋“œ", type="filepath")
            with gr.Column():
                with gr.Row():
                    yt_link = gr.Textbox(label="์œ ํŠœ๋ธŒ ๋งํฌ๋ฅผ ์ž…๋ ฅํ•˜์„ธ์š”.", autofocus=True, lines=3)
                    yt_btn = gr.Button("์œ ํŠœ๋ธŒ ๋งํฌ์—์„œ ์˜ค๋””์˜ค๋ฅผ ๋‹ค์šด ๋ฐ›์Šต๋‹ˆ๋‹ค.", size="lg")

                yt_audio_path = gr.Audio(label="์œ ํŠœ๋ธŒ ๋™์˜์ƒ์—์„œ ์ถ”์ถœํ•œ ์˜ค๋””์˜ค", interactive=False)
                # The downloaded path fills both the preview player and the
                # upload component, so it becomes the input for generation.
                yt_btn.click(get_audio_from_yt_video, inputs=[yt_link], outputs=[yt_audio_path, file_uploaded])

    # Generation section: composer choice, trigger button, and the results.
    with gr.Group():
        with gr.Column():
            composer = gr.Dropdown(label="ํŽธ๊ณก์ž ์„ ํƒ", choices=composers, value="composer1")
            generate_btn = gr.Button("๋‚˜๋งŒ์˜ ํ”ผ์•„๋…ธ ์ปค๋ฒ„๊ณก ๋งŒ๋“ค๊ธฐ ๐ŸŽน ๐ŸŽต")

        # Constructor keyword instead of the deprecated `.style()` call,
        # matching the `gr.Row(equal_height=True)` used in the input section.
        with gr.Row(equal_height=True):
            wav_output2 = gr.File(label="๋‚˜๋งŒ์˜ ํ”ผ์•„๋…ธ ์ปค๋ฒ„๊ณก์„ ๋‹ค์šด๋กœ๋“œ (.wav)")
            wav_output1 = gr.Audio(label="๋‚˜๋งŒ์˜ ํ”ผ์•„๋…ธ ์ปค๋ฒ„๊ณก ๋“ฃ๊ธฐ")
            midi_output = gr.File(label="์ƒ์„ฑํ•œ midi ํŒŒ์ผ ๋‹ค์šด๋กœ๋“œ (.mid)")
            generate_btn.click(
                inference, inputs=[file_uploaded, composer], outputs=[wav_output1, wav_output2, midi_output]
            )

    # Stereo-mix section: blend the original audio with the rendered piano.
    with gr.Group():
        gr.HTML(
            """
            <div> <h3> <center> ์›๋ณธ ์˜ค๋””์˜ค์™€ MIDI๋ฅผ ํ˜ผํ•ฉํ•˜์—ฌ ์Šคํ…Œ๋ ˆ์˜ค ๋ฏน์Šค ํ•˜๊ธฐ. </h3> </div>
            """
        )
        # No trailing comma after the call: a comma here would turn
        # `pop_scale` into a 1-tuple instead of the Slider component.
        pop_scale = gr.Slider(
            0, 1, value=0.5, label="์›๊ณก๊ณผ MIDI ์‚ฌ์ด์—์„œ ๋น„์œจ์„ ์กฐ์ •ํ•ด ๋ณด์„ธ์š”.", info="1.0 = ์›๊ณก, 0.0=.mid", interactive=True
        )
        stereo_btn = gr.Button("Get Stereo Mix")
        with gr.Row():
            stereo_mix1 = gr.Audio(label="์Šคํ…Œ๋ ˆ์˜ค ๋ฏน์Šค ๋“ฃ๊ธฐ")
            stereo_mix2 = gr.File(label="์Šคํ…Œ๋ ˆ์˜ค ๋ฏน์Šค ๋‹ค์šด๋กœ๋“œ")

        stereo_btn.click(get_stereo, inputs=[file_uploaded, wav_output2, pop_scale], outputs=[stereo_mix1, stereo_mix2])

    # Footer: credit line and project links.
    gr.HTML(
        """
        <div class="footer">
        <center>The design for this Space is taken from DataBassist </a>
        </div>
        """
    )

    gr.HTML(
        """
        <div class="footer">
        <center><p><a href="http://sweetcocoa.github.io/pop2piano_samples" style="text-decoration: underline;" target="_blank">Project Page</a>
        <center><a href="https://huggingface.co/docs/transformers/main/model_doc/pop2piano" style="text-decoration: underline;" target="_blank">HuggingFace Model Docs</a>
        <center><a href="https://github.com/sweetcocoa/pop2piano" style="text-decoration: underline;" target="_blank">Github</a>
        </p>
        </div>
        """
    )

# Start the web server for the app.
block.launch(debug=False)