DataBassist committed on
Commit
376a444
·
1 Parent(s): d08d343

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +138 -1
app.py CHANGED
@@ -92,6 +92,142 @@ with block:
92
  </div>
93
  """
94
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
  with gr.Group():
96
  with gr.Row(equal_height=True):
97
  with gr.Column():
@@ -131,4 +267,5 @@ with block:
131
  """
132
  )
133
 
134
- block.launch(debug=False)
 
 
92
  </div>
93
  """
94
  )
95
+ import os # ํŒŒ์ผ ๋ฐ ๋””๋ ‰ํ† ๋ฆฌ ์ž‘์—…์„ ์œ„ํ•œ ๋ชจ๋“ˆ
96
+ import torch # ๋”ฅ๋Ÿฌ๋‹ ํ”„๋ ˆ์ž„์›Œํฌ PyTorch
97
+ import librosa # ์˜ค๋””์˜ค ์ฒ˜๋ฆฌ๋ฅผ ์œ„ํ•œ ๋ชจ๋“ˆ
98
+ import binascii # ์ด์ง„ ๋ฐ์ดํ„ฐ๋ฅผ ๋‹ค๋ฃจ๋Š” ๋ชจ๋“ˆ
99
+ import warnings # ๊ฒฝ๊ณ  ๋ฉ”์‹œ์ง€๋ฅผ ์ถœ๋ ฅํ•˜๋Š” ๋ชจ๋“ˆ
100
+ import midi2audio # MIDI ํŒŒ์ผ์„ WAV ํŒŒ์ผ๋กœ ๋ณ€ํ™˜ํ•˜๋Š” ๋ชจ๋“ˆ
101
+ import numpy as np # ๋‹ค์ฐจ์› ๋ฐฐ์—ด์„ ๋‹ค๋ฃจ๋Š” ๋ชจ๋“ˆ
102
+ import pytube as pt # YouTube ๋™์˜์ƒ์„ ๋‹ค์šด๋กœ๋“œํ•˜๋Š” ๋ชจ๋“ˆ
103
+ import gradio as gr # ์ธํ„ฐ๋ž™ํ‹ฐ๋ธŒํ•œ UI๋ฅผ ๋งŒ๋“ค๊ธฐ ์œ„ํ•œ ๋ชจ๋“ˆ
104
+ import soundfile as sf # ์‚ฌ์šด๋“œ ํŒŒ์ผ์„ ๋‹ค๋ฃจ๋Š” ๋ชจ๋“ˆ
105
+ from transformers import Pop2PianoForConditionalGeneration, Pop2PianoProcessor # Pop2Piano ๋ชจ๋ธ๊ณผ ์ „์ฒ˜๋ฆฌ๊ธฐ
106
+
107
+ yt_video_dir = "./yt_dir" # YouTube 동영상 다운로드 디렉토리 경로
108
+ outputs_dir = "./midi_wav_outputs" # MIDI 및 WAV 파일 출력 디렉토리 경로
109
+ os.makedirs(outputs_dir, exist_ok=True) # 출력 디렉토리 생성 (이미 존재하는 경우 무시)
110
+ os.makedirs(yt_video_dir, exist_ok=True) # YouTube 동영상 다운로드 디렉토리 생성 (이미 존재하는 경우 무시)
111
+
112
+ device = "cuda" if torch.cuda.is_available() else "cpu" # CUDA๊ฐ€ ์‚ฌ์šฉ ๊ฐ€๋Šฅํ•œ ๊ฒฝ์šฐ GPU๋ฅผ ์‚ฌ์šฉํ•˜๊ณ , ๊ทธ๋ ‡์ง€ ์•Š์€ ๊ฒฝ์šฐ CPU๋ฅผ ์‚ฌ์šฉ
113
+ model = Pop2PianoForConditionalGeneration.from_pretrained("sweetcocoa/pop2piano").to(device) # ์‚ฌ์ „ ํ•™์Šต๋œ Pop2Piano ๋ชจ๋ธ ๋กœ๋“œ
114
+ processor = Pop2PianoProcessor.from_pretrained("sweetcocoa/pop2piano") # ์‚ฌ์ „ ํ•™์Šต๋œ Pop2Piano ์ „์ฒ˜๋ฆฌ๊ธฐ ๋กœ๋“œ
115
+ composers = model.generation_config.composer_to_feature_token.keys() # ์ž‘๊ณก๊ฐ€ ๋ชฉ๋ก ๊ฐ€์ ธ์˜ค๊ธฐ
116
+
117
+ def get_audio_from_yt_video(yt_link):
118
+ try:
119
+ yt = pt.YouTube(yt_link) # YouTube 동영상 객체 생성
120
+ t = yt.streams.filter(only_audio=True) # 오디오 스트림 필터링
121
+ filename = os.path.join(yt_video_dir, binascii.hexlify(os.urandom(8)).decode() + ".mp4") # 랜덤 파일 이름 생성
122
+ t[0].download(filename=filename) # 동영상 다운로드
123
+ except:
124
+ warnings.warn(f"Video Not Found at {yt_link}") # 경고 메시지 출력
125
+ filename = None
126
+
127
+ return filename, filename
128
+
129
+ def inference(file_uploaded, composer):
130
+ waveform, sr = librosa.load(file_uploaded, sr=None) # 파일에서 오디오 데이터와 샘플링 주파수 로드
131
+
132
+ inputs = processor(audio=waveform, sampling_rate=sr, return_tensors="pt").to(device) # 입력 데이터 전처리
133
+ model_output = model.generate(input_features=inputs["input_features"], composer=composer) # 모델에 입력하여 출력 생성
134
+ tokenizer_output = processor.batch_decode(token_ids=model_output.to("cpu"), feature_extractor_output=inputs.to("cpu"))["pretty_midi_objects"] # 토큰 디코딩
135
+
136
+ return prepare_output_file(tokenizer_output, sr) # 출력 파일 준비 함수 호출
137
+
138
+ def prepare_output_file(tokenizer_output, sr):
139
+ output_file_name = "output_" + binascii.hexlify(os.urandom(8)).decode() # ๋žœ๋ค ์ถœ๋ ฅ ํŒŒ์ผ ์ด๋ฆ„ ์ƒ์„ฑ
140
+ midi_output = os.path.join(outputs_dir, output_file_name + ".mid") # MIDI ์ถœ๋ ฅ ํŒŒ์ผ ๊ฒฝ๋กœ
141
+
142
+ tokenizer_output[0].write(midi_output) # MIDI ํŒŒ์ผ ์ž‘์„ฑ
143
+
144
+ wav_output = midi_output.replace(".mid", ".wav") # WAV ์ถœ๋ ฅ ํŒŒ์ผ ๊ฒฝ๋กœ
145
+ midi2audio.FluidSynth().midi_to_audio(midi_output, wav_output) # MIDI๋ฅผ WAV๋กœ ๋ณ€ํ™˜
146
+
147
+ return wav_output, wav_output, midi_output # WAV ๋ฐ MIDI ํŒŒ์ผ ๊ฒฝ๋กœ ๋ฐ˜ํ™˜
148
+
149
+ def get_stereo(pop_path, midi, pop_scale=0.5):
150
+ pop_y, sr = librosa.load(pop_path, sr=None) # 팝 음악 파일 로드
151
+ midi_y, _ = librosa.load(midi.name, sr=None) # MIDI 파일 로드
152
+
153
+ if len(pop_y) > len(midi_y):
154
+ midi_y = np.pad(midi_y, (0, len(pop_y) - len(midi_y))) # MIDI 길이를 팝 음악 길이에 맞춤
155
+ elif len(pop_y) < len(midi_y):
156
+ pop_y = np.pad(pop_y, (0, -len(pop_y) + len(midi_y))) # 팝 음악 길이를 MIDI 길이에 맞춤
157
+ stereo = np.stack((midi_y, pop_y * pop_scale)) # 스테레오 믹스 생성
158
+
159
+ stereo_mix_path = pop_path.replace("output", "output_stereo_mix") # 스테레오 믹스 파일 경로
160
+ sf.write(file=stereo_mix_path, data=stereo.T, samplerate=sr, format="wav") # 스테레오 믹스 파일 작성
161
+
162
+ return stereo_mix_path, stereo_mix_path # 스테레오 믹스 파일 경로 반환
163
+
164
+ block = gr.Blocks("Taithrah/Minimal") # Gradio 블록 생성
165
+
166
+ with block:
167
+ gr.HTML(
168
+ """
169
+ <div style="text-align: center; max-width: 800px; margin: 0 auto;">
170
+ <div
171
+ style="
172
+ display: inline-flex;
173
+ align-items: center;
174
+ gap: 0.8rem;
175
+ font-size: 1.75rem;
176
+ "
177
+ >
178
+ <h1 style="font-weight: 900; margin-bottom: 12px;">
179
+ 🎹 Pop2Piano : 피아노 커버곡 생성기 🎹
180
+ </h1>
181
+ </div>
182
+ <p style="margin-bottom: 12px; font-size: 90%">
183
+ A demo for Pop2Piano: Pop Audio-based Piano Cover Generation. <br>
184
+ Please select the composer (Arranger) and upload the pop audio or enter the YouTube link and then click Generate.
185
+ </p>
186
+ </div>
187
+ """
188
+ )
189
+ with gr.Group():
190
+ with gr.Row(equal_height=True):
191
+ with gr.Column():
192
+ file_uploaded = gr.Audio(label="오디오 업로드", type="filepath")
193
+ with gr.Column():
194
+ with gr.Row():
195
+ yt_link = gr.Textbox(label="유튜브 링크를 입력하세요.", autofocus=True, lines=3)
196
+ yt_btn = gr.Button("유튜브 링크에서 오디오를 다운 받습니다.", size="lg")
197
+
198
+ yt_audio_path = gr.Audio(label="유튜브 동영상에서 추출한 오디오", interactive=False)
199
+ yt_btn.click(get_audio_from_yt_video, inputs=[yt_link], outputs=[yt_audio_path, file_uploaded])
200
+
201
+ with gr.Group():
202
+ with gr.Column():
203
+ composer = gr.Dropdown(label="Arranger", choices=composers, value="composer1")
204
+ generate_btn = gr.Button("나만의 피아노 커버곡 만들기🎹🎵")
205
+
206
+ with gr.Row().style(mobile_collapse=False, equal_height=True):
207
+ wav_output2 = gr.File(label="나만의 피아노 커버곡을 다운로드 (.wav)")
208
+ wav_output1 = gr.Audio(label="나만의 피아노 커버곡 듣기")
209
+ midi_output = gr.File(label="생성한 midi 파일 다운로드 (.mid)")
210
+ generate_btn.click(inference,
211
+ inputs=[file_uploaded, composer],
212
+ outputs=[wav_output1, wav_output2, midi_output])
213
+
214
+
215
+
216
+
217
+ gr.HTML(
218
+ """
219
+ <div class="footer">
220
+ <center><p><a href="http://sweetcocoa.github.io/pop2piano_samples" style="text-decoration: underline;" target="_blank">Project Page</a>
221
+ <center><a href="https://huggingface.co/docs/transformers/main/model_doc/pop2piano" style="text-decoration: underline;" target="_blank">HuggingFace Model Docs</a>
222
+ <center><a href="https://github.com/sweetcocoa/pop2piano" style="text-decoration: underline;" target="_blank">Github</a>
223
+ </p>
224
+ </div>
225
+ """
226
+ )
227
+
228
+ block.launch(debug=False)
229
+
230
+ """
231
  with gr.Group():
232
  with gr.Row(equal_height=True):
233
  with gr.Column():
 
267
  """
268
  )
269
 
270
+ block.launch(debug=False)
271
+ """