Spaces:
Sleeping
Sleeping
Commit
ยท
376a444
1
Parent(s):
d08d343
Update app.py
Browse files
app.py
CHANGED
@@ -92,6 +92,142 @@ with block:
|
|
92 |
</div>
|
93 |
"""
|
94 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
95 |
with gr.Group():
|
96 |
with gr.Row(equal_height=True):
|
97 |
with gr.Column():
|
@@ -131,4 +267,5 @@ with block:
|
|
131 |
"""
|
132 |
)
|
133 |
|
134 |
-
block.launch(debug=False)
|
|
|
|
92 |
</div>
|
93 |
"""
|
94 |
)
|
95 |
+
import os  # Module for file and directory operations
|
96 |
+
import torch  # PyTorch deep learning framework
|
97 |
+
import librosa  # Module for audio processing
|
98 |
+
import binascii  # Module for handling binary data
|
99 |
+
import warnings  # Module for emitting warning messages
|
100 |
+
import midi2audio  # Module for converting MIDI files to WAV files
|
101 |
+
import numpy as np  # Module for working with multidimensional arrays
|
102 |
+
import pytube as pt  # Module for downloading YouTube videos
|
103 |
+
import gradio as gr  # Module for building interactive UIs
|
104 |
+
import soundfile as sf  # Module for reading and writing sound files
|
105 |
+
from transformers import Pop2PianoForConditionalGeneration, Pop2PianoProcessor  # Pop2Piano model and processor
|
106 |
+
|
107 |
+
# Working directories: downloaded YouTube audio and generated MIDI/WAV files.
yt_video_dir = "./yt_dir"
outputs_dir = "./midi_wav_outputs"
for _directory in (outputs_dir, yt_video_dir):
    # exist_ok=True: an already existing directory is not an error.
    os.makedirs(_directory, exist_ok=True)

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Load the pretrained Pop2Piano model (moved to the selected device) and its
# matching processor.
model = Pop2PianoForConditionalGeneration.from_pretrained("sweetcocoa/pop2piano")
model = model.to(device)
processor = Pop2PianoProcessor.from_pretrained("sweetcocoa/pop2piano")

# Composer names known to the model's generation config; used as the
# "Arranger" dropdown choices in the UI.
composers = model.generation_config.composer_to_feature_token.keys()
|
116 |
+
|
117 |
+
def get_audio_from_yt_video(yt_link):
    """Download the audio-only stream of a YouTube video into ``yt_video_dir``.

    Args:
        yt_link: URL of the YouTube video.

    Returns:
        A ``(filename, filename)`` pair: the path of the downloaded file,
        repeated because the click handler feeds two output components.
        ``(None, None)`` is returned when the download fails for any reason;
        in that case a warning is emitted instead of raising.
    """
    try:
        yt = pt.YouTube(yt_link)
        audio_streams = yt.streams.filter(only_audio=True)
        # Random hex name so repeated or concurrent downloads do not collide.
        filename = os.path.join(yt_video_dir, binascii.hexlify(os.urandom(8)).decode() + ".mp4")
        # Indexing raises IndexError when no audio-only stream exists; that
        # case is handled below like any other download failure.
        audio_streams[0].download(filename=filename)
    except Exception as err:
        # Catch Exception rather than using a bare ``except`` so that
        # KeyboardInterrupt / SystemExit still propagate, and report the
        # underlying cause instead of discarding it.
        warnings.warn(f"Video Not Found at {yt_link} ({err!r})")
        filename = None

    return filename, filename
|
128 |
+
|
129 |
+
def inference(file_uploaded, composer):
    """Generate a piano cover for an audio file with the Pop2Piano model.

    Args:
        file_uploaded: Path of the audio file to convert; it is read with
            ``librosa.load``.
        composer: Composer (arranger) name forwarded to ``model.generate``.

    Returns:
        Whatever ``prepare_output_file`` returns for the decoded MIDI objects
        and the sampling rate of the loaded audio.
    """
    # sr=None asks librosa not to resample; the returned rate is passed on to
    # the processor.
    audio, sampling_rate = librosa.load(file_uploaded, sr=None)

    # Pre-process the waveform and move the features to the model's device.
    features = processor(audio=audio, sampling_rate=sampling_rate, return_tensors="pt")
    features = features.to(device)

    # Generate output tokens conditioned on the chosen composer.
    generated_tokens = model.generate(
        input_features=features["input_features"],
        composer=composer,
    )

    # Decode the tokens on the CPU into MIDI objects.
    decoded = processor.batch_decode(
        token_ids=generated_tokens.to("cpu"),
        feature_extractor_output=features.to("cpu"),
    )
    midi_objects = decoded["pretty_midi_objects"]

    return prepare_output_file(midi_objects, sampling_rate)
|
137 |
+
|
138 |
+
def prepare_output_file(tokenizer_output, sr):
    """Write the first decoded MIDI object to disk and render it to WAV.

    Args:
        tokenizer_output: Indexable collection whose first element exposes a
            ``write(path)`` method (the caller passes the
            ``"pretty_midi_objects"`` entry of the processor's decode output).
        sr: Unused by this function; kept so existing callers keep working.

    Returns:
        A ``(wav_path, wav_path, midi_path)`` tuple. The WAV path appears
        twice because the UI binds it to two output components.
    """
    # Build both paths from one shared stem. The previous
    # ``midi_output.replace(".mid", ".wav")`` rewrote every ".mid" occurrence
    # in the whole path, so a directory name containing ".mid" would have
    # produced a wrong WAV location.
    output_stem = os.path.join(outputs_dir, "output_" + binascii.hexlify(os.urandom(8)).decode())
    midi_output = output_stem + ".mid"
    wav_output = output_stem + ".wav"

    # Write the MIDI file, then convert it to WAV with FluidSynth.
    tokenizer_output[0].write(midi_output)
    midi2audio.FluidSynth().midi_to_audio(midi_output, wav_output)

    return wav_output, wav_output, midi_output
|
148 |
+
|
149 |
+
def get_stereo(pop_path, midi, pop_scale=0.5):
    """Mix the pop audio and the rendered piano audio into a two-channel WAV.

    The first channel holds the piano audio and the second the pop audio
    multiplied by ``pop_scale``. The shorter signal is zero-padded at the end
    so both channels have the same length.

    Args:
        pop_path: Path of the pop audio file.
        midi: Object whose ``name`` attribute is the path of the rendered
            piano audio (presumably an uploaded-file object; verify against
            the caller).
        pop_scale: Gain applied to the pop channel.

    Returns:
        A ``(stereo_mix_path, stereo_mix_path)`` pair with the path of the
        written mix, repeated twice.
    """
    pop_y, sr = librosa.load(pop_path, sr=None)
    # Load the piano audio at the pop track's sampling rate. The mix is
    # written with a single rate, so two different native rates would leave
    # the channels out of sync.
    midi_y, _ = librosa.load(midi.name, sr=sr)

    # Zero-pad whichever signal is shorter; a pad width of 0 is a no-op.
    target_len = max(len(pop_y), len(midi_y))
    midi_y = np.pad(midi_y, (0, target_len - len(midi_y)))
    pop_y = np.pad(pop_y, (0, target_len - len(pop_y)))
    stereo = np.stack((midi_y, pop_y * pop_scale))

    # Rename only the file name, never the directory part: replacing "output"
    # across the full path would also rewrite directory names containing it.
    directory, basename = os.path.split(pop_path)
    mix_name = basename.replace("output", "output_stereo_mix", 1)
    if mix_name == basename:
        # No "output" in the file name: add a prefix so the source audio is
        # not overwritten by the mix.
        mix_name = "stereo_mix_" + basename
    stereo_mix_path = os.path.join(directory, mix_name)

    # soundfile expects (frames, channels), hence the transpose.
    sf.write(file=stereo_mix_path, data=stereo.T, samplerate=sr, format="wav")

    return stereo_mix_path, stereo_mix_path
|
163 |
+
|
164 |
+
block = gr.Blocks("Taithrah/Minimal") # Gradio ๋ธ๋ก ์์ฑ
|
165 |
+
|
166 |
+
with block:
|
167 |
+
gr.HTML(
|
168 |
+
"""
|
169 |
+
<div style="text-align: center; max-width: 800px; margin: 0 auto;">
|
170 |
+
<div
|
171 |
+
style="
|
172 |
+
display: inline-flex;
|
173 |
+
align-items: center;
|
174 |
+
gap: 0.8rem;
|
175 |
+
font-size: 1.75rem;
|
176 |
+
"
|
177 |
+
>
|
178 |
+
<h1 style="font-weight: 900; margin-bottom: 12px;">
|
179 |
+
๐น Pop2Piano : ํผ์๋
ธ ์ปค๋ฒ๊ณก ์์ฑ๊ธฐ ๐น
|
180 |
+
</h1>
|
181 |
+
</div>
|
182 |
+
<p style="margin-bottom: 12px; font-size: 90%">
|
183 |
+
A demo for Pop2Piano: Pop Audio-based Piano Cover Generation. <br>
|
184 |
+
Please select the composer (Arranger) and upload the pop audio or enter the YouTube link and then click Generate.
|
185 |
+
</p>
|
186 |
+
</div>
|
187 |
+
"""
|
188 |
+
)
|
189 |
+
with gr.Group():
|
190 |
+
with gr.Row(equal_height=True):
|
191 |
+
with gr.Column():
|
192 |
+
file_uploaded = gr.Audio(label="์ค๋์ค ์
๋ก๋", type="filepath")
|
193 |
+
with gr.Column():
|
194 |
+
with gr.Row():
|
195 |
+
yt_link = gr.Textbox(label="์ ํ๋ธ ๋งํฌ๋ฅผ ์
๋ ฅํ์ธ์.", autofocus=True, lines=3)
|
196 |
+
yt_btn = gr.Button("์ ํ๋ธ ๋งํฌ์์ ์ค๋์ค๋ฅผ ๋ค์ด ๋ฐ์ต๋๋ค.", size="lg")
|
197 |
+
|
198 |
+
yt_audio_path = gr.Audio(label="์ ํ๋ธ ๋์์์์ ์ถ์ถํ ์ค๋์ค", interactive=False)
|
199 |
+
yt_btn.click(get_audio_from_yt_video, inputs=[yt_link], outputs=[yt_audio_path, file_uploaded])
|
200 |
+
|
201 |
+
with gr.Group():
|
202 |
+
with gr.Column():
|
203 |
+
composer = gr.Dropdown(label="Arranger", choices=composers, value="composer1")
|
204 |
+
generate_btn = gr.Button("๋๋ง์ ํผ์๋
ธ ์ปค๋ฒ๊ณก ๋ง๋ค๊ธฐ๐น๐ต")
|
205 |
+
|
206 |
+
with gr.Row().style(mobile_collapse=False, equal_height=True):
|
207 |
+
wav_output2 = gr.File(label="๋๋ง์ ํผ์๋
ธ ์ปค๋ฒ๊ณก์ ๋ค์ด๋ก๋ (.wav)")
|
208 |
+
wav_output1 = gr.Audio(label="๋๋ง์ ํผ์๋
ธ ์ปค๋ฒ๊ณก ๋ฃ๊ธฐ")
|
209 |
+
midi_output = gr.File(label="์์ฑํ midi ํ์ผ ๋ค์ด๋ก๋ (.mid)")
|
210 |
+
generate_btn.click(inference,
|
211 |
+
inputs=[file_uploaded, composer],
|
212 |
+
outputs=[wav_output1, wav_output2, midi_output])
|
213 |
+
|
214 |
+
|
215 |
+
|
216 |
+
|
217 |
+
gr.HTML(
|
218 |
+
"""
|
219 |
+
<div class="footer">
|
220 |
+
<center><p><a href="http://sweetcocoa.github.io/pop2piano_samples" style="text-decoration: underline;" target="_blank">Project Page</a>
|
221 |
+
<center><a href="https://huggingface.co/docs/transformers/main/model_doc/pop2piano" style="text-decoration: underline;" target="_blank">HuggingFace Model Docs</a>
|
222 |
+
<center><a href="https://github.com/sweetcocoa/pop2piano" style="text-decoration: underline;" target="_blank">Github</a>
|
223 |
+
</p>
|
224 |
+
</div>
|
225 |
+
"""
|
226 |
+
)
|
227 |
+
|
228 |
+
block.launch(debug=False)
|
229 |
+
|
230 |
+
"""
|
231 |
with gr.Group():
|
232 |
with gr.Row(equal_height=True):
|
233 |
with gr.Column():
|
|
|
267 |
"""
|
268 |
)
|
269 |
|
270 |
+
block.launch(debug=False)
|
271 |
+
"""
|