Spaces:
Sleeping
Sleeping
Commit
·
dae5b5d
1
Parent(s):
9551e0c
Update app.py
Browse files
app.py
CHANGED
@@ -1,137 +1,98 @@
|
|
1 |
-
import os
|
2 |
-
import torch
|
3 |
-
import librosa
|
4 |
-
import binascii
|
5 |
-
import warnings
|
6 |
-
import midi2audio # MIDI ํ์ผ์ WAV ํ์ผ๋ก
|
7 |
-
import numpy as np
|
8 |
-
import pytube as pt # YouTube
|
9 |
-
import gradio as gr
|
10 |
-
import soundfile as sf
|
11 |
-
from transformers import Pop2PianoForConditionalGeneration, Pop2PianoProcessor
|
12 |
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
os.makedirs(
|
|
|
17 |
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
|
|
22 |
|
|
|
23 |
def get_audio_from_yt_video(yt_link):
    """Download the first audio-only stream of a YouTube video.

    The file is saved under ``yt_video_dir`` with a random 16-hex-digit name
    and an ``.mp4`` extension.

    Args:
        yt_link: URL of the YouTube video.

    Returns:
        A ``(filename, filename)`` pair -- the same path twice, because the
        click handler that calls this function fills two output components.
        Both entries are ``None`` when the download fails.
    """
    try:
        audio_streams = pt.YouTube(yt_link).streams.filter(only_audio=True)
        filename = os.path.join(yt_video_dir, binascii.hexlify(os.urandom(8)).decode() + ".mp4")
        audio_streams[0].download(filename=filename)
    except Exception as err:
        # Best-effort: any failure (bad link, network error, no audio stream)
        # clears both outputs instead of crashing the handler. `Exception`
        # rather than a bare `except` so that KeyboardInterrupt and
        # SystemExit still propagate; the cause is included in the warning.
        warnings.warn(f"Video Not Found at {yt_link} ({err!r})")
        filename = None

    return filename, filename
|
34 |
-
|
35 |
-
def inference(file_uploaded, composer):
    """Generate a piano cover for an audio file with the Pop2Piano model.

    Args:
        file_uploaded: Path of the audio file to convert.
        composer: Arranger style, forwarded to ``model.generate`` as
            ``composer``.

    Returns:
        Whatever ``prepare_output_file`` returns for the decoded MIDI.

    Raises:
        gr.Error: If no audio file was provided.
    """
    if not file_uploaded:
        # Without this guard an empty audio input reaches librosa.load and
        # fails with an error that means nothing to the user.
        raise gr.Error("먼저 오디오를 업로드하거나 유튜브 링크에서 오디오를 받아 주세요.")

    # Load the audio samples and sampling rate; sr=None keeps the file's own rate.
    waveform, sr = librosa.load(file_uploaded, sr=None)

    # Preprocess the input and run generation on the selected device.
    inputs = processor(audio=waveform, sampling_rate=sr, return_tensors="pt").to(device)
    model_output = model.generate(input_features=inputs["input_features"], composer=composer)

    # Move tokens and features to the CPU before decoding them to MIDI objects.
    tokenizer_output = processor.batch_decode(
        token_ids=model_output.to("cpu"),
        feature_extractor_output=inputs.to("cpu"),
    )["pretty_midi_objects"]

    return prepare_output_file(tokenizer_output, sr)
|
43 |
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
wav_output = midi_output.replace(".mid", ".wav") # WAV ์ถ๋ ฅ ํ์ผ ๊ฒฝ๋ก
|
51 |
-
midi2audio.FluidSynth().midi_to_audio(midi_output, wav_output) # MIDI๋ฅผ WAV๋ก ๋ณํ
|
52 |
-
|
53 |
-
return wav_output, wav_output, midi_output # WAV ๋ฐ MIDI ํ์ผ ๊ฒฝ๋ก ๋ฐํ
|
54 |
|
55 |
-
|
56 |
-
pop_y, sr = librosa.load(pop_path, sr=None) # ํ ์์
ํ์ผ ๋ก๋
|
57 |
-
midi_y, _ = librosa.load(midi.name, sr=None) # MIDI ํ์ผ ๋ก๋
|
58 |
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
return stereo_mix_path, stereo_mix_path # ์คํ
๋ ์ค ๋ฏน์ค ํ์ผ ๊ฒฝ๋ก ๋ฐํ
|
69 |
|
70 |
-
|
|
|
71 |
|
72 |
with block:
|
73 |
gr.HTML(
|
74 |
"""
|
75 |
<div style="text-align: center; max-width: 800px; margin: 0 auto;">
|
76 |
-
<
|
77 |
-
|
78 |
-
|
79 |
-
align-items: center;
|
80 |
-
gap: 0.8rem;
|
81 |
-
font-size: 1.75rem;
|
82 |
-
"
|
83 |
-
>
|
84 |
-
<h1 style="font-weight: 900; margin-bottom: 12px;">
|
85 |
-
๐น Pop2Piano : ํผ์๋
ธ ์ปค๋ฒ๊ณก ์์ฑ๊ธฐ ๐น
|
86 |
-
</h1>
|
87 |
-
</div>
|
88 |
<p style="margin-bottom: 12px; font-size: 90%">
|
89 |
-
|
90 |
-
|
91 |
</p>
|
92 |
</div>
|
93 |
"""
|
94 |
)
|
95 |
with gr.Group():
|
96 |
-
with gr.Row(
|
97 |
with gr.Column():
|
98 |
file_uploaded = gr.Audio(label="์ค๋์ค ์
๋ก๋", type="filepath")
|
99 |
with gr.Column():
|
100 |
with gr.Row():
|
101 |
yt_link = gr.Textbox(label="์ ํ๋ธ ๋งํฌ๋ฅผ ์
๋ ฅํ์ธ์.", autofocus=True, lines=3)
|
102 |
yt_btn = gr.Button("์ ํ๋ธ ๋งํฌ์์ ์ค๋์ค๋ฅผ ๋ค์ด ๋ฐ์ต๋๋ค.", size="lg")
|
103 |
-
|
104 |
-
|
105 |
-
yt_btn.click(get_audio_from_yt_video, inputs=[yt_link], outputs=[yt_audio_path, file_uploaded])
|
106 |
|
107 |
with gr.Group():
|
108 |
with gr.Column():
|
109 |
-
composer = gr.Dropdown(label="
|
110 |
generate_btn = gr.Button("๋๋ง์ ํผ์๋
ธ ์ปค๋ฒ๊ณก ๋ง๋ค๊ธฐ๐น๐ต")
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
with gr.Row().style(mobile_collapse=False, equal_height=True):
|
115 |
wav_output2 = gr.File(label="๋๋ง์ ํผ์๋
ธ ์ปค๋ฒ๊ณก์ ๋ค์ด๋ก๋ (.wav)")
|
116 |
wav_output1 = gr.Audio(label="๋๋ง์ ํผ์๋
ธ ์ปค๋ฒ๊ณก ๋ฃ๊ธฐ")
|
117 |
-
midi_output = gr.File(label="์์ฑํ
|
118 |
-
generate_btn.click(
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
gr.HTML(
|
127 |
-
"""
|
128 |
-
<div class="footer">
|
129 |
-
<center><p><a href="http://sweetcocoa.github.io/pop2piano_samples" style="text-decoration: underline;" target="_blank">Project Page</a>
|
130 |
-
<center><a href="https://huggingface.co/docs/transformers/main/model_doc/pop2piano" style="text-decoration: underline;" target="_blank">HuggingFace Model Docs</a>
|
131 |
-
<center><a href="https://github.com/sweetcocoa/pop2piano" style="text-decoration: underline;" target="_blank">Github</a>
|
132 |
-
</p>
|
133 |
-
</div>
|
134 |
-
"""
|
135 |
-
)
|
136 |
|
137 |
-
block.launch(debug=False)
|
|
|
1 |
+
import os
|
2 |
+
import torch
|
3 |
+
import librosa
|
4 |
+
import binascii
|
5 |
+
import warnings
|
6 |
+
import midi2audio # MIDI 파일을 WAV 파일로 변환
|
7 |
+
import numpy as np
|
8 |
+
import pytube as pt # YouTube 비디오를 오디오로 다운로드
|
9 |
+
import gradio as gr
|
10 |
+
import soundfile as sf
|
11 |
+
from transformers import Pop2PianoForConditionalGeneration, Pop2PianoProcessor
|
12 |
|
13 |
+
# 디렉토리 생성
|
14 |
+
# Working directories: downloaded YouTube audio and generated MIDI/WAV files.
yt_video_dir = "./yt_dir"
outputs_dir = "./midi_wav_outputs"
for _directory in (outputs_dir, yt_video_dir):
    os.makedirs(_directory, exist_ok=True)

# Model setup: use the GPU when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
model = Pop2PianoForConditionalGeneration.from_pretrained("sweetcocoa/pop2piano")
model = model.to(device)
processor = Pop2PianoProcessor.from_pretrained("sweetcocoa/pop2piano")
# Composer names listed in the checkpoint's generation config.
composers = model.generation_config.composer_to_feature_token.keys()
|
24 |
|
25 |
+
# 유튜브 비디오에서 오디오 추출 함수
|
26 |
def get_audio_from_yt_video(yt_link):
    """Download the first audio-only stream of a YouTube video.

    The file is saved under ``yt_video_dir`` with a random 16-hex-digit name
    and an ``.mp4`` extension.

    Args:
        yt_link: URL of the YouTube video.

    Returns:
        A ``(filename, filename)`` pair -- the same path twice, because the
        click handler that calls this function fills two output components.
        Both entries are ``None`` when the download fails.
    """
    try:
        audio_streams = pt.YouTube(yt_link).streams.filter(only_audio=True)
        filename = os.path.join(yt_video_dir, binascii.hexlify(os.urandom(8)).decode() + ".mp4")
        audio_streams[0].download(filename=filename)
    except Exception as err:
        # Best-effort: any failure (bad link, network error, no audio stream)
        # clears both outputs instead of crashing the handler. `Exception`
        # rather than a bare `except` so that KeyboardInterrupt and
        # SystemExit still propagate; the cause is included in the warning.
        warnings.warn(f"Video Not Found at {yt_link} ({err!r})")
        filename = None

    return filename, filename
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
|
38 |
+
# 모델 추론 함수
|
39 |
+
def inference(file_uploaded, composer):
    """Generate a piano cover for an audio file with the Pop2Piano model.

    Args:
        file_uploaded: Path of the audio file to convert.
        composer: Arranger style, forwarded to ``model.generate`` as
            ``composer``.

    Returns:
        Whatever ``prepare_output_file`` returns for the decoded MIDI.

    Raises:
        gr.Error: If no audio file was provided.
    """
    if not file_uploaded:
        # Without this guard an empty audio input reaches librosa.load and
        # fails with an error that means nothing to the user.
        raise gr.Error("먼저 오디오를 업로드하거나 유튜브 링크에서 오디오를 받아 주세요.")

    # sr=None keeps the file's own sampling rate instead of resampling.
    waveform, sr = librosa.load(file_uploaded, sr=None)
    inputs = processor(audio=waveform, sampling_rate=sr, return_tensors="pt").to(device)
    model_output = model.generate(input_features=inputs["input_features"], composer=composer)

    # Move tokens and features to the CPU before decoding them to MIDI objects.
    tokenizer_output = processor.batch_decode(
        token_ids=model_output.to("cpu"),
        feature_extractor_output=inputs.to("cpu"),
    )["pretty_midi_objects"]

    return prepare_output_file(tokenizer_output, sr)
|
|
|
|
|
46 |
|
47 |
+
# 출력 파일 준비 함수
|
48 |
+
def prepare_output_file(tokenizer_output, sr):
    """Write the generated MIDI to disk and render it to a WAV file.

    Both files go to ``outputs_dir`` under a random ``output_<16 hex>`` name.

    Args:
        tokenizer_output: Sequence whose first item exposes ``write(path)``;
            only that first item is saved.
        sr: Not used by this function; kept so existing callers keep working.

    Returns:
        ``(wav_path, wav_path, midi_path)`` -- the WAV path appears twice
        because the caller's result fills two separate output components.
    """
    output_file_name = "output_" + binascii.hexlify(os.urandom(8)).decode()
    midi_output = os.path.join(outputs_dir, output_file_name + ".mid")
    tokenizer_output[0].write(midi_output)

    # Swap only the extension: str.replace(".mid", ".wav") would rewrite
    # every ".mid" occurrence in the path, including directory names.
    wav_output = os.path.splitext(midi_output)[0] + ".wav"
    midi2audio.FluidSynth().midi_to_audio(midi_output, wav_output)

    return wav_output, wav_output, midi_output
|
|
|
56 |
|
57 |
+
# Gradio UI setup.
# NOTE(review): the Korean UI strings below were reconstructed from
# mis-decoded text, and the nesting was inferred because the indentation was
# lost -- confirm both against the original app.py.
block = gr.Blocks(theme="Taithrah/Minimal")

with block:
    # Page header: title and a short usage note.
    gr.HTML(
        """
        <div style="text-align: center; max-width: 800px; margin: 0 auto;">
            <h1 style="font-weight: 900; margin-bottom: 12px;">
                🎹 Pop2Piano : 피아노 커버곡 생성기 🎹
            </h1>
            <p style="margin-bottom: 12px; font-size: 90%">
                Pop2Piano 데모: 팝 오디오 기반 피아노 커버곡 생성. <br>
                작곡가(편곡자)를 선택하고 팝 오디오를 업로드하거나 유튜브 링크를 입력한 후 생성 버튼을 클릭하세요.
            </p>
        </div>
        """
    )

    # Input section: upload a file directly, or fetch audio from YouTube.
    with gr.Group():
        with gr.Row():
            with gr.Column():
                file_uploaded = gr.Audio(label="오디오 업로드", type="filepath")
            with gr.Column():
                with gr.Row():
                    yt_link = gr.Textbox(label="유튜브 링크를 입력하세요.", autofocus=True, lines=3)
                    yt_btn = gr.Button("유튜브 링크에서 오디오를 다운 받습니다.", size="lg")
                yt_audio_path = gr.Audio(label="유튜브 동영상에서 추출한 오디오", interactive=False)
                # The downloaded file fills both the preview and the upload slot.
                yt_btn.click(get_audio_from_yt_video, inputs=[yt_link], outputs=[yt_audio_path, file_uploaded])

    # Generation section: pick an arranger, run the model, expose the results.
    with gr.Group():
        with gr.Column():
            composer = gr.Dropdown(label="편곡자", choices=composers, value="composer1")
            generate_btn = gr.Button("나만의 피아노 커버곡 만들기🎹🎵")
        with gr.Row():
            wav_output2 = gr.File(label="나만의 피아노 커버곡을 다운로드 (.wav)")
            wav_output1 = gr.Audio(label="나만의 피아노 커버곡 듣기")
            midi_output = gr.File(label="생성한 midi 파일 다운로드 (.mid)")
            generate_btn.click(
                inference,
                inputs=[file_uploaded, composer],
                outputs=[wav_output1, wav_output2, midi_output],
            )

block.launch(debug=False)
|