Spaces:
Sleeping
Sleeping
File size: 4,338 Bytes
dae5b5d 376a444 dae5b5d 376a444 dae5b5d 376a444 dae5b5d 376a444 dae5b5d 376a444 dae5b5d 376a444 dae5b5d 376a444 dae5b5d 376a444 dae5b5d 376a444 dae5b5d 376a444 dae5b5d 376a444 dae5b5d 376a444 dae5b5d 376a444 dae5b5d 376a444 dae5b5d 376a444 dae5b5d 376a444 dae5b5d 376a444 dae5b5d 376a444 dae5b5d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 |
import os
import torch
import librosa
import binascii
import warnings
import midi2audio # MIDI ํ์ผ์ WAV ํ์ผ๋ก ๋ณํ
import numpy as np
import pytube as pt # YouTube ๋น๋์ค๋ฅผ ์ค๋์ค๋ก ๋ค์ด๋ก๋
import gradio as gr
import soundfile as sf
from transformers import Pop2PianoForConditionalGeneration, Pop2PianoProcessor
# ๋๋ ํ ๋ฆฌ ์์ฑ
yt_video_dir = "./yt_dir" # ์ ํ๋ธ ๋น๋์ค ๋ค์ด๋ก๋ ๊ฒฝ๋ก
outputs_dir = "./midi_wav_outputs" # ์ถ๋ ฅ ํ์ผ ๊ฒฝ๋ก
os.makedirs(outputs_dir, exist_ok=True)
os.makedirs(yt_video_dir, exist_ok=True)
# ๋ชจ๋ธ ์ค์
device = "cuda" if torch.cuda.is_available() else "cpu"
model = Pop2PianoForConditionalGeneration.from_pretrained("sweetcocoa/pop2piano").to(device)
processor = Pop2PianoProcessor.from_pretrained("sweetcocoa/pop2piano")
composers = model.generation_config.composer_to_feature_token.keys()
# ์ ํ๋ธ ๋น๋์ค์์ ์ค๋์ค ์ถ์ถ ํจ์
def get_audio_from_yt_video(yt_link):
try:
yt = pt.YouTube(yt_link)
t = yt.streams.filter(only_audio=True)
filename = os.path.join(yt_video_dir, binascii.hexlify(os.urandom(8)).decode() + ".mp4")
t[0].download(filename=filename)
except:
warnings.warn(f"Video Not Found at {yt_link}")
filename = None
return filename, filename
# ๋ชจ๋ธ ์ถ๋ก ํจ์
def inference(file_uploaded, composer):
waveform, sr = librosa.load(file_uploaded, sr=None)
inputs = processor(audio=waveform, sampling_rate=sr, return_tensors="pt").to(device)
model_output = model.generate(input_features=inputs["input_features"], composer=composer)
tokenizer_output = processor.batch_decode(token_ids=model_output.to("cpu"), feature_extractor_output=inputs.to("cpu"))["pretty_midi_objects"]
return prepare_output_file(tokenizer_output, sr)
# ์ถ๋ ฅ ํ์ผ ์ค๋น ํจ์
def prepare_output_file(tokenizer_output, sr):
output_file_name = "output_" + binascii.hexlify(os.urandom(8)).decode()
midi_output = os.path.join(outputs_dir, output_file_name + ".mid")
tokenizer_output[0].write(midi_output)
wav_output = midi_output.replace(".mid", ".wav")
midi2audio.FluidSynth().midi_to_audio(midi_output, wav_output)
return wav_output, wav_output, midi_output
# Gradio UI ์ค์
block = gr.Blocks(theme="Taithrah/Minimal")
with block:
gr.HTML(
"""
<div style="text-align: center; max-width: 800px; margin: 0 auto;">
<h1 style="font-weight: 900; margin-bottom: 12px;">
๐น Pop2Piano : ํผ์๋
ธ ์ปค๋ฒ๊ณก ์์ฑ๊ธฐ ๐น
</h1>
<p style="margin-bottom: 12px; font-size: 90%">
Pop2Piano ๋ฐ๋ชจ: ํ ์ค๋์ค ๊ธฐ๋ฐ ํผ์๋
ธ ์ปค๋ฒ๊ณก ์์ฑ. <br>
์๊ณก๊ฐ(ํธ๊ณก์)๋ฅผ ์ ํํ๊ณ ํ ์ค๋์ค๋ฅผ ์
๋ก๋ํ๊ฑฐ๋ ์ ํ๋ธ ๋งํฌ๋ฅผ ์
๋ ฅํ ํ ์์ฑ ๋ฒํผ์ ํด๋ฆญํ์ธ์.
</p>
</div>
"""
)
with gr.Group():
with gr.Row():
with gr.Column():
file_uploaded = gr.Audio(label="์ค๋์ค ์
๋ก๋", type="filepath")
with gr.Column():
with gr.Row():
yt_link = gr.Textbox(label="์ ํ๋ธ ๋งํฌ๋ฅผ ์
๋ ฅํ์ธ์.", autofocus=True, lines=3)
yt_btn = gr.Button("์ ํ๋ธ ๋งํฌ์์ ์ค๋์ค๋ฅผ ๋ค์ด ๋ฐ์ต๋๋ค.", size="lg")
yt_audio_path = gr.Audio(label="์ ํ๋ธ ๋์์์์ ์ถ์ถํ ์ค๋์ค", interactive=False)
yt_btn.click(get_audio_from_yt_video, inputs=[yt_link], outputs=[yt_audio_path, file_uploaded])
with gr.Group():
with gr.Column():
composer = gr.Dropdown(label="ํธ๊ณก์", choices=composers, value="composer1")
generate_btn = gr.Button("๋๋ง์ ํผ์๋
ธ ์ปค๋ฒ๊ณก ๋ง๋ค๊ธฐ๐น๐ต")
with gr.Row():
wav_output2 = gr.File(label="๋๋ง์ ํผ์๋
ธ ์ปค๋ฒ๊ณก์ ๋ค์ด๋ก๋ (.wav)")
wav_output1 = gr.Audio(label="๋๋ง์ ํผ์๋
ธ ์ปค๋ฒ๊ณก ๋ฃ๊ธฐ")
midi_output = gr.File(label="์์ฑํ midi ํ์ผ ๋ค์ด๋ก๋ (.mid)")
generate_btn.click(
inference,
inputs=[file_uploaded, composer],
outputs=[wav_output1, wav_output2, midi_output])
block.launch(debug=False)
|