shethjenil committed on
Commit
cc3cb59
·
verified ·
1 Parent(s): ffe4dfb

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +53 -38
  2. midi_viz.html +92 -0
app.py CHANGED
@@ -1,42 +1,57 @@
1
  import gradio as gr
2
- from musc.model import PretrainedModel
3
- from json import load as json_load
4
- from mido import MidiFile,MidiTrack
5
- from os import remove as os_remove
6
- Model = PretrainedModel(json_load(open("violin.json")),"violin_model.pt").to("cpu")
7
def merge_violin_tracks(input_midi, output_midi):
    """Flatten every track of a MIDI file into a single track and save it.

    Args:
        input_midi: The MIDI file to read, passed straight to ``MidiFile``.
        output_midi: Where the single-track file is saved. The caller in
            this file passes the same path as ``input_midi``.
    """
    source = MidiFile(input_midi)
    merged = MidiFile(ticks_per_beat=source.ticks_per_beat)
    merged_track = MidiTrack()
    merged.tracks.append(merged_track)

    # Turn each track's delta times into absolute ticks so that messages
    # coming from different tracks can share one timeline.
    timeline = []
    for track in source.tracks:
        tick = 0
        for msg in track:
            tick += msg.time
            timeline.append((tick, msg))

    # list.sort is stable: messages on the same tick keep their track order.
    timeline.sort(key=lambda entry: entry[0])

    # Convert the absolute ticks back into deltas relative to the previous
    # message of the merged track.
    previous_tick = 0
    for tick, msg in timeline:
        merged_track.append(msg.copy(time=tick - previous_tick))
        previous_tick = tick

    # set_tempo messages are already on the timeline at their real
    # positions. They are deliberately NOT inserted again at index 0: that
    # duplicated every tempo message and, whenever one carried a non-zero
    # delta time, pushed all following messages back by that delta.
    merged.save(output_midi)
29
 
30
def transcribe_and_generate_midi(music_file_path, model=Model, batch_size=32):
    """Transcribe an audio file and return the path of the resulting MIDI.

    Args:
        music_file_path: Path of the audio file to transcribe. The file is
            removed once the MIDI has been produced.
        model: Object whose ``transcribe`` method performs the
            transcription; defaults to the module-level ``Model``.
        batch_size: Forwarded to ``model.transcribe``.

    Returns:
        The fixed path ``"output.mid"``.
    """
    midi_path = "output.mid"
    transcription = model.transcribe(music_file_path, batch_size=batch_size)
    transcription.write(midi_path)
    # Collapse the written file into a single track, in place.
    merge_violin_tracks(midi_path, midi_path)
    os_remove(music_file_path)
    return midi_path
35
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
# Web UI: one uploaded audio file in, one downloadable MIDI file out.
audio_input = gr.Audio(label="Upload your Audio file", type="filepath")
midi_output = gr.File(label="Download MIDI file")
demo = gr.Interface(
    fn=transcribe_and_generate_midi,
    inputs=audio_input,
    outputs=midi_output,
    title="Audio2Violin",
    description="Upload a Audio file, and it will be transcribed into Violin MIDI format.",
)
demo.launch()
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ import numpy as np
3
+ from librosa import load as librosa_load
4
+ from pydub import AudioSegment
5
+ from pretty_midi import PrettyMIDI
6
+ from transformers import Pop2PianoForConditionalGeneration, Pop2PianoProcessor
7
+ from os import listdir as os_listdir, remove as os_remove
8
+ from io import BytesIO
9
+ from base64 import b64encode
10
# Load the pretrained checkpoint and its matching processor once, at import
# time, so every request reuses them.
model = Pop2PianoForConditionalGeneration.from_pretrained("sweetcocoa/pop2piano").to("cpu")
processor = Pop2PianoProcessor.from_pretrained("sweetcocoa/pop2piano")
# Instrument names offered in the UI: the .sf2 files found in "soundfonts",
# without their suffix. Entries that are not .sf2 files are skipped because
# inference() rebuilds the path as "soundfonts/<name>.sf2", which would not
# exist for them. Sorting keeps the list order stable across runs.
soundfonts = sorted(
    name.removesuffix(".sf2")
    for name in os_listdir("soundfonts")
    if name.endswith(".sf2")
)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
def librosa_to_audiosegment(y, sr):
    """Convert a sample array into a mono, 16-bit ``AudioSegment``.

    The samples are peak-normalised so that the largest absolute value maps
    to (just under) 32767 before being cast to ``int16``. All-zero or empty
    input is passed through without scaling.

    Args:
        y: Array of audio samples (anything ``np.asarray`` accepts). It is
            written out as a single channel.
        sr: Sample rate, used as the segment's frame rate.

    Returns:
        An ``AudioSegment`` with ``sample_width=2`` and ``channels=1``.
    """
    samples = np.asarray(y)
    epsilon = 1e-8
    # np.max has no identity for an empty array and would raise; treat empty
    # input like silence instead.
    peak = np.max(np.abs(samples)) if samples.size else 0
    if peak > 0:
        # epsilon keeps the scaled peak strictly inside the int16 range.
        samples = samples / (peak + epsilon) * 32767
    return AudioSegment(samples.astype(np.int16).tobytes(), frame_rate=sr, sample_width=2, channels=1)
19
 
20
def inference(file_upload, composer, sf2_files):
    """Transcribe an audio file to MIDI and build a preview mix and viewer.

    Args:
        file_upload: Path of the uploaded audio file. The file is deleted
            once processing has finished.
        composer: Composer style number (the UI limits it to 1-21). It is
            converted to the ``"composer<N>"`` name passed to the model.
        sf2_files: Names, without directory or ``.sf2`` suffix, of the
            soundfonts under ``soundfonts/`` used to render the MIDI. May be
            empty or ``None``; the mix then contains only the input audio.

    Returns:
        A ``(midi_path, mp3_path, html)`` tuple: the fixed paths
        ``"output.mid"`` and ``"output.mp3"``, plus an HTML snippet embedding
        ``midi_viz.html`` with the MIDI data inlined as base64.
    """
    # NOTE(review): the output paths are fixed, so concurrent requests would
    # overwrite each other's files — confirm the app only serves one at a time.
    sf2_paths = ["soundfonts/" + name + ".sf2" for name in sf2_files or []]
    audio_data, audio_sr = librosa_load(file_upload, sr=None)
    inputs = processor(audio=audio_data, sampling_rate=audio_sr, return_tensors="pt").to("cpu")

    # A numeric UI value can arrive as a float; str(1.0) would produce
    # "composer1.0", so force an integer before building the name.
    composer_name = f"composer{int(composer)}"
    token_ids = model.generate(input_features=inputs["input_features"], composer=composer_name)
    midi = processor.batch_decode(
        token_ids=token_ids,
        feature_extractor_output=inputs,
    )["pretty_midi_objects"][0]
    with open("output.mid", "wb") as midi_file:
        midi.write(midi_file)
    midi = PrettyMIDI("output.mid")

    # The original recording goes into the mix with a gain of -16 applied.
    final_mix = librosa_to_audiosegment(audio_data, audio_sr).apply_gain(-16)
    for sf2_path in sf2_paths:
        sf_audio_data = midi.fluidsynth(fs=44100, sf2_path=sf2_path)
        if sf_audio_data.size == 0:
            # Nothing was synthesised (e.g. no notes); there is no peak to
            # normalise against and nothing to add to the mix.
            continue
        # librosa_to_audiosegment() already peak-normalises and casts to
        # int16, so no separate normalisation is done here.
        sf_audio_segment = librosa_to_audiosegment(sf_audio_data, 44100)
        # Pad or trim the rendering to the length of the mix.
        shortfall = len(final_mix) - len(sf_audio_segment)
        if shortfall > 0:
            # crossfade=0: append() otherwise applies its default crossfade,
            # which raises when either side is shorter than the crossfade
            # and leaves the result shorter than the mix.
            sf_audio_segment = sf_audio_segment.append(
                AudioSegment.silent(duration=shortfall), crossfade=0
            )
        elif shortfall < 0:
            sf_audio_segment = sf_audio_segment[:len(final_mix)]
        final_mix = final_mix.overlay(sf_audio_segment)
    final_mix.export("output.mp3", format="mp3")
    os_remove(file_upload)

    # Inline the MIDI file into the viewer template. Context managers make
    # sure both file handles are closed.
    with open("midi_viz.html", encoding="utf-8") as template_file:
        viewer_html = template_file.read()
    with open("output.mid", "rb") as midi_file:
        midi_b64 = b64encode(midi_file.read()).decode("utf-8")
    viewer_html = viewer_html.replace("{midi_data}", midi_b64)
    # srcdoc is delimited with single quotes, so the template must not
    # contain any single quote itself.
    iframe_html = (
        '<div style="display: flex; justify-content: center; align-items: center;">'
        '<iframe style="width: 100%; height: 500px; overflow:hidden" '
        f"srcdoc='{viewer_html}'></iframe></div>"
    )
    return "output.mid", "output.mp3", iframe_html
45
# Build and start the web UI: audio file, composer number and soundfont
# selection in; MIDI file, rendered audio and an HTML MIDI viewer out.
gr.Interface(
    inference,
    [
        gr.Audio(sources="upload", type="filepath", label="Audio"),
        # precision=0 makes the component round to an integer, so that
        # inference() never receives a value such as 1.0 or 2.5 when it
        # builds the "composer<N>" name.
        gr.Number(1, minimum=1, maximum=21, precision=0, label="Composer"),
        gr.Dropdown(soundfonts, multiselect=True, label="Instrument"),
    ],
    [
        gr.File(label="MIDI"),
        gr.Audio(label="Instrument Audio"),
        gr.HTML(),
    ],
).launch()
midi_viz.html ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+
4
+ <head>
5
+ <style>
6
+ #midi-section midi-player {
7
+ display: block;
8
+ width: inherit;
9
+ margin: 4px;
10
+ margin-bottom: 0;
11
+ }
12
+
13
+ #midi-section midi-player::part(control-panel) {
14
+ background: #ff5;
15
+ border: 2px solid #000;
16
+ border-radius: 10px 10px 0 0;
17
+ }
18
+
19
+ #midi-section midi-player::part(play-button) {
20
+ color: #353;
21
+ border: 2px solid currentColor;
22
+ background-color: #4d4;
23
+ border-radius: 20px;
24
+ transition: all 0.2s;
25
+ content: "hello";
26
+ }
27
+
28
+ #midi-section midi-player::part(play-button):hover {
29
+ color: #0a0;
30
+ background-color: #5f5;
31
+ border-radius: 10px;
32
+ }
33
+
34
+ #midi-section midi-player::part(time) {
35
+ font-family: monospace;
36
+ }
37
+
38
+ #midi-section midi-visualizer .piano-roll-visualizer {
39
+ background: #ffd;
40
+ border: 2px solid black;
41
+ border-top: none;
42
+ border-radius: 0 0 10px 10px;
43
+ margin: 4px;
44
+ margin-top: 0;
45
+ overflow: auto;
46
+ }
47
+
48
+ #midi-section midi-visualizer svg rect.note {
49
+ opacity: 0.6;
50
+ stroke-width: 2;
51
+ }
52
+
53
+ #midi-section midi-visualizer svg rect.note[data-instrument="0"] {
54
+ fill: #e22;
55
+ stroke: #500;
56
+ }
57
+
58
+ #midi-section midi-visualizer svg rect.note[data-instrument="2"] {
59
+ fill: #2ee;
60
+ stroke: #055;
61
+ }
62
+
63
+ #midi-section midi-visualizer svg rect.note[data-is-drum="true"] {
64
+ fill: #888;
65
+ stroke: #888;
66
+ }
67
+
68
+ #midi-section midi-visualizer svg rect.note.active {
69
+ opacity: 0.9;
70
+ stroke: #000;
71
+ }
72
+ </style>
73
+ </head>
74
+
75
+ <body>
76
+ <section id="midi-section"><midi-player sound-font="" visualizer="#midi-section midi-visualizer"></midi-player><midi-visualizer></midi-visualizer></section>
77
+ <script src="https://cdn.jsdelivr.net/combine/npm/[email protected],npm/@magenta/[email protected]/es6/core.js,npm/focus-visible@5,npm/[email protected]"></script>
78
+ </body>
79
+ <script>
80
/**
 * Decode base64 data and expose it through an object URL.
 *
 * @param {string} base64 Bare base64 text, or a string with a comma in which
 *     case the part after the first comma is decoded (data-URI style).
 * @param {string} [mimeType="audio/midi"] MIME type given to the Blob.
 * @returns {string} URL created with URL.createObjectURL for the decoded bytes.
 */
function base64ToBlobUrl(base64, mimeType = "audio/midi") {
    // Fall back to the whole string when there is nothing after a comma.
    const payload = base64.split(",")[1] || base64;
    const decoded = atob(payload);
    // atob yields one character per byte, so each char code is a byte value.
    const bytes = Uint8Array.from(decoded, (ch) => ch.charCodeAt(0));
    const blob = new Blob([bytes], { type: mimeType });
    return URL.createObjectURL(blob);
}
88
// The "{midi_data}" placeholder is replaced with the base64-encoded MIDI
// file by app.py before this page is embedded.
const encodedMidi = "{midi_data}";
const midiPlayer = document.querySelector("midi-player");
midiPlayer.src = base64ToBlobUrl(encodedMidi);
90
+ </script>
91
+
92
+ </html>