Spaces:
Running
Running
File size: 4,929 Bytes
ed24b9d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 |
import gradio as gr
import note_seq
import numpy as np
from transformers import AutoTokenizer, AutoModelForCausalLM
# Load the pretrained tokenizer and causal language model identified by the
# "TristanBehrens/js-fakes-4bars" repo id; both are used by process() below.
tokenizer = AutoTokenizer.from_pretrained("TristanBehrens/js-fakes-4bars")
model = AutoModelForCausalLM.from_pretrained("TristanBehrens/js-fakes-4bars")
# Length in seconds of a sixteenth note (a quarter of a beat) at 120 BPM: 0.125.
NOTE_LENGTH_16TH_120BPM = 0.25 * 60 / 120
# Length in seconds of a four-beat bar at 120 BPM: 2.0.
BAR_LENGTH_120BPM = 4.0 * 60 / 120
# Sample rate passed to the synthesizer and returned together with the audio.
SAMPLE_RATE=44100
def token_sequence_to_note_sequence(token_sequence, use_program=True, use_drums=True, instrument_mapper=None, only_piano=False):
    """Convert a sequence of music tokens into a note sequence.

    Args:
        token_sequence: Either a list of token strings or a single string of
            whitespace-separated tokens.
        use_program: When true, a non-drum ``INST=<n>`` token sets the program
            and instrument used for the notes that follow it.
        use_drums: When true, an ``INST=DRUMS`` token marks the notes that
            follow it as drum notes.
        instrument_mapper: Optional mapping applied to the instrument string
            of a non-drum ``INST`` token before it is converted to ``int``.
        only_piano: When true, every non-drum note is finally forced to
            instrument 0 and program 0.

    Returns:
        The note sequence produced by ``empty_note_sequence()`` with one note
        added per ``NOTE_ON`` token.

    Raises:
        ValueError: If the value of an ``INST``, ``NOTE_ON``, ``NOTE_OFF`` or
            ``TIME_DELTA`` token cannot be parsed as a number.
    """
    if isinstance(token_sequence, str):
        token_sequence = token_sequence.split()

    note_sequence = empty_note_sequence()

    # Rendering state carried from token to token.
    current_program = 1
    current_is_drum = False
    current_instrument = 0
    track_count = 0
    # The timing state is initialised up front so that a malformed sequence
    # (e.g. NOTE_ON or TIME_DELTA before any TRACK_START / BAR_START) is
    # rendered from time zero instead of raising UnboundLocalError.
    current_bar_index = 0
    current_time = 0.0
    current_notes = {}

    for token in token_sequence:
        if token == "PIECE_END":
            print("The end.")
            break
        elif token == "TRACK_START":
            # Every track starts again at the first bar.
            current_bar_index = 0
            track_count += 1
        elif token == "TRACK_END" or token.startswith("KEY"):
            # TRACK_END, KEYS_START, KEYS_END and KEY=... carry no note data.
            # They are matched here, ahead of the prefix checks below, purely
            # to make explicit that they are skipped.
            continue
        elif token.startswith("INST"):
            instrument = token.split("=")[-1]
            if instrument == "DRUMS":
                if use_drums:
                    current_instrument = 0
                    current_program = 0
                    current_is_drum = True
            elif use_program:
                if instrument_mapper is not None and instrument in instrument_mapper:
                    instrument = instrument_mapper[instrument]
                current_program = int(instrument)
                current_instrument = track_count
                current_is_drum = False
        elif token == "BAR_START":
            current_time = current_bar_index * BAR_LENGTH_120BPM
            # Open notes are tracked per bar; a NOTE_OFF only closes a note
            # that was started within the same bar.
            current_notes = {}
        elif token == "BAR_END":
            current_bar_index += 1
        elif token.startswith("NOTE_ON"):
            pitch = int(token.split("=")[-1])
            note = note_sequence.notes.add()
            note.start_time = current_time
            # Default to a quarter note; a later NOTE_OFF overrides the end.
            note.end_time = current_time + 4 * NOTE_LENGTH_16TH_120BPM
            note.pitch = pitch
            note.instrument = current_instrument
            note.program = current_program
            note.velocity = 80
            note.is_drum = current_is_drum
            current_notes[pitch] = note
        elif token.startswith("NOTE_OFF"):
            pitch = int(token.split("=")[-1])
            note = current_notes.get(pitch)
            if note is not None:
                note.end_time = current_time
        elif token.startswith("TIME_DELTA"):
            # The token value counts sixteenth notes.
            current_time += float(token.split("=")[-1]) * NOTE_LENGTH_16TH_120BPM
        # Every other token (PIECE_START, DENSITY=..., [PAD], unknown tokens)
        # is ignored.

    # Renumber instruments so that each distinct (program, is_drum) pair gets
    # one index, assigned in order of first appearance.
    instrument_indices = {}
    for note in note_sequence.notes:
        key = (note.program, note.is_drum)
        note.instrument = instrument_indices.setdefault(key, len(instrument_indices))

    if only_piano:
        for note in note_sequence.notes:
            if not note.is_drum:
                note.instrument = 0
                note.program = 0

    return note_sequence
def empty_note_sequence(qpm=120.0, total_time=0.0):
    """Create a note sequence without notes.

    Args:
        qpm: Value stored in the ``qpm`` field of the single tempo entry that
            is added to the sequence.
        total_time: Value stored in the sequence's ``total_time`` field.

    Returns:
        A new ``NoteSequence`` with one tempo entry, ``ticks_per_quarter`` set
        to ``note_seq.constants.STANDARD_PPQ`` and the given ``total_time``.
    """
    sequence = note_seq.protobuf.music_pb2.NoteSequence()
    tempo = sequence.tempos.add()
    tempo.qpm = qpm
    sequence.ticks_per_quarter = note_seq.constants.STANDARD_PPQ
    sequence.total_time = total_time
    return sequence
def process(text):
    """Generate music tokens continuing *text* and render them as audio.

    Args:
        text: Prompt made of whitespace-separated music tokens. An empty or
            blank prompt is replaced by ``"PIECE_START"``.

    Returns:
        A ``(sample_rate, samples)`` tuple where ``samples`` is the
        synthesized audio converted to a NumPy ``int16`` array.
    """
    # A blank prompt encodes to no tokens, leaving generate() nothing to
    # continue from; fall back to the same default the UI textbox offers.
    if not text or not text.strip():
        text = "PIECE_START"

    input_ids = tokenizer.encode(text, return_tensors="pt")
    generated_ids = model.generate(input_ids, max_length=500)
    generated_sequence = tokenizer.decode(generated_ids[0])

    # Convert the generated token text to notes, then to an audio waveform.
    note_sequence = token_sequence_to_note_sequence(generated_sequence)
    waveform = note_seq.midi_synth.synthesize(note_sequence, sample_rate=SAMPLE_RATE)

    # Attenuate by ~sqrt(2) and scale to the int16 range.
    # NOTE(review): assumes the synthesizer output lies within [-1, 1] - confirm.
    headroom = 1.414
    int16_max = 32767
    int16_data = (waveform / headroom * int16_max).astype(np.int16)
    return SAMPLE_RATE, int16_data
# Gradio UI: one textbox holding the token prompt, one audio player for the
# rendered result of process().
title = "Music generation with GPT-2"
iface = gr.Interface(
    fn=process,
    inputs=[gr.inputs.Textbox(default="PIECE_START")],
    outputs=["audio"],
    title=title,
    examples=[
        ["PIECE_START"],
        ["PIECE_START STYLE=JSFAKES GENRE=JSFAKES TRACK_START INST=48 BAR_START NOTE_ON=61"],
    ],
    article="This demo is inspired in the notebook from https://huggingface.co/TristanBehrens/js-fakes-4bars",
)
# Starts the app as soon as the module is executed.
iface.launch(debug=True)