File size: 9,794 Bytes

c12a65c

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "61e10139",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pickle\n",
    "from music21 import *"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "1a2b28be",
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "import torch.nn as nn\n",
    "from torch.nn import functional as F\n",
    "\n",
    "# Run on the GPU when CUDA is available, otherwise fall back to the CPU.\n",
    "backend_name = 'cuda' if torch.cuda.is_available() else 'cpu'\n",
    "device = torch.device(backend_name)\n",
    "\n",
    "class GenerationRNN(nn.Module):\n",
    "    '''\n",
    "    Embedding -> GRU -> linear decoder that scores the next token of a sequence.\n",
    "\n",
    "    Arguments:\n",
    "    input_size - number of rows of the embedding table (distinct input indices)\n",
    "    hidden_size - width of the embedding vectors and of the GRU hidden state\n",
    "    output_size - number of scores the decoder produces per step\n",
    "    n_layers - number of stacked GRU layers\n",
    "    '''\n",
    "\n",
    "    def __init__(self, input_size, hidden_size, output_size, n_layers=1):\n",
    "        super().__init__()\n",
    "        self.input_size = input_size\n",
    "        self.hidden_size = hidden_size\n",
    "        self.output_size = output_size\n",
    "        self.n_layers = n_layers\n",
    "\n",
    "        self.embedding = nn.Embedding(input_size, hidden_size)\n",
    "        self.gru = nn.GRU(hidden_size, hidden_size, n_layers)\n",
    "        # The decoder reads the hidden state of every GRU layer flattened into\n",
    "        # one row, hence hidden_size * n_layers input features.\n",
    "        self.decoder = nn.Linear(hidden_size * n_layers, output_size)\n",
    "\n",
    "    def forward(self, input, hidden):\n",
    "        '''\n",
    "        Advance the network by one step.\n",
    "\n",
    "        Arguments:\n",
    "        input - tensor holding the index to embed. It is reshaped to (1, -1);\n",
    "                because the hidden state is flattened to a single row for the\n",
    "                decoder, only one index per call fits the layer sizes.\n",
    "        hidden - GRU hidden state, e.g. the tensor returned by init_hidden()\n",
    "\n",
    "        Returns (output, hidden): the decoder scores computed from the updated\n",
    "        hidden state, and that updated hidden state.\n",
    "        '''\n",
    "        embedded = self.embedding(input.view(1, -1))\n",
    "        # Only the updated hidden state is used; the per-step GRU output is dropped.\n",
    "        _, hidden = self.gru(embedded, hidden)\n",
    "        output = self.decoder(hidden.view(1, -1))\n",
    "        return output, hidden\n",
    "\n",
    "    def init_hidden(self):\n",
    "        '''\n",
    "        Return an all-zero hidden state of shape (n_layers, 1, hidden_size).\n",
    "\n",
    "        The tensor is created on the device that holds the model parameters, so\n",
    "        it matches the weights even when the model was loaded onto a device\n",
    "        other than the notebook-level default.\n",
    "        '''\n",
    "        param_device = next(self.parameters()).device\n",
    "        return torch.zeros(self.n_layers, 1, self.hidden_size, device=param_device)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "5b7120cf",
   "metadata": {},
   "outputs": [],
   "source": [
    "def predict_multimomial(net, prime_seq, predict_len, temperature=0.8):\n",
    "    '''\n",
    "    Extend a priming sequence by sampling from the network one step at a time.\n",
    "\n",
    "    Arguments:\n",
    "    net - model exposing init_hidden() and net(input, hidden) -> (scores, hidden)\n",
    "    prime_seq - priming sequence (non-empty sequence of integer indices)\n",
    "    predict_len - number of notes to predict for after prime sequence\n",
    "    temperature - positive divisor applied to the scores before sampling;\n",
    "                  larger values flatten the sampling distribution\n",
    "\n",
    "    Returns a new list: the priming indices followed by predict_len sampled\n",
    "    indices. prime_seq itself is not modified.\n",
    "\n",
    "    Raises ValueError if prime_seq is empty or temperature is not positive.\n",
    "    '''\n",
    "    predicted = list(prime_seq)\n",
    "    if not predicted:\n",
    "        raise ValueError('prime_seq must contain at least one index')\n",
    "    if temperature <= 0:\n",
    "        raise ValueError('temperature must be positive')\n",
    "\n",
    "    hidden = net.init_hidden()\n",
    "    # Keep the indices on the same device as the hidden state they are fed with.\n",
    "    prime_tensor = torch.tensor(predicted, dtype=torch.long).to(hidden.device)\n",
    "\n",
    "    # Sampling needs no gradients; no_grad avoids building a graph over all steps.\n",
    "    with torch.no_grad():\n",
    "        # \"Building up\" the hidden state using all but the last prime index\n",
    "        for token in prime_tensor[:-1]:\n",
    "            _, hidden = net(token, hidden)\n",
    "\n",
    "        # The last prime index is the first input of the sampling loop\n",
    "        input = prime_tensor[-1]\n",
    "\n",
    "        for _ in range(predict_len):\n",
    "            scores, hidden = net(input, hidden)\n",
    "            # softmax(scores / T) is the normalised form of exp(scores / T), but it\n",
    "            # cannot overflow to inf, which torch.multinomial would reject.\n",
    "            probabilities = torch.softmax(scores.view(-1) / temperature, dim=0)\n",
    "            predicted_id = torch.multinomial(probabilities, 1)\n",
    "\n",
    "            # Add predicted index to the list and use as next input\n",
    "            predicted.append(predicted_id.item())\n",
    "            input = predicted_id\n",
    "\n",
    "    return predicted"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "8ce30142",
   "metadata": {},
   "outputs": [],
   "source": [
    "# SECURITY: pickle.load can execute arbitrary code -- only open a file you trust.\n",
    "# NOTE(review): the pickle presumably holds a GenerationRNN instance, in which case\n",
    "# the class defined above has to exist before this cell runs -- confirm.\n",
    "file_path = '/home/dmytro/ucu/music-generation/model.pkl'\n",
    "with open(file_path, mode='rb') as model_file:\n",
    "    model = pickle.load(model_file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "84a2ea9b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Lookup used later to turn generated ids into note/chord strings.\n",
    "# SECURITY: pickle.load can execute arbitrary code -- only open a file you trust.\n",
    "file_path = '/home/dmytro/ucu/music-generation/int_to_note.pkl'\n",
    "with open(file_path, mode='rb') as mapping_file:\n",
    "    int_to_note = pickle.load(mapping_file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "07815507",
   "metadata": {},
   "outputs": [],
   "source": [
    "def _pattern_to_element(pattern):\n",
    "    \"\"\" build the music21 chord or note described by one pattern string \"\"\"\n",
    "    # pattern is a chord: '.'-separated integers (or a single integer)\n",
    "    if ('.' in pattern) or pattern.isdigit():\n",
    "        chord_notes = []\n",
    "        for current_note in pattern.split('.'):\n",
    "            member = note.Note(int(current_note))\n",
    "            member.storedInstrument = instrument.Piano()\n",
    "            chord_notes.append(member)\n",
    "        return chord.Chord(chord_notes)\n",
    "\n",
    "    # pattern is a note: the string is handed to note.Note() unchanged\n",
    "    single_note = note.Note(pattern)\n",
    "    single_note.storedInstrument = instrument.Piano()\n",
    "    return single_note\n",
    "\n",
    "\n",
    "def create_midi(prediction_output, offset_step=0.5):\n",
    "    \"\"\" convert the output from the prediction to notes and chords and\n",
    "        collect them in a music21 stream\n",
    "\n",
    "    Arguments:\n",
    "    prediction_output - iterable of pattern strings. A pattern that contains\n",
    "        '.' or consists only of digits is split on '.' and becomes a chord\n",
    "        whose members are created from the integer pieces; any other\n",
    "        pattern becomes a single note.\n",
    "    offset_step - amount added to the offset after every pattern, so that\n",
    "        notes do not stack. Defaults to 0.5, the value that used to be\n",
    "        hard-coded.\n",
    "\n",
    "    Returns the stream.Stream holding the created elements. Nothing is\n",
    "    written to disk here; the caller saves the stream.\n",
    "    \"\"\"\n",
    "    offset = 0\n",
    "    output_notes = []\n",
    "\n",
    "    # create note and chord objects based on the values generated by the model\n",
    "    for pattern in prediction_output:\n",
    "        element = _pattern_to_element(pattern)\n",
    "        element.offset = offset\n",
    "        output_notes.append(element)\n",
    "\n",
    "        # increase offset each iteration so that notes do not stack\n",
    "        offset += offset_step\n",
    "\n",
    "    return stream.Stream(output_notes)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "a70a41f1",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Priming sequence passed to predict_multimomial; every id is later looked up in int_to_note.\n",
    "input_melody = [727, 224, 55, 55, 727, 224, 55]\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "c9afc0c0",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sample 100 further ids after the priming melody.\n",
    "generated_ids = predict_multimomial(\n",
    "    model,\n",
    "    input_melody,\n",
    "    predict_len=100,\n",
    "    temperature=2.2,\n",
    ")\n",
    "# Translate the ids to their pattern strings, then assemble the music21 stream.\n",
    "generated_seq_multinomial = [int_to_note[token_id] for token_id in generated_ids]\n",
    "pred_midi_multinomial = create_midi(generated_seq_multinomial)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "99a1aabe",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'/home/dmytro/ucu/music-generation/output/new_2.mid'"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Save the generated stream as a MIDI file (path relative to the working directory);\n",
    "# the value returned by write() is displayed as the cell output.\n",
    "pred_midi_multinomial.write(fmt='midi', fp='result.mid')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ba84139a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE(review): FluidSynth is not imported anywhere in this notebook; it is presumably\n",
    "# midi2audio.FluidSynth -- confirm and import it in the first cell.\n",
    "sound_font = \"/usr/share/sounds/sf2/FluidR3_GM.sf2\"\n",
    "# The MIDI file is written as 'result.mid' by the cell above, so that is the file to\n",
    "# render (this cell used to look for 'result.midi', which is never created).\n",
    "midi_path = 'result.mid'\n",
    "wav_path = 'result.wav'\n",
    "FluidSynth(sound_font).midi_to_audio(midi_path, wav_path)\n",
    "# `return` is a SyntaxError outside a function; end the cell with the pair of paths\n",
    "# instead so that it is shown as the cell output.\n",
    "wav_path, midi_path"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0f4481b8",
   "metadata": {},
   "outputs": [],
   "source": [
    "def process_input(midi_file=None, text=None, slider_value=None, radio_value=None):\n",
    "    \"\"\"Placeholder callback for the gr.Interface built in this notebook.\n",
    "\n",
    "    The interface declares four inputs (file, text, slider, radio) and calls\n",
    "    the callback with one argument per input, so the function accepts four\n",
    "    optional parameters instead of none. All of them default to None, so an\n",
    "    existing zero-argument call keeps working.\n",
    "\n",
    "    TODO: not implemented yet -- every input is ignored and None is returned,\n",
    "    although the interface declares two outputs (\"audio\" and \"file\").\n",
    "    \"\"\"\n",
    "    return None"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2f2e7a91",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Label shown on the file-upload input of the interface.\n",
    "midi_file_desc = \"\"\"Upload your own MIDI file here (try to keep it small).\n",
    "If you do not have a MIDI file, add some text and we will turn it into music!\n",
    "\"\"\"\n",
    "\n",
    "# Markdown text handed to gr.Interface through its `article` argument.\n",
    "# NOTE(review): the text describes a 'Pop Music Transformer' based on musicautobot and a\n",
    "# text-sonification feature, while this notebook defines a GRU model (GenerationRNN) and a\n",
    "# process_input() stub -- presumably copied from another demo; confirm and rewrite.\n",
    "article = \"\"\"# Pop Music Transformer\n",
    "We are using a language model to create music by treating a musical standard MIDI a simple text, with tokens for note values, note duration, and separations to denote movement forward in time.\n",
    "\n",
    "This is all following the great work you can find [at this repo](https://github.com/bearpelican/musicautobot). Moreover check out [their full web app](http://musicautobot.com/). We use the pretrained model they created as well as the utilities for converting between MIDI, audio streams, numpy encodings, and WAV files.\n",
    "\n",
    "## Sonification\n",
    "\n",
    "This is the process of turning something not inherently musical into music. Here we do something pretty simple. We take your input text \"pretty cool\", get a sentiment score (hard coded right now, model TODO), and use a major progression if it's positive and a minor progression if it's negative, and then factor the score into the randomness of the generated music. We also take the text and extract a melody by taking any of the letters from A to G, which in the example is just \"E C\". With the simple \"E C\" melody and a major progression a musical idea is generated.\n",
    "\"\"\"\n",
    "\n",
    "# NOTE(review): `gr` is never imported in this notebook -- presumably `import gradio as gr`;\n",
    "# confirm and add it to the first cell.\n",
    "# One entry per argument handed to process_input: file upload, free text, slider, radio.\n",
    "interface_inputs = [\n",
    "    gr.inputs.File(optional=True, label=midi_file_desc),\n",
    "    \"text\",\n",
    "    gr.inputs.Slider(0, 250, default=100, step=50),\n",
    "    gr.inputs.Radio([100, 200, 500], type=\"value\", default=100),\n",
    "]\n",
    "interface_outputs = [\"audio\", \"file\"]\n",
    "\n",
    "iface = gr.Interface(\n",
    "    fn=process_input,\n",
    "    inputs=interface_inputs,\n",
    "    outputs=interface_outputs,\n",
    "    article=article,\n",
    "    # examples=['C major scale.midi']\n",
    ")\n",
    "\n",
    "iface.launch()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}