{ "cells": [ { "cell_type": "markdown", "metadata": { "id": "VGrGd6__l5ch" }, "source": [ "# Monster Piano Transformer No Velocity Maker (ver. 1.0)\n", "\n", "***\n", "\n", "Powered by tegridy-tools: https://github.com/asigalov61/tegridy-tools\n", "\n", "***\n", "\n", "WARNING: This complete implementation is a functioning model of the Artificial Intelligence. Please excercise great humility, care, and respect. https://www.nscai.gov/\n", "\n", "***\n", "\n", "#### Project Los Angeles\n", "\n", "#### Tegridy Code 2025\n", "\n", "***" ] }, { "cell_type": "markdown", "metadata": { "id": "shLrgoXdl5cj" }, "source": [ "# GPU check" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "X3rABEpKCO02" }, "outputs": [], "source": [ "!nvidia-smi" ] }, { "cell_type": "markdown", "metadata": { "id": "0RcVC4btl5ck" }, "source": [ "# Setup environment" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "viHgEaNACPTs" }, "outputs": [], "source": [ "!git clone --depth 1 https://github.com/asigalov61/tegridy-tools" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "vK40g6V_BTNj" }, "outputs": [], "source": [ "!pip install datasets\n", "!pip install huggingface_hub\n", "!pip install hf-transfer\n", "!pip install ipywidgets\n", "!pip install tqdm\n", "\n", "!sudo pip install einops\n", "!sudo pip install torch-summary\n", "!sudo pip install -U tqdm\n", "!sudo pip install huggingface_hub" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Import Modules" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "DzCOZU_gBiQV" }, "outputs": [], "source": [ "# Load modules and make data dir\n", "\n", "print('Loading modules...')\n", "\n", "import os\n", "\n", "os.environ[\"HF_HUB_ENABLE_HF_TRANSFER\"] = \"1\"\n", "\n", "import pickle\n", "import random\n", "import secrets\n", "import tqdm\n", "import math\n", "\n", "import gc\n", "\n", "!set USE_FLASH_ATTENTION=1\n", "os.environ['USE_FLASH_ATTENTION'] = '1'\n", "\n", "import torch\n", "import torch.optim as optim\n", "\n", "from torch.utils.data import DataLoader, Dataset\n", "\n", "import matplotlib.pyplot as plt\n", "\n", "from torchsummary import summary\n", "from sklearn import metrics\n", "\n", "from datasets import load_dataset\n", "\n", "from huggingface_hub import hf_hub_download\n", "\n", "%cd /home/ubuntu/tegridy-tools/tegridy-tools/\n", "\n", "import TMIDIX\n", "\n", "%cd /home/ubuntu/tegridy-tools/tegridy-tools/X-Transformer\n", "\n", "from x_transformer_1_23_2 import *\n", "\n", "torch.set_float32_matmul_precision('high')\n", "torch.backends.cuda.matmul.allow_tf32 = True # allow tf32 on matmul\n", "torch.backends.cudnn.allow_tf32 = True # allow tf32 on cudnn\n", "torch.backends.cuda.enable_flash_sdp(True)\n", "torch.backends.cuda.enable_cudnn_sdp(False)\n", "\n", "!set USE_FLASH_ATTENTION=1\n", "\n", "%cd /home/ubuntu/\n", "\n", "if not os.path.exists('/home/ubuntu/INTS'):\n", " os.makedirs('/home/ubuntu/INTS')\n", "\n", "import random\n", "\n", "print('Done')\n", "\n", "print('Torch version:', torch.__version__)" ] }, { "cell_type": "markdown", "metadata": { "id": "cd-51e9wooMs" }, "source": [ "# Load Training Data" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "monster_piano = load_dataset('asigalov61/Monster-Piano')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Prep Training Data" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "SEQ_LEN = 
2048\n", "PAD_IDX = 384 # Model pad index\n", "\n", "#==========================================================================\n", "\n", "print('=' * 70)\n", "print('Loading data files...')\n", "print('Please wait...')\n", "print('=' * 70)\n", "\n", "train_data = set()\n", "\n", "chunks_counter = 0\n", "\n", "for entry in tqdm.tqdm(monster_piano['train']):\n", "\n", " score = entry['midi_score']\n", " score = [t for t in score if t < 384]\n", "\n", " if 0 <= max(score) < PAD_IDX: # final data integrity check\n", "\n", " for i in range(0, len(score), SEQ_LEN-1024):\n", " \n", " chunk = score[i:i+SEQ_LEN+1]\n", "\n", " chunks_counter += 1\n", "\n", " if len(chunk) < SEQ_LEN+1:\n", " chunk += [PAD_IDX] * (SEQ_LEN+1 - len(chunk))\n", "\n", " train_data.add(tuple(chunk))\n", "\n", " else:\n", " print('Bad data!!!')\n", "\n", "#==========================================================================\n", "\n", "train_data = list(train_data)\n", "\n", "#==========================================================================\n", "\n", "print('Done!')\n", "print('=' * 70)\n", "print('Total number of main chunks:', chunks_counter)\n", "print('All data is good:', len(max(train_data, key=len)) == len(min(train_data, key=len)))\n", "print('=' * 70)\n", "print('Randomizing train data...')\n", "random.shuffle(train_data)\n", "print('Done!')\n", "print('=' * 70)\n", "print('Total length of train data:', len(train_data))\n", "print('=' * 70)" ] }, { "cell_type": "markdown", "metadata": { "id": "VhZqBvqVl5cn" }, "source": [ "# Setup model" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "mfwp06xzzPZ5" }, "outputs": [], "source": [ "# Setup model\n", "\n", "# constants\n", "\n", "VALIDATE_EVERY = 500\n", "SAVE_EVERY = 2500\n", "GENERATE_EVERY = 1000\n", "GENERATE_LENGTH = 512\n", "PRINT_STATS_EVERY = 50\n", "\n", "NUM_EPOCHS = 10\n", "\n", "BATCH_SIZE = 116\n", "GRADIENT_ACCUMULATE_EVERY = 1\n", "\n", "LEARNING_RATE = 1e-4\n", "GRAD_CLIP = 1.5\n", "\n", "# instantiate the model\n", "\n", "model = TransformerWrapper(\n", " num_tokens = PAD_IDX+1,\n", " max_seq_len = SEQ_LEN,\n", " attn_layers = Decoder(dim = 2048,\n", " depth = 4,\n", " heads = 32,\n", " rotary_pos_emb = True,\n", " attn_flash = True\n", " )\n", " )\n", "\n", "model = AutoregressiveWrapper(model, ignore_index = PAD_IDX, pad_value=PAD_IDX)\n", "\n", "model.cuda()\n", "\n", "print('Done!')\n", "\n", "summary(model)\n", "\n", "# Dataloader\n", "\n", "def get_train_data_batch(tdata, index, seq_len, batch_size, pad_idx):\n", "\n", " batch = tdata[(index*batch_size):(index*batch_size)+batch_size]\n", "\n", " return torch.LongTensor(batch).cuda()\n", "\n", "# precision/optimizer/scaler\n", "\n", "dtype = torch.bfloat16\n", "\n", "ctx = torch.amp.autocast(device_type='cuda', dtype=dtype)\n", "\n", "optim = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)\n", "\n", "scaler = torch.amp.GradScaler('cuda')" ] }, { "cell_type": "markdown", "metadata": { "id": "xJPxxFiwl5cn" }, "source": [ "# Train" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "HETGqz_6K1ml", "scrolled": true }, "outputs": [], "source": [ "# Train the model\n", "\n", "train_losses = []\n", "val_losses = []\n", "\n", "train_accs = []\n", "val_accs = []\n", "\n", "nsteps = 0\n", "\n", "for ep in range(NUM_EPOCHS):\n", "\n", " print('=' * 70)\n", " print('Randomizing train data...')\n", " random.shuffle(train_data)\n", " print('=' * 70)\n", "\n", " print('=' * 70)\n", " print('Epoch #', ep)\n", " print('=' * 70)\n", "\n", " 
NUM_BATCHES = len(train_data) // BATCH_SIZE // GRADIENT_ACCUMULATE_EVERY\n", "\n", " model.train()\n", "\n", " for i in tqdm.tqdm(range(NUM_BATCHES), mininterval=10., desc='Training'):\n", "\n", " optim.zero_grad()\n", "\n", " for j in range(GRADIENT_ACCUMULATE_EVERY):\n", " with ctx:\n", " loss, acc = model(get_train_data_batch(train_data, (i*GRADIENT_ACCUMULATE_EVERY)+j, SEQ_LEN, BATCH_SIZE, PAD_IDX))\n", " #loss = loss / GRADIENT_ACCUMULATE_EVERY\n", " scaler.scale(loss).backward()\n", "\n", " if i % PRINT_STATS_EVERY == 0:\n", " print(f'Training loss: {loss.item() * GRADIENT_ACCUMULATE_EVERY}')\n", " print(f'Training acc: {acc.item()}')\n", "\n", " train_losses.append(loss.item() * GRADIENT_ACCUMULATE_EVERY)\n", " train_accs.append(acc.item())\n", "\n", " scaler.unscale_(optim)\n", " torch.nn.utils.clip_grad_norm_(model.parameters(), GRAD_CLIP)\n", " scaler.step(optim)\n", " scaler.update()\n", "\n", " nsteps += 1\n", "\n", " if i % VALIDATE_EVERY == 0:\n", " model.eval()\n", " with torch.no_grad():\n", " with ctx:\n", " val_loss, val_acc = model(get_train_data_batch(train_data, i, SEQ_LEN, BATCH_SIZE, PAD_IDX))\n", "\n", " print(f'Validation loss: {val_loss.item()}')\n", " print(f'Validation acc: {val_acc.item()}')\n", "\n", " val_losses.append(val_loss.item())\n", " val_accs.append(val_acc.item())\n", "\n", " print('Plotting training loss graph...')\n", "\n", " tr_loss_list = train_losses\n", " plt.plot([i for i in range(len(tr_loss_list))] ,tr_loss_list, 'b')\n", " plt.show()\n", " plt.close()\n", " print('Done!')\n", "\n", " print('Plotting training acc graph...')\n", "\n", " tr_loss_list = train_accs\n", " plt.plot([i for i in range(len(tr_loss_list))] ,tr_loss_list, 'b')\n", " plt.show()\n", " plt.close()\n", " print('Done!')\n", "\n", " print('Plotting validation loss graph...')\n", " tr_loss_list = val_losses\n", " plt.plot([i for i in range(len(tr_loss_list))] ,tr_loss_list, 'b')\n", " plt.show()\n", " plt.close()\n", " print('Done!')\n", "\n", " print('Plotting validation acc graph...')\n", " tr_loss_list = val_accs\n", " plt.plot([i for i in range(len(tr_loss_list))] ,tr_loss_list, 'b')\n", " plt.show()\n", " plt.close()\n", " print('Done!')\n", "\n", " model.train()\n", "\n", " if i % GENERATE_EVERY == 0:\n", " model.eval()\n", "\n", " inp = random.choice(get_train_data_batch(train_data, i, SEQ_LEN, BATCH_SIZE, PAD_IDX))[:GENERATE_LENGTH]\n", "\n", " print(inp)\n", "\n", " with ctx:\n", " sample = model.generate(inp[None, ...], GENERATE_LENGTH)\n", "\n", " print(sample)\n", "\n", " data = sample.tolist()[0]\n", "\n", " print('Sample INTs', data[:15])\n", "\n", " if len(data) != 0:\n", "\n", " song = data\n", " song_f = []\n", "\n", " time = 0\n", " dur = 1\n", " vel = 90\n", " pitch = 60\n", " channel = 0\n", " patch = 0\n", "\n", " patches = [0] * 16\n", "\n", " for m in song:\n", "\n", " if 0 <= m < 128:\n", " time += m * 32\n", " \n", " elif 128 < m < 256:\n", " dur = (m-128) * 32\n", " \n", " elif 256 < m < 384:\n", " pitch = (m-256)\n", " \n", " song_f.append(['note', time, dur, 0, pitch, vel, 0])\n", "\n", "\n", " detailed_stats = TMIDIX.Tegridy_ms_SONG_to_MIDI_Converter(song_f,\n", " output_signature = 'Monster Piano Transformer',\n", " output_file_name = '/home/ubuntu/Monster-Piano-Transformer-Composition',\n", " track_name='Project Los Angeles',\n", " list_of_MIDI_patches=patches\n", " )\n", "\n", " print('Done!')\n", "\n", " model.train()\n", "\n", " if i % SAVE_EVERY == 0:\n", "\n", " print('Saving model progress. 
Please wait...')\n", " print('model_checkpoint_' + str(nsteps) + '_steps_' + str(round(float(train_losses[-1]), 4)) + '_loss_' + str(round(float(train_accs[-1]), 4)) + '_acc.pth')\n", "\n", " fname = '/home/ubuntu/model_checkpoint_' + str(nsteps) + '_steps_' + str(round(float(train_losses[-1]), 4)) + '_loss_' + str(round(float(train_accs[-1]), 4)) + '_acc.pth'\n", "\n", " torch.save(model.state_dict(), fname)\n", "\n", " data = [train_losses, train_accs, val_losses, val_accs]\n", "\n", " TMIDIX.Tegridy_Any_Pickle_File_Writer(data, '/home/ubuntu/losses_accs')\n", "\n", " print('Done!')" ] }, { "cell_type": "markdown", "metadata": { "id": "wBkMH2gWl5co" }, "source": [ "# Final Save" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "gjBJnzZxWslL" }, "outputs": [], "source": [ "print('Saving model progress. Please wait...')\n", "print('model_checkpoint_' + str(nsteps) + '_steps_' + str(round(float(train_losses[-1]), 4)) + '_loss_' + str(round(float(train_accs[-1]), 4)) + '_acc.pth')\n", "\n", "fname = '/home/ubuntu/model_checkpoint_' + str(nsteps) + '_steps_' + str(round(float(train_losses[-1]), 4)) + '_loss_' + str(round(float(train_accs[-1]), 4)) + '_acc.pth'\n", "\n", "torch.save(model.state_dict(), fname)\n", "#torch.save(optim.state_dict(), fname+'_opt')\n", "\n", "print('Done!')\n", "\n", "data = [train_losses, train_accs, val_losses, val_accs]\n", "\n", "TMIDIX.Tegridy_Any_Pickle_File_Writer(data, '/home/ubuntu/losses_accuracies')\n", "\n", "# Save training loss graph\n", "\n", "plt.plot([i for i in range(len(train_losses))] ,train_losses, 'b')\n", "plt.savefig('/home/ubuntu/training_loss_graph.png')\n", "plt.close()\n", "print('Done!')\n", "\n", "# Save training acc graph\n", "\n", "plt.plot([i for i in range(len(train_accs))] ,train_accs, 'b')\n", "plt.savefig('/home/ubuntu/training_acc_graph.png')\n", "plt.close()\n", "print('Done!')\n", "\n", "# Save validation loss graph\n", "\n", "plt.plot([i for i in range(len(val_losses))] ,val_losses, 'b')\n", "plt.savefig('/home/ubuntu/validation_loss_graph.png')\n", "plt.close()\n", "print('Done!')\n", "\n", "# Save validation acc graph\n", "\n", "plt.plot([i for i in range(len(val_accs))] ,val_accs, 'b')\n", "plt.savefig('/home/ubuntu/validation_acc_graph.png')\n", "plt.close()\n", "print('Done!')" ] }, { "cell_type": "markdown", "metadata": { "id": "feXay_Ed7mG5" }, "source": [ "# Eval" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "SA8qQSzbWslM" }, "outputs": [], "source": [ "hf_hub_download(repo_id='asigalov61/Monster-Piano-Transformer',\n", " filename='Monster_Piano_Transformer_No_Velocity_Trained_Model_161960_steps_0.7775_loss_0.7661_acc.pth',\n", " local_dir='/home/ubuntu/Models/',\n", " )" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "gSvqSRLaWslM" }, "outputs": [], "source": [ "SEQ_LEN = 2048\n", "PAD_IDX = 384\n", "\n", "model = TransformerWrapper(\n", " num_tokens = PAD_IDX+1,\n", " max_seq_len = SEQ_LEN,\n", " attn_layers = Decoder(dim = 2048,\n", " depth = 4,\n", " heads = 32,\n", " rotary_pos_emb = True,\n", " attn_flash = True\n", " )\n", " )\n", "\n", "model = AutoregressiveWrapper(model, ignore_index = PAD_IDX, pad_value=PAD_IDX)\n", "\n", "print('=' * 70)\n", "print('Loading model checkpoint...')\n", "\n", "model_path = 'Models/Monster_Piano_Transformer_No_Velocity_Trained_Model_161960_steps_0.7775_loss_0.7661_acc.pth'\n", "\n", "model.load_state_dict(torch.load(model_path, weights_only=True))\n", "\n", "print('=' * 70)\n", "\n", "model.cuda()\n", 
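"# Switch the model to inference mode; generation below runs inside the bfloat16 autocast context created at the end of this cell\n", 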
"model.eval()\n", "\n", "print('Done!')\n", "\n", "summary(model)\n", "\n", "dtype = torch.bfloat16\n", "\n", "ctx = torch.amp.autocast(device_type='cuda', dtype=dtype)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "enHpaHxaWslM" }, "outputs": [], "source": [ "midi_file = '/home/ubuntu/tegridy-tools/tegridy-tools/seed2.mid'\n", "\n", "raw_score = TMIDIX.midi2single_track_ms_score(midi_file)\n", "escore_notes = TMIDIX.advanced_score_processor(raw_score, return_enhanced_score_notes=True)[0]\n", "escore_notes = TMIDIX.augment_enhanced_score_notes(escore_notes, timings_divider=32)\n", "\n", "sp_escore_notes = TMIDIX.solo_piano_escore_notes(escore_notes, keep_drums=False)\n", "zscore = TMIDIX.recalculate_score_timings(sp_escore_notes)\n", "\n", "cscore = TMIDIX.chordify_score([1000, zscore])\n", "\n", "score = []\n", "\n", "pc = cscore[0]\n", "\n", "for c in cscore:\n", " score.append(max(0, min(127, c[0][1]-pc[0][1])))\n", "\n", " for n in c:\n", " score.extend([max(1, min(127, n[2]))+128, max(1, min(127, n[4]))+256])\n", "\n", " pc = c\n", "\n", "print('Done!')\n", "print('=' * 70)\n", "print(len(score))\n", "print('=' * 70)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "naf65RxUXwDg" }, "outputs": [], "source": [ "x = torch.LongTensor(score[:1024]).cuda()\n", "\n", "with ctx:\n", " out = model.generate(x,\n", " 1024,\n", " temperature=0.9,\n", " #filter_logits_fn=top_k,\n", " #filter_kwargs={'k': 15},\n", " return_prime=True,\n", " verbose=True)\n", "\n", "y = out.tolist()\n", "\n", "print('---------------')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "tlBzqWpAnZna" }, "outputs": [], "source": [ "#@title Test INTs\n", "\n", "data = y[0]\n", "\n", "print('Sample INTs', data[:15])\n", "\n", "if len(data) != 0:\n", "\n", " song = data\n", " song_f = []\n", "\n", " time = 0\n", " dur = 1\n", " vel = 90\n", " pitch = 60\n", " channel = 0\n", " patch = 0\n", "\n", " patches = [0] * 16\n", "\n", " for m in song:\n", "\n", " if 0 <= m < 128:\n", " time += m * 32\n", "\n", " elif 128 < m < 256:\n", " dur = (m-128) * 32\n", "\n", " elif 256 < m < 384:\n", " pitch = (m-256)\n", "\n", " song_f.append(['note', time, dur, 0, pitch, vel, 0])\n", "\n", "\n", " detailed_stats = TMIDIX.Tegridy_ms_SONG_to_MIDI_Converter(song_f,\n", " output_signature = 'Monster Piano Transformer',\n", " output_file_name = '/home/ubuntu/Monster-Piano-Transformer-Composition',\n", " track_name='Project Los Angeles',\n", " list_of_MIDI_patches=patches\n", " )\n", "\n", "print('Done!')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "al3TDlH7T8m7" }, "outputs": [], "source": [ "tok_emb = model.net.token_emb.emb.weight.detach().cpu().tolist()\n", "\n", "cos_sim = metrics.pairwise_distances(\n", " tok_emb, metric='cosine'\n", ")\n", "plt.figure(figsize=(7, 7))\n", "plt.imshow(cos_sim, cmap=\"inferno\", interpolation=\"nearest\")\n", "im_ratio = cos_sim.shape[0] / cos_sim.shape[1]\n", "plt.colorbar(fraction=0.046 * im_ratio, pad=0.04)\n", "plt.xlabel(\"Position\")\n", "plt.ylabel(\"Position\")\n", "plt.tight_layout()\n", "plt.plot()\n", "plt.savefig(\"/home/ubuntu/Monster-Piano-Transformer-Tokens-Embeddings-Plot.png\", bbox_inches=\"tight\")" ] }, { "cell_type": "markdown", "metadata": { "id": "z87TlDTVl5cp" }, "source": [ "# Congrats! You did it! 
:)" ] } ], "metadata": { "accelerator": "GPU", "colab": { "gpuClass": "premium", "gpuType": "T4", "private_outputs": true, "provenance": [] }, "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.12" } }, "nbformat": 4, "nbformat_minor": 4 }