{ "cells": [ { "cell_type": "markdown", "id": "62c5865f", "metadata": {}, "source": [ "\"Open" ] }, { "cell_type": "code", "execution_count": 25, "id": "6c7800a6", "metadata": {}, "outputs": [], "source": [ "try:\n", " # are we running on Google Colab?\n", " import google.colab\n", " !git clone -q https://github.com/teticio/audio-diffusion.git\n", " %cd audio-diffusion\n", " !pip install -q -r requirements.txt\n", "except:\n", " pass" ] }, { "cell_type": "code", "execution_count": 26, "id": "b447e2c4", "metadata": {}, "outputs": [], "source": [ "import os\n", "import sys\n", "sys.path.insert(0, os.path.dirname(os.path.abspath(\"\")))" ] }, { "cell_type": "code", "execution_count": 41, "id": "c2fc0e7a", "metadata": {}, "outputs": [], "source": [ "import torch\n", "import random\n", "import numpy as np\n", "from datasets import load_dataset\n", "from IPython.display import Audio\n", "from audiodiffusion.mel import Mel\n", "from audiodiffusion import AudioDiffusion" ] }, { "cell_type": "markdown", "id": "7fd945bb", "metadata": {}, "source": [ "### Select model" ] }, { "cell_type": "code", "execution_count": 28, "id": "97f24046", "metadata": {}, "outputs": [], "source": [ "#@markdown teticio/audio-diffusion-256 - trained on my Spotify \"liked\" playlist\n", "\n", "#@markdown teticio/audio-diffusion-breaks-256 - trained on samples used in music\n", "\n", "#@markdown teticio/audio-diffusion-instrumental-hiphop-256 - trained on instrumental hiphop\n", "\n", "model_id = \"teticio/audio-diffusion-256\" #@param [\"teticio/audio-diffusion-256\", \"teticio/audio-diffusion-breaks-256\", \"audio-diffusion-instrumenal-hiphop-256\"]" ] }, { "cell_type": "code", "execution_count": 29, "id": "a3d45c36", "metadata": {}, "outputs": [], "source": [ "audio_diffusion = AudioDiffusion(model_id=model_id)" ] }, { "cell_type": "markdown", "id": "011fb5a1", "metadata": {}, "source": [ "### Run model inference to generate mel spectrogram, audios and loops" ] }, { "cell_type": "code", "execution_count": null, "id": "b809fed5", "metadata": {}, "outputs": [], "source": [ "generator = torch.Generator()\n", "for _ in range(10):\n", " print(f'Seed = {generator.seed()}')\n", " image, (sample_rate, audio) = audio_diffusion.generate_spectrogram_and_audio(generator)\n", " display(image)\n", " display(Audio(audio, rate=sample_rate))\n", " loop = AudioDiffusion.loop_it(audio, sample_rate)\n", " if loop is not None:\n", " display(Audio(loop, rate=sample_rate))\n", " else:\n", " print(\"Unable to determine loop points\")" ] }, { "cell_type": "markdown", "id": "0bb03e33", "metadata": {}, "source": [ "### Generate variations of audios" ] }, { "cell_type": "markdown", "id": "80e5b5fa", "metadata": {}, "source": [ "Try playing around with `start_steps`. Values closer to zero will produce new samples, while values closer to 1,000 will produce samples more faithful to the original." ] }, { "cell_type": "code", "execution_count": null, "id": "a7e637e5", "metadata": {}, "outputs": [], "source": [ "seed = 16183389798189209330 #@param {type:\"integer\"}\n", "image, (sample_rate,\n", " audio) = audio_diffusion.generate_spectrogram_and_audio_from_audio(\n", " generator=torch.Generator().manual_seed(seed))\n", "display(image)\n", "display(Audio(audio, rate=sample_rate))" ] }, { "cell_type": "code", "execution_count": null, "id": "a0fefe28", "metadata": { "scrolled": false }, "outputs": [], "source": [ "start_steps = 500 #@param {type:\"slider\", min:0, max:1000, step:10}\n", "track = AudioDiffusion.loop_it(audio, sample_rate, loops=1)\n", "for variation in range(12):\n", " image2, (\n", " sample_rate, audio2\n", " ) = audio_diffusion.generate_spectrogram_and_audio_from_audio(\n", " raw_audio=audio,\n", " start_step=start_steps)\n", " display(image2)\n", " display(Audio(audio2, rate=sample_rate))\n", " track = np.concatenate([track, AudioDiffusion.loop_it(audio2, sample_rate, loops=1)])\n", "display(Audio(track, rate=sample_rate))" ] }, { "cell_type": "markdown", "id": "b6434d3f", "metadata": {}, "source": [ "### Remix (style transfer)" ] }, { "cell_type": "markdown", "id": "0da030b2", "metadata": {}, "source": [ "Alternatively, you can start from another audio altogether, resulting in a kind of style transfer." ] }, { "cell_type": "code", "execution_count": 50, "id": "fc620a80", "metadata": {}, "outputs": [], "source": [ "try:\n", " # are we running on Google Colab?\n", " from google.colab import files\n", " audio_file = list(files.upload().keys())[0]\n", "except:\n", " audio_file = \"/home/teticio/Music/Music/Sven Väth/In the Mix_ The Sound of the Sixteenth S/14 Eclipse.m4a\"" ] }, { "cell_type": "code", "execution_count": null, "id": "5a257e69", "metadata": { "scrolled": true }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "6e741e6bd196458fa38f86197bd16378", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/500 [00:00