teticio committed
Commit d80e37b
1 Parent(s): f30235e

fix audio loading

Files changed (1):
  1. notebooks/test_model.ipynb +7 -6
notebooks/test_model.ipynb CHANGED
@@ -46,6 +46,7 @@
   "source": [
   "import torch\n",
   "import random\n",
+  "import librosa\n",
   "import numpy as np\n",
   "from datasets import load_dataset\n",
   "from IPython.display import Audio\n",
@@ -266,8 +267,8 @@
   "source": [
   "start_step = 500 #@param {type:\"slider\", min:0, max:1000, step:10}\n",
   "overlap_secs = 2 #@param {type:\"integer\"}\n",
-  "mel.load_audio(audio_file)\n",
-  "overlap_samples = overlap_secs * mel.get_sample_rate()\n",
+  "track_audio, _ = librosa.load(audio_file, mono=True, sr=sample_rate)\n",
+  "overlap_samples = overlap_secs * sample_rate\n",
   "slice_size = mel.x_res * mel.hop_length\n",
   "stride = slice_size - overlap_samples\n",
   "generator = torch.Generator(device=device)\n",
@@ -275,9 +276,9 @@
   "print(f'Seed = {seed}')\n",
   "track = np.array([])\n",
   "not_first = 0\n",
-  "for sample in range(len(mel.audio) // stride):\n",
+  "for sample in range(len(track_audio) // stride):\n",
   "    generator.manual_seed(seed)\n",
-  "    audio = np.array(mel.audio[sample * stride:sample * stride + slice_size])\n",
+  "    audio = np.array(track_audio[sample * stride:sample * stride + slice_size])\n",
   "    if not_first:\n",
   "        # Normalize and re-insert generated audio\n",
   "        audio[:overlap_samples] = audio2[-overlap_samples:] * np.max(\n",
@@ -309,7 +310,7 @@
   "outputs": [],
   "source": [
   "slice = 3 #@param {type:\"integer\"}\n",
-  "raw_audio = mel.get_audio_slice(slice)\n",
+  "raw_audio = track_audio[sample * stride:sample * stride + slice_size]\n",
   "_, (sample_rate,\n",
   "    audio2) = audio_diffusion.generate_spectrogram_and_audio_from_audio(\n",
   "        raw_audio=raw_audio,\n",
@@ -507,7 +508,7 @@
   "metadata": {},
   "outputs": [],
   "source": [
-  "model_id = \"teticio/latent-audio-diffusion-ddim-256-new\" #@param [\"teticio/latent-audio-diffusion-256\", \"teticio/latent-audio-diffusion-ddim-256\"]"
+  "model_id = \"teticio/latent-audio-diffusion-ddim-256\" #@param [\"teticio/latent-audio-diffusion-256\", \"teticio/latent-audio-diffusion-ddim-256\"]"
   ]
  },
  {
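For context, the pattern this commit switches to can be sketched standalone as below. This is a minimal sketch, not the notebook itself: sample_rate, x_res and hop_length are assumed to match the Mel defaults used elsewhere in the notebook, and "track.wav" is a placeholder path, none of which come from the commit.

import librosa
import numpy as np

audio_file = "track.wav"      # placeholder input, not from the commit
sample_rate = 22050           # assumed Mel sample rate
x_res, hop_length = 256, 512  # assumed Mel resolution and hop length

# The fix: load the whole track once with librosa, resampled and mono,
# instead of going through mel.load_audio / mel.get_sample_rate.
track_audio, _ = librosa.load(audio_file, mono=True, sr=sample_rate)

overlap_secs = 2
overlap_samples = overlap_secs * sample_rate
slice_size = x_res * hop_length        # samples covered by one spectrogram
stride = slice_size - overlap_samples  # step between overlapping windows

# Step through the track in overlapping windows, as the regeneration loop does;
# each window would be handed to the diffusion model, with the overlap region
# blended against the previous output before being appended to the track.
for sample in range(len(track_audio) // stride):
    window = np.array(track_audio[sample * stride:sample * stride + slice_size])

Loading once into track_audio and indexing it directly is what lets the regeneration loop and the slice cell both drop their mel.audio and mel.get_audio_slice calls.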