Spaces:

akhaliq
/

steerable-nafx

Runtime error

App Files Files Community

Ahsen Khaliq committed on Dec 11, 2021

Commit

0fbd9ed

•

1 Parent(s): c43590a

Create app.py

Browse files

Files changed (1) hide show

app.py +269 -0

app.py ADDED Viewed

	@@ -0,0 +1,269 @@

+import os
+os.system("wget https://csteinmetz1.github.io/steerable-nafx/models/compressor_full.pt")
+os.system("wget https://csteinmetz1.github.io/steerable-nafx/models/reverb_full.pt")
+os.system("wget https://csteinmetz1.github.io/steerable-nafx/models/amp_full.pt")
+os.system("wget https://csteinmetz1.github.io/steerable-nafx/models/delay_full.pt")
+os.system("wget https://csteinmetz1.github.io/steerable-nafx/models/delay_full.pt")
+import sys
+import math
+import torch
+import librosa.display
+import IPython
+import auraloss
+import torchaudio
+import numpy as np
+import scipy.signal
+from google.colab import files
+from tqdm.notebook import tqdm
+from time import sleep
+import matplotlib
+import pyloudnorm as pyln
+import matplotlib.pyplot as plt
+from IPython.display import Image
+def measure_rt60(h, fs=1, decay_db=30, rt60_tgt=None):
+    """
+    Analyze the RT60 of an impulse response.
+    Args:
+        h (ndarray): The discrete time impulse response as 1d array.
+        fs (float, optional): Sample rate of the impulse response. (Default: 48000)
+        decay_db (float, optional): The decay in decibels for which we actually estimate the time. (Default: 60)
+        rt60_tgt (float, optional): This parameter can be used to indicate a target RT60. (Default: None)
+    Returns:
+        est_rt60 (float): Estimated RT60.
+    """
+    h = np.array(h)
+    fs = float(fs)
+    # The power of the impulse response in dB
+    power = h ** 2
+    energy = np.cumsum(power[::-1])[::-1]  # Integration according to Schroeder
+    try:
+        # remove the possibly all zero tail
+        i_nz = np.max(np.where(energy > 0)[0])
+        energy = energy[:i_nz]
+        energy_db = 10 * np.log10(energy)
+        energy_db -= energy_db[0]
+        # -5 dB headroom
+        i_5db = np.min(np.where(-5 - energy_db > 0)[0])
+        e_5db = energy_db[i_5db]
+        t_5db = i_5db / fs
+        # after decay
+        i_decay = np.min(np.where(-5 - decay_db - energy_db > 0)[0])
+        t_decay = i_decay / fs
+        # compute the decay time
+        decay_time = t_decay - t_5db
+        est_rt60 = (60 / decay_db) * decay_time
+    except:
+        est_rt60 = np.array(0.0)
+    return est_rt60
+def causal_crop(x, length: int):
+    if x.shape[-1] != length:
+        stop = x.shape[-1] - 1
+        start = stop - length
+        x = x[..., start:stop]
+    return x
+class FiLM(torch.nn.Module):
+    def __init__(
+        self,
+        cond_dim,  # dim of conditioning input
+        num_features,  # dim of the conv channel
+        batch_norm=True,
+    ):
+        super().__init__()
+        self.num_features = num_features
+        self.batch_norm = batch_norm
+        if batch_norm:
+            self.bn = torch.nn.BatchNorm1d(num_features, affine=False)
+        self.adaptor = torch.nn.Linear(cond_dim, num_features * 2)
+    def forward(self, x, cond):
+        cond = self.adaptor(cond)
+        g, b = torch.chunk(cond, 2, dim=-1)
+        g = g.permute(0, 2, 1)
+        b = b.permute(0, 2, 1)
+        if self.batch_norm:
+            x = self.bn(x)  # apply BatchNorm without affine
+        x = (x * g) + b  # then apply conditional affine
+        return x
+class TCNBlock(torch.nn.Module):
+  def __init__(self, in_channels, out_channels, kernel_size, dilation, cond_dim=0, activation=True):
+    super().__init__()
+    self.conv = torch.nn.Conv1d(
+        in_channels,
+        out_channels,
+        kernel_size,
+        dilation=dilation,
+        padding=0, #((kernel_size-1)//2)*dilation,
+        bias=True)
+    if cond_dim > 0:
+      self.film = FiLM(cond_dim, out_channels, batch_norm=False)
+    if activation:
+      #self.act = torch.nn.Tanh()
+      self.act = torch.nn.PReLU()
+    self.res = torch.nn.Conv1d(in_channels, out_channels, 1, bias=False)
+  def forward(self, x, c=None):
+    x_in = x
+    x = self.conv(x)
+    if hasattr(self, "film"):
+      x = self.film(x, c)
+    if hasattr(self, "act"):
+      x = self.act(x)
+    x_res = causal_crop(self.res(x_in), x.shape[-1])
+    x = x + x_res
+    return x
+class TCN(torch.nn.Module):
+  def __init__(self, n_inputs=1, n_outputs=1, n_blocks=10, kernel_size=13, n_channels=64, dilation_growth=4, cond_dim=0):
+    super().__init__()
+    self.kernel_size = kernel_size
+    self.n_channels = n_channels
+    self.dilation_growth = dilation_growth
+    self.n_blocks = n_blocks
+    self.stack_size = n_blocks
+    self.blocks = torch.nn.ModuleList()
+    for n in range(n_blocks):
+      if n == 0:
+        in_ch = n_inputs
+        out_ch = n_channels
+        act = True
+      elif (n+1) == n_blocks:
+        in_ch = n_channels
+        out_ch = n_outputs
+        act = True
+      else:
+        in_ch = n_channels
+        out_ch = n_channels
+        act = True
+      dilation = dilation_growth ** n
+      self.blocks.append(TCNBlock(in_ch, out_ch, kernel_size, dilation, cond_dim=cond_dim, activation=act))
+  def forward(self, x, c=None):
+    for block in self.blocks:
+      x = block(x, c)
+    return x
+  def compute_receptive_field(self):
+    """Compute the receptive field in samples."""
+    rf = self.kernel_size
+    for n in range(1, self.n_blocks):
+        dilation = self.dilation_growth ** (n % self.stack_size)
+        rf = rf + ((self.kernel_size - 1) * dilation)
+    return rf
+# Set up the pre-trained models.
+# Each checkpoint is read from the current working directory, mapped onto the CPU
+# and switched to eval mode. Because .eval() is called directly on the loaded
+# object, every file is expected to hold a whole module, not a bare state dict.
+# NOTE(review): torch.load unpickles its input, so only trusted files should be loaded.
+# NOTE(review): presumably the pickles refer to the TCN / TCNBlock / FiLM classes
+# defined above in this file — confirm.
+model_comp = torch.load("compressor_full.pt", map_location="cpu").eval()
+model_verb = torch.load("reverb_full.pt", map_location="cpu").eval()
+model_amp = torch.load("amp_full.pt", map_location="cpu").eval()
+model_delay = torch.load("delay_full.pt", map_location="cpu").eval()
+model_synth = torch.load("synth2synth_full.pt", map_location="cpu").eval()
+def inference(aud, effect_type):
+  x_p, sample_rate = torchaudio.load(aud.file)
+  effect_type = effect_type #@param ["Compressor", "Reverb", "Amp", "Analog Delay", "Synth2Synth"]
+  gain_dB = -24 #@param {type:"slider", min:-24, max:24, step:0.1}
+  c0 = -1.4 #@param {type:"slider", min:-10, max:10, step:0.1}
+  c1 = 3 #@param {type:"slider", min:-10, max:10, step:0.1}
+  mix = 70 #@param {type:"slider", min:0, max:100, step:1}
+  width = 50 #@param {type:"slider", min:0, max:100, step:1}
+  max_length = 30 #@param {type:"slider", min:5, max:120, step:1}
+  stereo = True #@param {type:"boolean"}
+  tail = True #@param {type:"boolean"}
+  # select model type
+  if effect_type == "Compressor":
+    pt_model = model_comp
+  elif effect_type == "Reverb":
+    pt_model = model_verb
+  elif effect_type == "Amp":
+    pt_model = model_amp
+  elif effect_type == "Analog Delay":
+    pt_model = model_delay
+  elif effect_type == "Synth2Synth":
+    pt_model = model_synth
+  # measure the receptive field
+  pt_model_rf = pt_model.compute_receptive_field()
+  # crop input signal if needed
+  max_samples = int(sample_rate * max_length)
+  x_p_crop = x_p[:,:max_samples]
+  chs = x_p_crop.shape[0]
+  # if mono and stereo requested
+  if chs == 1 and stereo:
+    x_p_crop = x_p_crop.repeat(2,1)
+    chs = 2
+  # pad the input signal
+  front_pad = pt_model_rf-1
+  back_pad = 0 if not tail else front_pad
+  x_p_pad = torch.nn.functional.pad(x_p_crop, (front_pad, back_pad))
+  # design highpass filter
+  sos = scipy.signal.butter(
+      8,
+      20.0,
+      fs=sample_rate,
+      output="sos",
+      btype="highpass"
+  )
+  # compute linear gain
+  gain_ln = 10 ** (gain_dB / 20.0)
+  # process audio with pre-trained model
+  with torch.no_grad():
+    y_hat = torch.zeros(x_p_crop.shape[0], x_p_crop.shape[1] + back_pad)
+    for n in range(chs):
+      if n == 0:
+        factor = (width*5e-3)
+      elif n == 1:
+        factor = -(width*5e-3)
+      c = torch.tensor([float(c0+factor), float(c1+factor)]).view(1,1,-1)
+      y_hat_ch = pt_model(gain_ln * x_p_pad[n,:].view(1,1,-1), c)
+      y_hat_ch = scipy.signal.sosfilt(sos, y_hat_ch.view(-1).numpy())
+      y_hat_ch = torch.tensor(y_hat_ch)
+      y_hat[n,:] = y_hat_ch
+  # pad the dry signal
+  x_dry = torch.nn.functional.pad(x_p_crop, (0,back_pad))
+  # normalize each first
+  y_hat /= y_hat.abs().max()
+  x_dry /= x_dry.abs().max()
+  # mix
+  mix = mix/100.0
+  y_hat = (mix * y_hat) + ((1-mix) * x_dry)
+  # remove transient
+  y_hat = y_hat[...,8192:]
+  y_hat /= y_hat.abs().max()
+  torchaudio.save("output.mp3", y_hat.view(chs,-1), sample_rate, compression=320.0)
+  return "output.mp3"