"""Gradio demo that renders uploaded audio through an effects model.

The effect parameters are produced from principal-component weights: the first
``NUMBER_OF_PCS`` weights come from UI sliders, the remaining ones are either
zero or random, and the weighted components are added to a stored mean vector
before being loaded into the model as a state dict.
"""

import json
from functools import partial
from random import normalvariate

import gradio as gr
import numpy as np
import pyloudnorm as pyln
import torch
import yaml
from hydra.utils import instantiate
from soxr import resample

from modules.fx import clip_delay_eq_Q
from modules.utils import chain_functions, vec2statedict, get_chunks

SLIDER_MAX = 3
SLIDER_MIN = -3
NUMBER_OF_PCS = 10
TEMPERATURE = 0.7
# Sample rate used for the loudness meter, the processing and the output.
SAMPLE_RATE = 44100

CONFIG_PATH = "presets/rt_config.yaml"
PCA_PARAM_FILE = "presets/internal/gaussian.npz"
INFO_PATH = "presets/internal/info.json"

# Build the effects model from the "model" section of the YAML config.
with open(CONFIG_PATH) as fp:
    fx_config = yaml.safe_load(fp)["model"]

fx = instantiate(fx_config)
fx.eval()

# Eigen-decompose the stored covariance. ``eigh`` returns eigenvalues in
# ascending order, so both outputs are flipped to put the largest first, and
# only the leading 75 components are kept.
pca_params = np.load(PCA_PARAM_FILE)
mean = pca_params["mean"]
cov = pca_params["cov"]
eigvals, eigvecs = np.linalg.eigh(cov)
eigvals = np.flip(eigvals, axis=0)[:75]
eigvecs = np.flip(eigvecs, axis=1)[:, :75]
# Scale each eigenvector by the square root of its eigenvalue so that a
# unit-variance weight vector ``z`` maps to ``U @ z`` with covariance ~ ``cov``.
U = eigvecs * np.sqrt(eigvals)
U = torch.from_numpy(U).float()
mean = torch.from_numpy(mean).float()

with open(INFO_PATH) as f:
    info = json.load(f)

param_keys = info["params_keys"]
# Scalar parameters are stored with an empty shape; treat them as shape [1].
original_shapes = [shape or [1] for shape in info["params_original_shapes"]]

# The last value returned by ``get_chunks`` is not needed here.
*vec2dict_args, _ = get_chunks(param_keys, original_shapes)
vec2dict_args = [param_keys, original_shapes] + vec2dict_args
# Bind the static arguments of ``vec2statedict`` by keyword so that
# ``vec2dict(x)`` only needs the parameter vector.
vec2dict = partial(
    vec2statedict,
    **dict(
        zip(
            [
                "keys",
                "original_shapes",
                "selected_chunks",
                "position",
                "U_matrix_shape",
            ],
            vec2dict_args,
        )
    ),
)

meter = pyln.Meter(SAMPLE_RATE)


@torch.no_grad()
def inference(audio, randomise_rest, *pcs):
    """Render ``audio`` through ``fx`` using the given principal-component weights.

    Args:
        audio: ``(sample_rate, samples)`` tuple as delivered by
            ``gr.Audio(type="numpy")``, or ``None`` when nothing was uploaded.
        randomise_rest: When truthy, the components beyond ``pcs`` are drawn
            from a normal distribution scaled by ``TEMPERATURE``; otherwise
            they are zero.
        *pcs: Weights of the leading principal components (slider values).

    Returns:
        ``(SAMPLE_RATE, samples)`` where ``samples`` is an ``int16`` array.

    Raises:
        gr.Error: If no audio was provided.
    """
    if audio is None:
        # Surface a readable message in the UI instead of an unpacking TypeError.
        raise gr.Error("Please upload an audio file before pressing Run.")

    sr, y = audio
    if sr != SAMPLE_RATE:
        y = resample(y, sr, SAMPLE_RATE)
    if y.dtype.kind != "f":
        # Integer PCM is scaled assuming 16-bit full scale.
        y = y / 32768.0
    if y.ndim == 1:
        y = y[:, None]

    loudness = meter.integrated_loudness(y)
    # Silent input measures as -inf; normalising it would apply infinite gain
    # and fill the signal with NaNs, so leave it untouched in that case.
    if np.isfinite(loudness):
        y = pyln.normalize.loudness(y, loudness, -18.0)

    # (samples, channels) -> (1, channels, samples), then downmix to mono.
    y = torch.from_numpy(y).float().T.unsqueeze(0)
    if y.shape[1] != 1:
        y = y.mean(dim=1, keepdim=True)

    M = eigvals.shape[0]
    n_rest = M - len(pcs)
    rest = torch.randn(n_rest) * TEMPERATURE if randomise_rest else torch.zeros(n_rest)
    z = torch.cat([torch.tensor([float(x) for x in pcs]), rest])
    x = U @ z + mean

    fx.load_state_dict(vec2dict(x), strict=False)
    fx.apply(partial(clip_delay_eq_Q, Q=0.707))

    rendered = fx(y).squeeze(0).T.numpy()
    peak = np.max(np.abs(rendered))
    if peak > 1:
        rendered = rendered / peak

    # A sample at exactly +1.0 scales to 32768, which does not fit in int16;
    # clip to the representable range before casting to avoid wrap-around.
    pcm = np.clip(rendered * 32768, -32768, 32767).astype(np.int16)
    return (SAMPLE_RATE, pcm)


def get_important_pcs(n=10, **kwargs):
    """Create ``n`` sliders labelled ``PC 1`` .. ``PC n``.

    Args:
        n: Number of sliders to create.
        **kwargs: Extra keyword arguments forwarded to every ``gr.Slider``.

    Returns:
        List of ``gr.Slider`` components ranging from ``SLIDER_MIN`` to
        ``SLIDER_MAX``.
    """
    sliders = [
        gr.Slider(minimum=SLIDER_MIN, maximum=SLIDER_MAX, label=f"PC {i}", **kwargs)
        for i in range(1, n + 1)
    ]
    return sliders


def _randomise_sliders(*xs):
    """Return one standard-normal draw per slider, clamped to the slider range."""
    clamp = chain_functions(
        partial(max, SLIDER_MIN),
        partial(min, SLIDER_MAX),
    )
    return [clamp(normalvariate(0, 1)) for _ in xs]


def _reset_sliders(*xs):
    """Return a zero for every slider."""
    return [0] * len(xs)


with gr.Blocks() as demo:
    # NOTE(review): the heading mentions a Hadamard transform, which nothing in
    # this script implements — confirm the text is intended.
    gr.Markdown(
        """
    # Hadamard Transform
    This is a demo of the Hadamard transform.
    """
    )
    with gr.Row():
        with gr.Column():
            audio_input = gr.Audio(type="numpy", sources="upload", label="Input Audio")
            with gr.Row():
                random_button = gr.Button(
                    f"Randomise the first {NUMBER_OF_PCS} PCs",
                    elem_id="randomise-button",
                )
                reset_button = gr.Button(
                    "Reset",
                    elem_id="reset-button",
                )
                render_button = gr.Button(
                    "Run", elem_id="render-button", variant="primary"
                )
            random_rest_checkbox = gr.Checkbox(
                label=f"Randomise PCs > {NUMBER_OF_PCS} (default to zeros)",
                value=False,
                elem_id="randomise-checkbox",
            )
            sliders = get_important_pcs(NUMBER_OF_PCS, value=0)
        with gr.Column():
            audio_output = gr.Audio(
                type="numpy", label="Output Audio", interactive=False
            )

    render_button.click(
        inference,
        inputs=[
            audio_input,
            random_rest_checkbox,
        ]
        + sliders,
        outputs=audio_output,
    )

    random_button.click(
        _randomise_sliders,
        inputs=sliders,
        outputs=sliders,
    )

    reset_button.click(
        _reset_sliders,
        inputs=sliders,
        outputs=sliders,
    )

demo.launch()