{
  "model_type": "autoencoder",
  "sample_size": 12000,
  "sample_rate": 24000,
  "audio_channels": 1,
  "model": {
    "encoder": {
      "type": "oobleck",
      "config": {
        "in_channels": 1,
        "channels": 128,
        "c_mults": [1, 2, 4, 8],
        "strides": [2, 4, 6, 10],
        "latent_dim": 256,
        "use_snake": true
      }
    },
    "decoder": {
      "type": "oobleck",
      "config": {
        "out_channels": 1,
        "channels": 128,
        "c_mults": [1, 2, 4, 8],
        "strides": [2, 4, 6, 10],
        "latent_dim": 128,
        "use_snake": true,
        "final_tanh": false
      }
    },
    "bottleneck": {
      "type": "vae"
    },
    "latent_dim": 128,
    "downsampling_ratio": 480,
    "io_channels": 1
  },
  "training": {
    "learning_rate": 1.5e-4,
    "warmup_steps": 0,
    "use_ema": false,
    "optimizer_configs": {
      "autoencoder": {
        "optimizer": {
          "type": "AdamW",
          "config": {
            "betas": [0.8, 0.99],
            "lr": 1.5e-4,
            "weight_decay": 1e-3
          }
        },
        "scheduler": {
          "type": "InverseLR",
          "config": {
            "inv_gamma": 200000,
            "power": 0.5,
            "warmup": 0.999
          }
        }
      },
      "discriminator": {
        "optimizer": {
          "type": "AdamW",
          "config": {
            "betas": [0.8, 0.99],
            "lr": 3e-4,
            "weight_decay": 1e-3
          }
        },
        "scheduler": {
          "type": "InverseLR",
          "config": {
            "inv_gamma": 200000,
            "power": 0.5,
            "warmup": 0.999
          }
        }
      }
    },
    "loss_configs": {
      "discriminator": {
        "type": "encodec",
        "config": {
          "filters": 64,
          "n_ffts": [1280, 640, 320, 160, 80],
          "hop_lengths": [320, 160, 80, 40, 20],
          "win_lengths": [1280, 640, 320, 160, 80]
        },
        "weights": {
          "adversarial": 0.1,
          "feature_matching": 5.0
        }
      },
      "spectral": {
        "type": "mrstft",
        "config": {
          "fft_sizes": [1280, 640, 320, 160, 80, 40, 20],
          "hop_sizes": [320, 160, 80, 40, 20, 10, 5],
          "win_lengths": [1280, 640, 320, 160, 80, 40, 20],
          "perceptual_weighting": true
        },
        "weights": {
          "mrstft": 1.0
        }
      },
      "time": {
        "type": "l1",
        "weights": {
          "l1": 0.0
        }
      },
      "bottleneck": {
        "type": "kl",
        "weights": {
          "kl": 1e-4
        }
      }
    },
    "demo": {
      "demo_every": 10000
    }
  }
}