# Generator network: the model is selected by `name` and receives `config`.
# NOTE(review): how each field is interpreted depends on the ScalarModel
# implementation, which is not part of this file — confirm before tuning.
generator:
  name: ScalarModel
  config:
    num_bands: 1
    sample_rate: 48000
    causal: true
    num_samples: 2

    # Four entries whose product is 4 * 5 * 5 * 5 = 500.
    downsample_factors: [4, 5, 5, 5]
    # Each kernel size is twice the factor at the same position.
    downsample_kernel_sizes: [8, 10, 10, 10]

    # Same values as the downsample lists, in reverse order.
    upsample_factors: [5, 5, 5, 4]
    upsample_kernel_sizes: [10, 10, 10, 8]

    latent_hidden_dim: 32
    default_kernel_size: 7
    delay_kernel_size: 5
    init_channel: 64
    res_kernel_size: 7
# Discriminator selection. The single entry matches the top-level `mfd` key.
# NOTE(review): the `mpd` and `msd` sections exist in this file but are not
# listed here — presumably they are inactive; verify against the trainer.
d_list: [mfd]
# Settings for the discriminator stored under the `mfd` key.
mfd:
  name: MultiFrequencyDiscriminator
  config:
    # Both lists have six entries; presumably they are paired by position
    # (one hidden width per hop length) — confirm in the discriminator code.
    hop_lengths: [32, 64, 128, 256, 512, 1024]
    hidden_channels: [64, 128, 256, 512, 512, 512]
    domain: double
    mel_scale: true
    # Same value as generator.config.sample_rate and the top-level sample_rate.
    sample_rate: 48000
# Settings for the discriminator stored under the `mpd` key.
mpd:
  name: MultiPeriodDiscriminator
  config:
    # The first five prime numbers.
    period_sizes: [2, 3, 5, 7, 11]
    period_kernel_size: 5
# Settings for the discriminator stored under the `msd` key.
# NOTE(review): the pool_* fields presumably configure the pooling applied
# between scales — confirm in the MultiScaleDiscriminator implementation.
msd:
  name: MultiScaleDiscriminator
  config:
    num_scales: 3
    pool_kernel_size: 4
    pool_stride: 2
# Optimizer settings, one entry per network. The `g` and `d` entries
# currently hold identical values.
# NOTE(review): `g` / `d` presumably mean generator / discriminator, and
# `name` presumably resolves to a torch.optim class — confirm in the trainer.
optimizer:
  g:
    name: AdamW
    config:
      lr: 0.0002
      betas: [0.8, 0.99]
      eps: 1.0e-06
  d:
    name: AdamW
    config:
      lr: 0.0002
      betas: [0.8, 0.99]
      eps: 1.0e-06
# Learning-rate scheduler settings, one entry per network. The `g` and `d`
# entries currently hold identical values.
# NOTE(review): if `name` resolves to torch.optim.lr_scheduler.ExponentialLR,
# gamma is the multiplicative decay applied on every scheduler step; whether
# the trainer steps it per epoch or per iteration is not visible here.
lr_scheduler:
  g:
    name: ExponentialLR
    config:
      gamma: 0.999
  d:
    name: ExponentialLR
    config:
      gamma: 0.999
# Loss configuration. `g_criterion` and `d_criterion` each give a dotted
# `name` plus a `config` mapping; `commit_loss_weight` sits beside them.
criterion:
  g_criterion:
    name: losses.generator_loss.GeneratorSTFTLoss
    config:
      adv_criterion: MSEGLoss

      # Each use_* switch is kept next to the weight of the same loss term.
      # NOTE(review): use_mel_loss is false, so mel_loss_weight and the
      # mel_scale_loss section below are presumably unused — confirm.
      use_mel_loss: false
      mel_loss_weight: 45
      use_feature_match: true
      feat_match_loss_weight: 20
      use_full_stft_loss: true
      full_stft_loss_weight: 1
      use_sub_stft_loss: true
      sub_stft_loss_weight: 1

      mel_scale_loss:
        sampling_rate: 48000
        n_fft: 1024
        num_mels: 80
        hop_size: 160
        win_size: 800
        fmin: 0

      # Three resolutions; in each one hop size is a quarter of the window.
      full_multi_scale_stft_loss:
        fft_sizes: [512, 1024, 2048]
        win_sizes: [480, 960, 1200]
        hop_sizes: [120, 240, 300]

      # NOTE(review): num_bands is 6 here while generator.config.num_bands
      # is 1 — confirm the two settings are meant to be independent.
      sub_multi_scale_stft_loss:
        num_bands: 6
        fft_sizes: [128, 256, 256]
        win_sizes: [80, 120, 200]
        hop_sizes: [20, 40, 50]

  d_criterion:
    name: losses.discriminator_loss.MSEDiscriminatorLoss
    # Explicit null: no configuration mapping is supplied for this loss.
    config: null

  commit_loss_weight: 1.0
# ---- Data and audio ----
training_file: train.scp
validation_file: val.scp
sample_rate: 48000
# Same number as sample_rate; one second of audio if counted in samples
# (assumption — confirm the unit in the data loader).
segment_size: 48000
audio_norm_scale: 0.95
# NOTE(review): generator.config.downsample_factors multiply to 500, not
# 2000 — confirm what this hop_length refers to.
hop_length: 2000
batch_size: 12
num_workers: 8

# ---- Reproducibility ----
seed: 2333
cudnn_deterministic: false

# ---- Schedule ----
# Key spelling ("epoches") is kept as-is: it is the name read at runtime.
num_epoches: 500
discriminator_iter_start: 0
checkpoint_interval: 5000
validation_interval: 5000
summary_interval: 100
print_freq: 10
num_ckpt_keep: 10

# ---- Logging ----
tensorboard: true
num_plots: 8

# ---- Distributed launch ----
# NOTE(review): -1 is presumably a placeholder replaced by the launcher.
local_rank: -1
ngpus_per_node: 8

# ---- Config sources and output locations ----
basic_model_config: config/scalar48k.yaml
exp_model_config: null
# Absolute paths tied to one storage mount; model_ckpt_dir is log_dir plus
# "/model_ckpts". Adjust both when running elsewhere.
log_dir: /apdcephfs/share_1316500/lavenywang/exp_data/codec/48k
model_ckpt_dir: /apdcephfs/share_1316500/lavenywang/exp_data/codec/48k/model_ckpts