{
  "train": {
    "log_interval": 100,
    "eval_interval": 200,
    "seed": 1234,
    "epochs": 15,
    "learning_rate": 0.0001,
    "betas": [
      0.8,
      0.99
    ],
    "eps": 1e-09,
    "batch_size": 8,
    "fp16_run": false,
    "lr_decay": 0.999875,
    "segment_size": 20480,
    "init_lr_ratio": 1,
    "warmup_epochs": 0,
    "c_mel": 45,
    "c_kl": 1.0
  },
  "data": {
    "max_wav_value": 32768.0,
    "sampling_rate": 32000,
    "filter_length": 2048,
    "hop_length": 640,
    "win_length": 2048,
    "n_mel_channels": 128,
    "mel_fmin": 0.0,
    "mel_fmax": null,
    "add_blank": true,
    "n_speakers": 300,
    "cleaned_text": true
  },
  "model": {
    "inter_channels": 192,
    "hidden_channels": 192,
    "filter_channels": 768,
    "n_heads": 2,
    "n_layers": 6,
    "kernel_size": 3,
    "p_dropout": 0.1,
    "resblock": "1",
    "resblock_kernel_sizes": [
      3,
      7,
      11
    ],
    "resblock_dilation_sizes": [
      [
        1,
        3,
        5
      ],
      [
        1,
        3,
        5
      ],
      [
        1,
        3,
        5
      ]
    ],
    "upsample_rates": [
      10,
      8,
      2,
      2,
      2
    ],
    "upsample_initial_channel": 512,
    "upsample_kernel_sizes": [
      16,
      16,
      8,
      2,
      2
    ],
    "n_layers_q": 3,
    "use_spectral_norm": false,
    "gin_channels": 512,
    "semantic_frame_rate": "25hz",
    "freeze_quantizer": true
  },
  "s2_ckpt_dir": "logs/s2/big2k1ft_ko_5commit2",
  "content_module": "cnhubert"
}