xzuyn's picture
Create s2D2333k.json
1364f94 verified
{
  "train": {
    "log_interval": 100,
    "eval_interval": 200,
    "seed": 1234,
    "epochs": 15,
    "learning_rate": 0.0001,
    "betas": [
      0.8,
      0.99
    ],
    "eps": 1e-09,
    "batch_size": 8,
    "fp16_run": false,
    "lr_decay": 0.999875,
    "segment_size": 20480,
    "init_lr_ratio": 1,
    "warmup_epochs": 0,
    "c_mel": 45,
    "c_kl": 1.0
  },
  "data": {
    "max_wav_value": 32768.0,
    "sampling_rate": 32000,
    "filter_length": 2048,
    "hop_length": 640,
    "win_length": 2048,
    "n_mel_channels": 128,
    "mel_fmin": 0.0,
    "mel_fmax": null,
    "add_blank": true,
    "n_speakers": 300,
    "cleaned_text": true
  },
  "model": {
    "inter_channels": 192,
    "hidden_channels": 192,
    "filter_channels": 768,
    "n_heads": 2,
    "n_layers": 6,
    "kernel_size": 3,
    "p_dropout": 0.1,
    "resblock": "1",
    "resblock_kernel_sizes": [
      3,
      7,
      11
    ],
    "resblock_dilation_sizes": [
      [
        1,
        3,
        5
      ],
      [
        1,
        3,
        5
      ],
      [
        1,
        3,
        5
      ]
    ],
    "upsample_rates": [
      10,
      8,
      2,
      2,
      2
    ],
    "upsample_initial_channel": 512,
    "upsample_kernel_sizes": [
      16,
      16,
      8,
      2,
      2
    ],
    "n_layers_q": 3,
    "use_spectral_norm": false,
    "gin_channels": 512,
    "semantic_frame_rate": "25hz",
    "freeze_quantizer": true
  },
  "s2_ckpt_dir": "logs/s2/big2k1ft_ko_5commit2",
  "content_module": "cnhubert"
}