File size: 1,540 Bytes
1364f94
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
{
  'train': {
    'log_interval': 100,
    'eval_interval': 200,
    'seed': 1234,
    'epochs': 15,
    'learning_rate': 0.0001,
    'betas': [
      0.8,
      0.99
    ],
    'eps': 1e-09,
    'batch_size': 8,
    'fp16_run': False,
    'lr_decay': 0.999875,
    'segment_size': 20480,
    'init_lr_ratio': 1,
    'warmup_epochs': 0,
    'c_mel': 45,
    'c_kl': 1.0
  },
  'data': {
    'max_wav_value': 32768.0,
    'sampling_rate': 32000,
    'filter_length': 2048,
    'hop_length': 640,
    'win_length': 2048,
    'n_mel_channels': 128,
    'mel_fmin': 0.0,
    'mel_fmax': None,
    'add_blank': True,
    'n_speakers': 300,
    'cleaned_text': True
  },
  'model': {
    'inter_channels': 192,
    'hidden_channels': 192,
    'filter_channels': 768,
    'n_heads': 2,
    'n_layers': 6,
    'kernel_size': 3,
    'p_dropout': 0.1,
    'resblock': '1',
    'resblock_kernel_sizes': [
      3,
      7,
      11
    ],
    'resblock_dilation_sizes': [
      [
        1,
        3,
        5
      ],
      [
        1,
        3,
        5
      ],
      [
        1,
        3,
        5
      ]
    ],
    'upsample_rates': [
      10,
      8,
      2,
      2,
      2
    ],
    'upsample_initial_channel': 512,
    'upsample_kernel_sizes': [
      16,
      16,
      8,
      2,
      2
    ],
    'n_layers_q': 3,
    'use_spectral_norm': False,
    'gin_channels': 512,
    'semantic_frame_rate': '25hz',
    'freeze_quantizer': True
  },
  's2_ckpt_dir': 'logs/s2/big2k1ft_ko_5commit2',
  'content_module': 'cnhubert'
}