# Extraction metadata (not part of the config): file size 1,716 bytes, ref e90f765.
# Settings grouped under `train`: seed, epoch count, batching, checkpoint
# cadence, numeric precision and gradient clipping.
# NOTE(review): `model` further down also sets batch_size (200.0),
# precision ("fp16") and accumulate_grad_batches (1), which disagree with the
# values here — confirm which section the training code actually reads.
train:
  seed: 1234
  epochs: 100
  batch_size: 6
  gradient_accumulation: 4
  save_every_n_epoch: 1
  precision: 32
  gradient_clip: 1.0
# Learning-rate settings grouped under `optimizer`.
# NOTE(review): lr_init / lr_end / warmup_steps / decay_steps look like a
# warmup-then-decay schedule around `lr` — confirm against the scheduler code.
# NOTE(review): `model` also sets lr (0.0001) and warmup_step (50); verify
# which of the two sections is authoritative.
optimizer:
  lr: 0.01
  lr_init: 0.00001
  lr_end: 0.0001
  warmup_steps: 2000
  decay_steps: 40000
# Data-loading settings.
# NOTE(review): `model.nworkers` is 16 while num_workers here is 1 — confirm
# which one the data loader uses.
data:
  max_eval_sample: 8
  max_sec: 40
  num_workers: 1
  pad_val: 1024  # same as EOS in the model (model.n_codes is also 1024)
# Model section: paths, optimisation values, network sizes, runtime flags,
# sampling parameters and sequence/vocabulary limits in one flat mapping.
# NOTE(review): several keys here (lr, batch_size, precision, nworkers,
# accumulate_grad_batches, warmup_step, top_k) also appear in other sections
# of this file with different values — confirm which one each consumer reads.
model:
  # --- Paths and checkpoints ---
  # Relative paths are presumably resolved against the working directory —
  # TODO confirm.
  saving_path: "ckpt/"
  resume_checkpoint: null
  vocoder_config_path: "quantizer/new_ckpt/config.json"
  vocoder_ckpt_path: "quantizer/new_ckpt/g_00600000"
  # Machine-specific absolute paths under /home/liweiche; these must be
  # changed to run on any other host.
  datadir: "/home/liweiche/GigaSpeech/wavs"
  metapath: "/home/liweiche/GigaSpeech/train2.json"
  val_metapath: "/home/liweiche/GigaSpeech/dev2.json"
  sampledir: "logs/"
  pretrained_path: null
  # --- Optimisation ---
  lr: 0.0001
  # NOTE(review): batch_size is a float (200.0) here but an integer (6) under
  # `train`; the unit is not visible from this file — confirm.
  batch_size: 200.0
  train_bucket_size: 8192
  training_step: 800000
  optim_flat_percent: 0.0
  warmup_step: 50
  adam_beta1: 0.9
  adam_beta2: 0.98
  # --- Network sizes ---
  ffd_size: 3072
  hidden_size: 768
  enc_nlayers: 6
  dec_nlayers: 6
  nheads: 12
  # ar_* keys use smaller sizes than the enc/dec keys above.
  ar_layer: 4
  ar_ffd_size: 1024
  ar_hidden_size: 256
  ar_nheads: 4
  aligner_softmax_temp: 1.0
  layer_norm_eps: 0.00001
  speaker_embed_dropout: 0.05
  label_smoothing: 0.0
  # --- Validation cadence and runtime ---
  # NOTE(review): these key names resemble PyTorch Lightning Trainer
  # arguments — confirm against the training script.
  val_check_interval: 5000
  check_val_every_n_epoch: 1
  # String "fp16" here, integer 32 under `train.precision`.
  precision: "fp16"
  nworkers: 16
  distributed: true
  accelerator: "ddp"
  version: null
  accumulate_grad_batches: 1
  # --- Repetition handling and sampling ---
  use_repetition_token: true
  use_repetition_gating: false
  repetition_penalty: 1.0
  sampling_temperature: 1.0
  # NOTE(review): -1 presumably disables top-k filtering — confirm; the
  # `inference` section sets top_k to 5.
  top_k: -1
  min_top_k: 3
  top_p: 0.8
  sample_num: 4
  length_penalty_max_length: 15000
  length_penalty_max_prob: 0.95
  # --- Sequence limits, audio and vocabulary sizes ---
  max_input_length: 2048
  max_output_length: 2000
  sample_rate: 16000
  # Same value as data.pad_val (1024), which that section ties to EOS.
  n_codes: 1024
  n_cluster_groups: 1
  phone_context_window: 4
  phoneset_size: 1000
# Inference-time settings.
# NOTE(review): top_k is 5 here but -1 under `model` — confirm whether this
# value overrides model.top_k at inference.
inference:
  top_k: 5