wangpuupup
committed on
Commit
•
f30f1c2
1
Parent(s):
a7953c3
Upload 7 files
Browse files
- conf/decode_s2t_nl.yaml +9 -0
- conf/fbank.conf +2 -0
- conf/pbs.conf +11 -0
- conf/pitch.conf +1 -0
- conf/queue.conf +12 -0
- conf/slurm.conf +14 -0
- conf/train_cgn.yaml +99 -0
conf/decode_s2t_nl.yaml
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
beam_size: 1
|
2 |
+
penalty: 0.0
|
3 |
+
maxlenratio: 0.0
|
4 |
+
minlenratio: 0.0
|
5 |
+
ctc_weight: 0.0
|
6 |
+
lm_weight: 0.0
|
7 |
+
lang_sym: <nl>
|
8 |
+
task_sym: <asr>
|
9 |
+
predict_time: false
|
conf/fbank.conf
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
--sample-frequency=16000
|
2 |
+
--num-mel-bins=80
|
conf/pbs.conf
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Default configuration
|
2 |
+
command qsub -V -v PATH -S /bin/bash
|
3 |
+
option name=* -N $0
|
4 |
+
option mem=* -l mem=$0
|
5 |
+
option mem=0 # Do not add anything to qsub_opts
|
6 |
+
option num_threads=* -l ncpus=$0
|
7 |
+
option num_threads=1 # Do not add anything to qsub_opts
|
8 |
+
option num_nodes=* -l nodes=$0:ppn=1
|
9 |
+
default gpu=0
|
10 |
+
option gpu=0
|
11 |
+
option gpu=* -l ngpus=$0
|
conf/pitch.conf
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
--sample-frequency=16000
|
conf/queue.conf
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Default configuration
|
2 |
+
command qsub -v PATH -cwd -S /bin/bash -j y -l arch=*64*
|
3 |
+
option name=* -N $0
|
4 |
+
option mem=* -l mem_free=$0,ram_free=$0
|
5 |
+
option mem=0 # Do not add anything to qsub_opts
|
6 |
+
option num_threads=* -pe smp $0
|
7 |
+
option num_threads=1 # Do not add anything to qsub_opts
|
8 |
+
option max_jobs_run=* -tc $0
|
9 |
+
option num_nodes=* -pe mpi $0 # You must set this PE as allocation_rule=1
|
10 |
+
default gpu=0
|
11 |
+
option gpu=0
|
12 |
+
option gpu=* -l gpu=$0 -q g.q
|
conf/slurm.conf
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Default configuration
|
2 |
+
command sbatch --export=PATH
|
3 |
+
option name=* --job-name $0
|
4 |
+
option time=* --time $0
|
5 |
+
option mem=* --mem-per-cpu $0
|
6 |
+
option mem=0
|
7 |
+
option num_threads=* --cpus-per-task $0
|
8 |
+
option num_threads=1 --cpus-per-task 1
|
9 |
+
option num_nodes=* --nodes $0
|
10 |
+
default gpu=0
|
11 |
+
option gpu=0 -p cpu
|
12 |
+
option gpu=* -p gpu --gres=gpu:$0 -c $0 # Recommend allocating at least as many CPUs as GPUs
|
13 |
+
# note: the --max-jobs-run option is supported as a special case
|
14 |
+
# by slurm.pl and you don't have to handle it in the config file.
|
conf/train_cgn.yaml
ADDED
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
preprocessor: s2t
|
2 |
+
preprocessor_conf:
|
3 |
+
text_prev_name: text_prev
|
4 |
+
text_ctc_name: text_ctc
|
5 |
+
fs: 16000
|
6 |
+
na_symbol: "<na>"
|
7 |
+
speech_length: 30
|
8 |
+
speech_resolution: 0.02
|
9 |
+
speech_init_silence: 30
|
10 |
+
text_prev_apply_prob: 0.0
|
11 |
+
time_apply_prob: 0.0
|
12 |
+
notime_symbol: "<notimestamps>"
|
13 |
+
first_time_symbol: "<0.00>"
|
14 |
+
last_time_symbol: "<30.00>"
|
15 |
+
|
16 |
+
frontend_conf:
|
17 |
+
n_fft: 512
|
18 |
+
win_length: 400
|
19 |
+
hop_length: 160
|
20 |
+
|
21 |
+
specaug: specaug
|
22 |
+
specaug_conf:
|
23 |
+
apply_time_warp: false
|
24 |
+
time_warp_window: 5
|
25 |
+
time_warp_mode: bicubic
|
26 |
+
apply_freq_mask: true
|
27 |
+
freq_mask_width_range:
|
28 |
+
- 0
|
29 |
+
- 27
|
30 |
+
num_freq_mask: 2
|
31 |
+
apply_time_mask: true
|
32 |
+
time_mask_width_ratio_range:
|
33 |
+
- 0.
|
34 |
+
- 0.05
|
35 |
+
num_time_mask: 5
|
36 |
+
|
37 |
+
normalize: global_mvn
|
38 |
+
normalize_conf:
|
39 |
+
stats_file: /espnet/egs2/owsm_v1/s2t1/exp/s2t_stats_raw_bpe20000/train/feats_stats.npz
|
40 |
+
|
41 |
+
encoder: transformer
|
42 |
+
encoder_conf:
|
43 |
+
output_size: 768 # dimension of attention
|
44 |
+
attention_heads: 12
|
45 |
+
linear_units: 3072 # the number of units of position-wise feed forward
|
46 |
+
num_blocks: 12 # the number of encoder blocks
|
47 |
+
dropout_rate: 0.1
|
48 |
+
positional_dropout_rate: 0.1
|
49 |
+
attention_dropout_rate: 0.1
|
50 |
+
input_layer: conv2d2 # encoder architecture type
|
51 |
+
normalize_before: true
|
52 |
+
|
53 |
+
decoder: adptransformer
|
54 |
+
decoder_conf:
|
55 |
+
attention_heads: 12
|
56 |
+
linear_units: 3072
|
57 |
+
num_blocks: 12
|
58 |
+
dropout_rate: 0.1
|
59 |
+
positional_dropout_rate: 0.1
|
60 |
+
self_attention_dropout_rate: 0.1
|
61 |
+
src_attention_dropout_rate: 0.1
|
62 |
+
|
63 |
+
model_conf:
|
64 |
+
ctc_weight: 0.3
|
65 |
+
lsm_weight: 0.1
|
66 |
+
length_normalized_loss: false
|
67 |
+
sym_na: "<na>"
|
68 |
+
|
69 |
+
optim: adamw
|
70 |
+
optim_conf:
|
71 |
+
lr: 0.00055
|
72 |
+
betas:
|
73 |
+
- 0.9
|
74 |
+
- 0.98
|
75 |
+
eps: 1.0e-06
|
76 |
+
weight_decay: 0.0
|
77 |
+
scheduler: warmuplr
|
78 |
+
scheduler_conf:
|
79 |
+
warmup_steps: 10000
|
80 |
+
|
81 |
+
# 4 GPU/node x 8 nodes = 32 A100
|
82 |
+
batch_type: unsorted
|
83 |
+
batch_size: 5
|
84 |
+
accum_grad: 4
|
85 |
+
num_iters_per_epoch: 40000
|
86 |
+
max_epoch: 10
|
87 |
+
patience: none
|
88 |
+
init: none
|
89 |
+
best_model_criterion:
|
90 |
+
- - valid
|
91 |
+
- acc
|
92 |
+
- max
|
93 |
+
keep_nbest_models: 1
|
94 |
+
use_amp: true
|
95 |
+
num_workers: 4
|
96 |
+
|
97 |
+
init_param:
|
98 |
+
- /espnet/egs2/owsm_v1/s2t1/exp/s2t_train_raw_bpe20000/valid.acc.ave.pth
|
99 |
+
ignore_init_mismatch: false
|