YosefAyele committed
Commit 466446e · 1 Parent(s): e9c0f26

add training and visualization scripts and logs

Training/env.log ADDED
@@ -0,0 +1,107 @@
+ SpeechBrain system description
+ ==============================
+ Python version:
+ 3.9.7 (default, Sep 16 2021, 13:09:58)
+ [GCC 7.5.0]
+ ==============================
+ Installed Python packages:
+ aiohttp==3.8.1
+ aiosignal==1.2.0
+ appdirs==1.4.4
+ async-timeout==4.0.2
+ attrs==21.4.0
+ audioread==2.1.9
+ audiosegment==0.23.0
+ azure-core==1.21.1
+ azure-storage-blob==12.9.0
+ bcrypt==3.2.0
+ black==19.10b0
+ certifi==2021.10.8
+ cffi==1.15.0
+ cfgv==3.3.1
+ charset-normalizer==2.0.10
+ click==8.0.3
+ cryptography==36.0.1
+ datasets==1.13.3
+ decorator==5.1.1
+ dill==0.3.4
+ distlib==0.3.4
+ entrypoints==0.3
+ ffmpeg==1.4
+ filelock==3.4.2
+ flake8==3.7.9
+ frozenlist==1.3.0
+ fsspec==2022.2.0
+ huggingface-hub==0.5.1
+ HyperPyYAML==1.0.1
+ identify==2.4.4
+ idna==3.3
+ isodate==0.6.1
+ joblib==1.1.0
+ librosa==0.8.1
+ llvmlite==0.38.0
+ mccabe==0.6.1
+ more-itertools==8.12.0
+ msrest==0.6.21
+ multidict==6.0.2
+ multiprocess==0.70.12.2
+ mutagen==1.45.1
+ nodeenv==1.6.0
+ numba==0.55.0
+ numpy==1.21.5
+ oauthlib==3.1.1
+ packaging==21.3
+ pandas==1.3.5
+ paramiko==2.10.3
+ pathspec==0.9.0
+ platformdirs==2.4.1
+ pluggy==0.13.1
+ pooch==1.5.2
+ pre-commit==2.17.0
+ py==1.11.0
+ pyarrow==7.0.0
+ pycodestyle==2.5.0
+ pycparser==2.21
+ pydub==0.25.1
+ pyflakes==2.1.1
+ PyNaCl==1.5.0
+ pyparsing==3.0.6
+ pytest==5.4.1
+ python-dateutil==2.8.2
+ pytz==2021.3
+ PyYAML==6.0
+ regex==2022.1.18
+ requests==2.27.1
+ requests-oauthlib==1.3.0
+ resampy==0.2.2
+ ruamel.yaml==0.17.21
+ ruamel.yaml.clib==0.2.6
+ sacremoses==0.0.53
+ scikit-learn==1.0.2
+ scipy==1.7.3
+ scp==0.14.4
+ sentencepiece==0.1.96
+ six==1.16.0
+ SoundFile==0.10.3.post1
+ speechbrain==0.5.11
+ threadpoolctl==3.0.0
+ tokenizers==0.12.1
+ toml==0.10.2
+ torch==1.11.0
+ torchaudio==0.11.0
+ tqdm==4.62.3
+ transformers==4.18.0
+ typed-ast==1.5.1
+ typing_extensions==4.0.1
+ urllib3==1.26.8
+ virtualenv==20.13.0
+ wcwidth==0.2.5
+ webrtcvad==2.0.10
+ xxhash==3.0.0
+ yamllint==1.23.0
+ yarl==1.7.2
+ youtube-dl==2021.12.17
+ ==============================
+ Could not get git revision
+ ==============================
+ Cuda version:
+ 10.2
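
The report above is written automatically when the experiment starts. For reference, a minimal sketch that reproduces a similar dump by hand (standard library plus pip only; this is an illustrative approximation, not SpeechBrain's own code):

    # env_dump.py - hypothetical reproduction of an env.log-style report
    import subprocess
    import sys

    print("SpeechBrain system description")
    print("=" * 30)
    print("Python version:")
    print(sys.version)
    print("=" * 30)
    print("Installed Python packages:")
    # pip freeze lists installed packages in requirements format
    freeze = subprocess.run(
        [sys.executable, "-m", "pip", "freeze"],
        capture_output=True, text=True,
    )
    print(freeze.stdout)
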
Training/hyperparams.yaml ADDED
@@ -0,0 +1,193 @@
+ # Generated 2022-05-27 from:
+ # /data/n.abdoumohamed/dvoice-africa/speechbrain/recipes/DVoice/ASR/CTC/hparams/train_amharic.yaml
+ # yamllint disable
+ # ################################
+ # Model: wav2vec2 + DNN + CTC
+ # Augmentation: SpecAugment
+ # Authors: Titouan Parcollet 2021
+ # ################################
+
+ # Seed needs to be set at top of yaml, before objects with parameters are made
+ seed: 1249
+ __set_seed: !!python/object/apply:torch.manual_seed [1249]
+ output_folder: results/wav2vec2_ctc_AMHARIC/1249
+ wer_file: results/wav2vec2_ctc_AMHARIC/1249/wer.txt
+ save_folder: results/wav2vec2_ctc_AMHARIC/1249/save
+ train_log: results/wav2vec2_ctc_AMHARIC/1249/train_log.txt
+
+ # Hub ID of the multilingual XLSR-53 wav2vec 2.0 model.
+ wav2vec2_hub: facebook/wav2vec2-large-xlsr-53
+
+ # Data files
+ data_folder: ASR/AMHARIC/data # e.g., /localscratch/cv-corpus-5.1-2020-06-22/fr
+ train_csv_file: ASR/AMHARIC/data/train.csv # Standard CommonVoice .tsv files
+ dev_csv_file: ASR/AMHARIC/data/dev.csv # Standard CommonVoice .tsv files
+ test_csv_file: ASR/AMHARIC/data/test.csv # Standard CommonVoice .tsv files
+ accented_letters: true
+ language: amharic
+ train_csv: results/wav2vec2_ctc_AMHARIC/save/train.csv
+ valid_csv: results/wav2vec2_ctc_AMHARIC/save/dev.csv
+ test_csv: results/wav2vec2_ctc_AMHARIC/save/test.csv
+ skip_prep: false # Skip data preparation
+ data_augmentation: false # Skip data augmentation
+
+ # We remove utterances longer than 15s in the train/dev/test sets, as
+ # longer sentences certainly correspond to "open microphones".
+ avoid_if_longer_than: 15.0
+
+ # Training parameters
+ number_of_epochs: 30
+ number_of_ctc_epochs: 15
+ lr: 1.0
+ lr_wav2vec: 0.0001
+ ctc_weight: 0.3
+ sorting: ascending
+ auto_mix_prec: false
+ sample_rate: 16000
+ ckpt_interval_minutes: 30 # save checkpoint every N min
+
+ # With data_parallel batch_size is split into N jobs
+ # With DDP batch_size is multiplied by N jobs
+ # Must be 6 per GPU to fit 16GB of VRAM
+ batch_size: 4
+ test_batch_size: 4
+
+ dataloader_options:
+   batch_size: 4
+   num_workers: 2
+ test_dataloader_options:
+   batch_size: 4
+   num_workers: 2
+
+ # BPE parameters
+ token_type: char # ["unigram", "bpe", "char"]
+ character_coverage: 1.0
+
+ # Model parameters
+ activation: !name:torch.nn.LeakyReLU
+ wav2vec_output_dim: 1024
+ dnn_neurons: 1024
+ freeze_wav2vec: false
+
+ # Outputs
+ output_neurons: 224 # BPE size, index(blank/eos/bos) = 0
+
+ # Decoding parameters
+ # Be sure that the bos and eos index match with the BPEs ones
+ blank_index: 0
+ bos_index: 1
+ eos_index: 2
+ min_decode_ratio: 0.0
+ max_decode_ratio: 1.0
+ beam_size: 80
+ eos_threshold: 1.5
+ using_max_attn_shift: true
+ max_attn_shift: 140
+ ctc_weight_decode: 0.0
+ temperature: 1.50
+
+ #
+ # Functions and classes
+ #
+ epoch_counter: &id007 !new:speechbrain.utils.epoch_loop.EpochCounter
+
+   limit: 30
+
+ augmentation: !new:speechbrain.lobes.augment.TimeDomainSpecAugment
+   sample_rate: 16000
+   speeds: [95, 100, 105]
+
+ enc: &id002 !new:speechbrain.nnet.containers.Sequential
+   input_shape: [null, null, 1024]
+   linear1: !name:speechbrain.nnet.linear.Linear
+     n_neurons: 1024
+     bias: true
+   bn1: !name:speechbrain.nnet.normalization.BatchNorm1d
+   activation: !new:torch.nn.LeakyReLU
+   drop: !new:torch.nn.Dropout
+     p: 0.15
+   linear2: !name:speechbrain.nnet.linear.Linear
+     n_neurons: 1024
+     bias: true
+   bn2: !name:speechbrain.nnet.normalization.BatchNorm1d
+   activation2: !new:torch.nn.LeakyReLU
+   drop2: !new:torch.nn.Dropout
+     p: 0.15
+   linear3: !name:speechbrain.nnet.linear.Linear
+     n_neurons: 1024
+     bias: true
+   bn3: !name:speechbrain.nnet.normalization.BatchNorm1d
+   activation3: !new:torch.nn.LeakyReLU
+
+ wav2vec2: &id001 !new:speechbrain.lobes.models.huggingface_wav2vec.HuggingFaceWav2Vec2
+   source: facebook/wav2vec2-large-xlsr-53
+   output_norm: true
+   freeze: false
+   save_path: results/wav2vec2_ctc_AMHARIC/1249/save/wav2vec2_checkpoint
+
+ #####
+ # Uncomment this block if you prefer to use a Fairseq pretrained model instead
+ # of a HuggingFace one. Here, we provide a URL that is obtained from the
+ # Fairseq github for the multilingual XLSR.
+ #
+ #wav2vec2_url: https://dl.fbaipublicfiles.com/fairseq/wav2vec/xlsr_53_56k.pt
+ #wav2vec2: !new:speechbrain.lobes.models.fairseq_wav2vec.FairseqWav2Vec2
+ #    pretrained_path: !ref <wav2vec2_url>
+ #    output_norm: True
+ #    freeze: False
+ #    save_path: !ref <save_folder>/wav2vec2_checkpoint/model.pt
+ #####
+
+
+ ctc_lin: &id003 !new:speechbrain.nnet.linear.Linear
+
+   input_size: 1024
+   n_neurons: 224
+
+ log_softmax: !new:speechbrain.nnet.activations.Softmax
+   apply_log: true
+
+ ctc_cost: !name:speechbrain.nnet.losses.ctc_loss
+   blank_index: 0
+
+ modules:
+   wav2vec2: *id001
+   enc: *id002
+   ctc_lin: *id003
+ model: &id004 !new:torch.nn.ModuleList
+   - [*id002, *id003]
+ model_opt_class: !name:torch.optim.Adadelta
+   lr: 1.0
+   rho: 0.95
+   eps: 1.e-8
+
+ wav2vec_opt_class: !name:torch.optim.Adam
+   lr: 0.0001
+
+ lr_annealing_model: &id005 !new:speechbrain.nnet.schedulers.NewBobScheduler
+   initial_value: 1.0
+   improvement_threshold: 0.0025
+   annealing_factor: 0.8
+   patient: 0
+
+ lr_annealing_wav2vec: &id006 !new:speechbrain.nnet.schedulers.NewBobScheduler
+   initial_value: 0.0001
+   improvement_threshold: 0.0025
+   annealing_factor: 0.9
+   patient: 0
+
+ checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
+   checkpoints_dir: results/wav2vec2_ctc_AMHARIC/1249/save
+   recoverables:
+     wav2vec2: *id001
+     model: *id004
+     scheduler_model: *id005
+     scheduler_wav2vec: *id006
+     counter: *id007
+ train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
+   save_file: results/wav2vec2_ctc_AMHARIC/1249/train_log.txt
+
+ error_rate_computer: !name:speechbrain.utils.metric_stats.ErrorRateStats
+
+ cer_computer: !name:speechbrain.utils.metric_stats.ErrorRateStats
+   split_tokens: true
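
Since this is a HyperPyYAML file, loading it builds the Python objects (model, optimizers, schedulers, checkpointer) directly from the declarations above. A minimal loading sketch, assuming the hyperpyyaml package from the environment listing; note that constructing the hparams downloads facebook/wav2vec2-large-xlsr-53 on first use:

    # load_hparams.py - illustrative loader for the file above
    from hyperpyyaml import load_hyperpyyaml

    with open("Training/hyperparams.yaml") as f:
        # overrides may replace any top-level key before objects are built
        hparams = load_hyperpyyaml(f, overrides={"seed": 1249})

    # hparams behaves like a dict of instantiated objects and plain values
    wav2vec2 = hparams["wav2vec2"]        # HuggingFaceWav2Vec2 module
    print(hparams["dataloader_options"])  # {'batch_size': 4, 'num_workers': 2}
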
Training/log.txt ADDED
@@ -0,0 +1,937 @@
+ 2022-04-05 14:25:26,074 - speechbrain.core - INFO - Beginning experiment!
+ 2022-04-05 14:25:26,075 - speechbrain.core - INFO - Experiment folder: results/wav2vec2_ctc_AMHARIC/1249
+ 2022-04-05 14:25:26,762 - speechbrain.utils.superpowers - DEBUG - aiohttp==3.8.1
+ aiosignal==1.2.0
+ appdirs==1.4.4
+ async-timeout==4.0.2
+ attrs==21.4.0
+ audioread==2.1.9
+ azure-core==1.21.1
+ azure-storage-blob==12.9.0
+ bcrypt==3.2.0
+ black==19.10b0
+ certifi==2021.10.8
+ cffi==1.15.0
+ cfgv==3.3.1
+ charset-normalizer==2.0.10
+ click==8.0.3
+ cryptography==36.0.1
+ datasets==1.13.3
+ decorator==5.1.1
+ dill==0.3.4
+ distlib==0.3.4
+ entrypoints==0.3
+ ffmpeg==1.4
+ filelock==3.4.2
+ flake8==3.7.9
+ frozenlist==1.3.0
+ fsspec==2022.2.0
+ huggingface-hub==0.2.1
+ HyperPyYAML==1.0.0
+ identify==2.4.4
+ idna==3.3
+ isodate==0.6.1
+ joblib==1.1.0
+ librosa==0.8.1
+ llvmlite==0.38.0
+ mccabe==0.6.1
+ more-itertools==8.12.0
+ msrest==0.6.21
+ multidict==6.0.2
+ multiprocess==0.70.12.2
+ mutagen==1.45.1
+ nodeenv==1.6.0
+ numba==0.55.0
+ numpy==1.21.5
+ oauthlib==3.1.1
+ packaging==21.3
+ pandas==1.3.5
+ paramiko==2.10.3
+ pathspec==0.9.0
+ platformdirs==2.4.1
+ pluggy==0.13.1
+ pooch==1.5.2
+ pre-commit==2.17.0
+ py==1.11.0
+ pyarrow==7.0.0
+ pycodestyle==2.5.0
+ pycparser==2.21
+ pyflakes==2.1.1
+ PyNaCl==1.5.0
+ pyparsing==3.0.6
+ pytest==5.4.1
+ python-dateutil==2.8.2
+ pytz==2021.3
+ PyYAML==6.0
+ regex==2022.1.18
+ requests==2.27.1
+ requests-oauthlib==1.3.0
+ resampy==0.2.2
+ ruamel.yaml==0.17.20
+ ruamel.yaml.clib==0.2.6
+ sacremoses==0.0.47
+ scikit-learn==1.0.2
+ scipy==1.7.3
+ scp==0.14.4
+ sentencepiece==0.1.96
+ six==1.16.0
+ SoundFile==0.10.3.post1
+ threadpoolctl==3.0.0
+ tokenizers==0.10.3
+ toml==0.10.2
+ torch==1.10.1
+ torchaudio==0.10.1
+ tqdm==4.62.3
+ transformers==4.13.0
+ typed-ast==1.5.1
+ typing_extensions==4.0.1
+ urllib3==1.26.8
+ virtualenv==20.13.0
+ wcwidth==0.2.5
+ xxhash==3.0.0
+ yamllint==1.23.0
+ yarl==1.7.2
+
+
+ 2022-04-05 14:25:26,922 - dvoice_prepare - INFO - Preparing CSV files for 7612 samples ...
+ 2022-04-05 14:25:26,922 - dvoice_prepare - INFO - Creating csv lists in results/wav2vec2_ctc_AMHARIC/1249/save/train.csv ...
+ 2022-04-05 14:25:27,322 - dvoice_prepare - INFO - results/wav2vec2_ctc_AMHARIC/1249/save/train.csv successfully created!
+ 2022-04-05 14:25:27,322 - dvoice_prepare - INFO - Number of samples: 7612
+ 2022-04-05 14:25:27,322 - dvoice_prepare - INFO - Total duration: 14.05 Hours
+ 2022-04-05 14:25:27,382 - dvoice_prepare - INFO - Preparing CSV files for 1631 samples ...
+ 2022-04-05 14:25:27,382 - dvoice_prepare - INFO - Creating csv lists in results/wav2vec2_ctc_AMHARIC/1249/save/dev.csv ...
+ 2022-04-05 14:25:27,434 - dvoice_prepare - INFO - results/wav2vec2_ctc_AMHARIC/1249/save/dev.csv successfully created!
+ 2022-04-05 14:25:27,435 - dvoice_prepare - INFO - Number of samples: 1631
+ 2022-04-05 14:25:27,435 - dvoice_prepare - INFO - Total duration: 2.95 Hours
+ 2022-04-05 14:25:27,477 - dvoice_prepare - INFO - Preparing CSV files for 1632 samples ...
+ 2022-04-05 14:25:27,477 - dvoice_prepare - INFO - Creating csv lists in results/wav2vec2_ctc_AMHARIC/1249/save/test.csv ...
+ 2022-04-05 14:25:27,546 - dvoice_prepare - INFO - results/wav2vec2_ctc_AMHARIC/1249/save/test.csv successfully created!
+ 2022-04-05 14:25:27,546 - dvoice_prepare - INFO - Number of samples: 1632
+ 2022-04-05 14:25:27,546 - dvoice_prepare - INFO - Total duration: 3.03 Hours
+ 2022-04-05 14:25:27,552 - speechbrain.tokenizers.SentencePiece - INFO - Train tokenizer with type:char
+ 2022-04-05 14:25:27,557 - speechbrain.tokenizers.SentencePiece - INFO - Extract wrd sequences from:results/wav2vec2_ctc_AMHARIC/1249/save/train.csv
+ 2022-04-05 14:25:27,707 - speechbrain.tokenizers.SentencePiece - INFO - Text file created at: results/wav2vec2_ctc_AMHARIC/1249/save/train.txt
+ 2022-04-05 14:25:27,870 - speechbrain.tokenizers.SentencePiece - INFO - ==== Loading Tokenizer ===
+ 2022-04-05 14:25:27,870 - speechbrain.tokenizers.SentencePiece - INFO - Tokenizer path: results/wav2vec2_ctc_AMHARIC/1249/save/224_char.model
+ 2022-04-05 14:25:27,870 - speechbrain.tokenizers.SentencePiece - INFO - Tokenizer vocab_size: 224
+ 2022-04-05 14:25:27,870 - speechbrain.tokenizers.SentencePiece - INFO - Tokenizer type: char
+ 2022-04-05 14:25:27,990 - speechbrain.core - INFO - Info: auto_mix_prec arg from hparam file is used
+ 2022-04-05 14:25:27,991 - speechbrain.core - INFO - Info: ckpt_interval_minutes arg from hparam file is used
+ 2022-04-05 14:25:30,936 - speechbrain.core - INFO - 318.8M trainable parameters in ASR
+ 2022-04-05 14:25:30,940 - speechbrain.utils.checkpoints - INFO - Would load a checkpoint here, but none found yet.
+ 2022-04-05 14:25:30,940 - speechbrain.utils.epoch_loop - INFO - Going into epoch 1
+ 2022-04-05 14:37:08,995 - speechbrain.core - ERROR - Exception:
+ Traceback (most recent call last):
+   File "/data/n.abdoumohamed/DVoice/speechbrain/recipes/DVoice/ASR/CTC/train2.py", line 366, in <module>
+     asr_brain.fit(
+   File "/data/n.abdoumohamed/DVoice/speechbrain/speechbrain/core.py", line 1034, in fit
+     loss = self.fit_batch(batch)
+   File "/data/n.abdoumohamed/DVoice/speechbrain/recipes/DVoice/ASR/CTC/train2.py", line 120, in fit_batch
+     outputs = self.compute_forward(batch, sb.Stage.TRAIN)
+   File "/data/n.abdoumohamed/DVoice/speechbrain/recipes/DVoice/ASR/CTC/train2.py", line 61, in compute_forward
+     feats = self.modules.wav2vec2(wavs)
+   File "/home/n.abdoumohamed/.conda/envs/speechbrain/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
+     return forward_call(*input, **kwargs)
+   File "/data/n.abdoumohamed/DVoice/speechbrain/speechbrain/lobes/models/huggingface_wav2vec.py", line 254, in forward
+     return self.extract_features(wav)
+   File "/data/n.abdoumohamed/DVoice/speechbrain/speechbrain/lobes/models/huggingface_wav2vec.py", line 269, in extract_features
+     out = self.model(wav)[0]
+   File "/home/n.abdoumohamed/.conda/envs/speechbrain/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
+     return forward_call(*input, **kwargs)
+   File "/home/n.abdoumohamed/.conda/envs/speechbrain/lib/python3.9/site-packages/transformers/models/wav2vec2/modeling_wav2vec2.py", line 1253, in forward
+     encoder_outputs = self.encoder(
+   File "/home/n.abdoumohamed/.conda/envs/speechbrain/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
+     return forward_call(*input, **kwargs)
+   File "/home/n.abdoumohamed/.conda/envs/speechbrain/lib/python3.9/site-packages/transformers/models/wav2vec2/modeling_wav2vec2.py", line 846, in forward
+     layer_outputs = layer(
+   File "/home/n.abdoumohamed/.conda/envs/speechbrain/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
+     return forward_call(*input, **kwargs)
+   File "/home/n.abdoumohamed/.conda/envs/speechbrain/lib/python3.9/site-packages/transformers/models/wav2vec2/modeling_wav2vec2.py", line 687, in forward
+     hidden_states = hidden_states + self.feed_forward(self.final_layer_norm(hidden_states))
+   File "/home/n.abdoumohamed/.conda/envs/speechbrain/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
+     return forward_call(*input, **kwargs)
+   File "/home/n.abdoumohamed/.conda/envs/speechbrain/lib/python3.9/site-packages/transformers/models/wav2vec2/modeling_wav2vec2.py", line 622, in forward
+     hidden_states = self.intermediate_dense(hidden_states)
+   File "/home/n.abdoumohamed/.conda/envs/speechbrain/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
+     return forward_call(*input, **kwargs)
+   File "/home/n.abdoumohamed/.conda/envs/speechbrain/lib/python3.9/site-packages/torch/nn/modules/linear.py", line 103, in forward
+     return F.linear(input, self.weight, self.bias)
+   File "/home/n.abdoumohamed/.conda/envs/speechbrain/lib/python3.9/site-packages/torch/nn/functional.py", line 1848, in linear
+     return torch._C._nn.linear(input, weight, bias)
+ RuntimeError: CUDA out of memory. Tried to allocate 24.00 MiB (GPU 0; 11.91 GiB total capacity; 10.77 GiB already allocated; 25.25 MiB free; 10.94 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF
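
This first run exhausts the ~12 GiB GPU with batch_size 4. A hedged sketch of the mitigations the error message itself points to (the allocator option must be set before CUDA is initialized; the value 128 is illustrative):

    # oom_workarounds.py - hypothetical mitigations, not the training script
    import os

    # Reduce allocator fragmentation, as the RuntimeError above suggests.
    os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"

    import torch  # import only after the environment variable is set

    # Other levers: lower batch_size in hyperparams.yaml, reduce
    # avoid_if_longer_than, or release cached blocks between attempts:
    torch.cuda.empty_cache()
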
+ 2022-04-05 21:06:45,197 - speechbrain.core - INFO - Beginning experiment!
+ 2022-04-05 21:06:45,198 - speechbrain.core - INFO - Experiment folder: results/wav2vec2_ctc_AMHARIC/1249
+ 2022-04-05 21:06:45,836 - speechbrain.utils.superpowers - DEBUG - aiohttp==3.8.1
+ aiosignal==1.2.0
+ appdirs==1.4.4
+ async-timeout==4.0.2
+ attrs==21.4.0
+ audioread==2.1.9
+ azure-core==1.21.1
+ azure-storage-blob==12.9.0
+ bcrypt==3.2.0
+ black==19.10b0
+ certifi==2021.10.8
+ cffi==1.15.0
+ cfgv==3.3.1
+ charset-normalizer==2.0.10
+ click==8.0.3
+ cryptography==36.0.1
+ datasets==1.13.3
+ decorator==5.1.1
+ dill==0.3.4
+ distlib==0.3.4
+ entrypoints==0.3
+ ffmpeg==1.4
+ filelock==3.4.2
+ flake8==3.7.9
+ frozenlist==1.3.0
+ fsspec==2022.2.0
+ huggingface-hub==0.2.1
+ HyperPyYAML==1.0.0
+ identify==2.4.4
+ idna==3.3
+ isodate==0.6.1
+ joblib==1.1.0
+ librosa==0.8.1
+ llvmlite==0.38.0
+ mccabe==0.6.1
+ more-itertools==8.12.0
+ msrest==0.6.21
+ multidict==6.0.2
+ multiprocess==0.70.12.2
+ mutagen==1.45.1
+ nodeenv==1.6.0
+ numba==0.55.0
+ numpy==1.21.5
+ oauthlib==3.1.1
+ packaging==21.3
+ pandas==1.3.5
+ paramiko==2.10.3
+ pathspec==0.9.0
+ platformdirs==2.4.1
+ pluggy==0.13.1
+ pooch==1.5.2
+ pre-commit==2.17.0
+ py==1.11.0
+ pyarrow==7.0.0
+ pycodestyle==2.5.0
+ pycparser==2.21
+ pyflakes==2.1.1
+ PyNaCl==1.5.0
+ pyparsing==3.0.6
+ pytest==5.4.1
+ python-dateutil==2.8.2
+ pytz==2021.3
+ PyYAML==6.0
+ regex==2022.1.18
+ requests==2.27.1
+ requests-oauthlib==1.3.0
+ resampy==0.2.2
+ ruamel.yaml==0.17.20
+ ruamel.yaml.clib==0.2.6
+ sacremoses==0.0.47
+ scikit-learn==1.0.2
+ scipy==1.7.3
+ scp==0.14.4
+ sentencepiece==0.1.96
+ six==1.16.0
+ SoundFile==0.10.3.post1
+ threadpoolctl==3.0.0
+ tokenizers==0.10.3
+ toml==0.10.2
+ torch==1.10.1
+ torchaudio==0.10.1
+ tqdm==4.62.3
+ transformers==4.13.0
+ typed-ast==1.5.1
+ typing_extensions==4.0.1
+ urllib3==1.26.8
+ virtualenv==20.13.0
+ wcwidth==0.2.5
+ xxhash==3.0.0
+ yamllint==1.23.0
+ yarl==1.7.2
+
+
+ 2022-04-05 21:06:46,007 - dvoice_prepare - INFO - results/wav2vec2_ctc_AMHARIC/1249/save/train.csv already exists, skipping data preparation!
+ 2022-04-05 21:06:46,007 - dvoice_prepare - INFO - results/wav2vec2_ctc_AMHARIC/1249/save/dev.csv already exists, skipping data preparation!
+ 2022-04-05 21:06:46,007 - dvoice_prepare - INFO - results/wav2vec2_ctc_AMHARIC/1249/save/test.csv already exists, skipping data preparation!
+ 2022-04-05 21:06:46,007 - speechbrain.tokenizers.SentencePiece - INFO - Tokenizer is already trained.
+ 2022-04-05 21:06:46,007 - speechbrain.tokenizers.SentencePiece - INFO - ==== Loading Tokenizer ===
+ 2022-04-05 21:06:46,007 - speechbrain.tokenizers.SentencePiece - INFO - Tokenizer path: results/wav2vec2_ctc_AMHARIC/1249/save/224_char.model
+ 2022-04-05 21:06:46,007 - speechbrain.tokenizers.SentencePiece - INFO - Tokenizer vocab_size: 224
+ 2022-04-05 21:06:46,007 - speechbrain.tokenizers.SentencePiece - INFO - Tokenizer type: char
+ 2022-04-05 21:06:46,145 - speechbrain.core - INFO - Info: auto_mix_prec arg from hparam file is used
+ 2022-04-05 21:06:46,145 - speechbrain.core - INFO - Info: ckpt_interval_minutes arg from hparam file is used
+ 2022-04-05 21:06:49,137 - speechbrain.core - INFO - 318.8M trainable parameters in ASR
+ 2022-04-05 21:06:49,142 - speechbrain.utils.checkpoints - INFO - Would load a checkpoint here, but none found yet.
+ 2022-04-05 21:06:49,142 - speechbrain.utils.epoch_loop - INFO - Going into epoch 1
+ 2022-04-05 21:18:09,453 - speechbrain.core - ERROR - Exception:
+ Traceback (most recent call last):
+   File "/data/n.abdoumohamed/DVoice/speechbrain/recipes/DVoice/ASR/CTC/train2.py", line 366, in <module>
+     asr_brain.fit(
+   File "/data/n.abdoumohamed/DVoice/speechbrain/speechbrain/core.py", line 1034, in fit
+     loss = self.fit_batch(batch)
+   File "/data/n.abdoumohamed/DVoice/speechbrain/recipes/DVoice/ASR/CTC/train2.py", line 120, in fit_batch
+     outputs = self.compute_forward(batch, sb.Stage.TRAIN)
+   File "/data/n.abdoumohamed/DVoice/speechbrain/recipes/DVoice/ASR/CTC/train2.py", line 61, in compute_forward
+     feats = self.modules.wav2vec2(wavs)
+   File "/home/n.abdoumohamed/.conda/envs/speechbrain/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
+     return forward_call(*input, **kwargs)
+   File "/data/n.abdoumohamed/DVoice/speechbrain/speechbrain/lobes/models/huggingface_wav2vec.py", line 254, in forward
+     return self.extract_features(wav)
+   File "/data/n.abdoumohamed/DVoice/speechbrain/speechbrain/lobes/models/huggingface_wav2vec.py", line 269, in extract_features
+     out = self.model(wav)[0]
+   File "/home/n.abdoumohamed/.conda/envs/speechbrain/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
+     return forward_call(*input, **kwargs)
+   File "/home/n.abdoumohamed/.conda/envs/speechbrain/lib/python3.9/site-packages/transformers/models/wav2vec2/modeling_wav2vec2.py", line 1253, in forward
+     encoder_outputs = self.encoder(
+   File "/home/n.abdoumohamed/.conda/envs/speechbrain/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
+     return forward_call(*input, **kwargs)
+   File "/home/n.abdoumohamed/.conda/envs/speechbrain/lib/python3.9/site-packages/transformers/models/wav2vec2/modeling_wav2vec2.py", line 846, in forward
+     layer_outputs = layer(
+   File "/home/n.abdoumohamed/.conda/envs/speechbrain/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
+     return forward_call(*input, **kwargs)
+   File "/home/n.abdoumohamed/.conda/envs/speechbrain/lib/python3.9/site-packages/transformers/models/wav2vec2/modeling_wav2vec2.py", line 687, in forward
+     hidden_states = hidden_states + self.feed_forward(self.final_layer_norm(hidden_states))
+   File "/home/n.abdoumohamed/.conda/envs/speechbrain/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
+     return forward_call(*input, **kwargs)
+   File "/home/n.abdoumohamed/.conda/envs/speechbrain/lib/python3.9/site-packages/transformers/models/wav2vec2/modeling_wav2vec2.py", line 626, in forward
+     hidden_states = self.output_dense(hidden_states)
+   File "/home/n.abdoumohamed/.conda/envs/speechbrain/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
+     return forward_call(*input, **kwargs)
+   File "/home/n.abdoumohamed/.conda/envs/speechbrain/lib/python3.9/site-packages/torch/nn/modules/linear.py", line 103, in forward
+     return F.linear(input, self.weight, self.bias)
+   File "/home/n.abdoumohamed/.conda/envs/speechbrain/lib/python3.9/site-packages/torch/nn/functional.py", line 1848, in linear
+     return torch._C._nn.linear(input, weight, bias)
+ RuntimeError: CUDA out of memory. Tried to allocate 20.00 MiB (GPU 0; 11.91 GiB total capacity; 10.65 GiB already allocated; 17.25 MiB free; 10.95 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF
+ 2022-04-06 10:21:42,642 - speechbrain.core - INFO - Beginning experiment!
+ 2022-04-06 10:21:42,643 - speechbrain.core - INFO - Experiment folder: results/wav2vec2_ctc_AMHARIC/1249
+ 2022-04-06 10:21:43,318 - speechbrain.utils.superpowers - DEBUG - aiohttp==3.8.1
+ aiosignal==1.2.0
+ appdirs==1.4.4
+ async-timeout==4.0.2
+ attrs==21.4.0
+ audioread==2.1.9
+ azure-core==1.21.1
+ azure-storage-blob==12.9.0
+ bcrypt==3.2.0
+ black==19.10b0
+ certifi==2021.10.8
+ cffi==1.15.0
+ cfgv==3.3.1
+ charset-normalizer==2.0.10
+ click==8.0.3
+ cryptography==36.0.1
+ datasets==1.13.3
+ decorator==5.1.1
+ dill==0.3.4
+ distlib==0.3.4
+ entrypoints==0.3
+ ffmpeg==1.4
+ filelock==3.4.2
+ flake8==3.7.9
+ frozenlist==1.3.0
+ fsspec==2022.2.0
+ huggingface-hub==0.2.1
+ HyperPyYAML==1.0.0
+ identify==2.4.4
+ idna==3.3
+ isodate==0.6.1
+ joblib==1.1.0
+ librosa==0.8.1
+ llvmlite==0.38.0
+ mccabe==0.6.1
+ more-itertools==8.12.0
+ msrest==0.6.21
+ multidict==6.0.2
+ multiprocess==0.70.12.2
+ mutagen==1.45.1
+ nodeenv==1.6.0
+ numba==0.55.0
+ numpy==1.21.5
+ oauthlib==3.1.1
+ packaging==21.3
+ pandas==1.3.5
+ paramiko==2.10.3
+ pathspec==0.9.0
+ platformdirs==2.4.1
+ pluggy==0.13.1
+ pooch==1.5.2
+ pre-commit==2.17.0
+ py==1.11.0
+ pyarrow==7.0.0
+ pycodestyle==2.5.0
+ pycparser==2.21
+ pyflakes==2.1.1
+ PyNaCl==1.5.0
+ pyparsing==3.0.6
+ pytest==5.4.1
+ python-dateutil==2.8.2
+ pytz==2021.3
+ PyYAML==6.0
+ regex==2022.1.18
+ requests==2.27.1
+ requests-oauthlib==1.3.0
+ resampy==0.2.2
+ ruamel.yaml==0.17.20
+ ruamel.yaml.clib==0.2.6
+ sacremoses==0.0.47
+ scikit-learn==1.0.2
+ scipy==1.7.3
+ scp==0.14.4
+ sentencepiece==0.1.96
+ six==1.16.0
+ SoundFile==0.10.3.post1
+ threadpoolctl==3.0.0
+ tokenizers==0.10.3
+ toml==0.10.2
+ torch==1.10.1
+ torchaudio==0.10.1
+ tqdm==4.62.3
+ transformers==4.13.0
+ typed-ast==1.5.1
+ typing_extensions==4.0.1
+ urllib3==1.26.8
+ virtualenv==20.13.0
+ wcwidth==0.2.5
+ xxhash==3.0.0
+ yamllint==1.23.0
+ yarl==1.7.2
+
+
+ 2022-04-06 10:21:43,439 - dvoice_prepare - INFO - results/wav2vec2_ctc_AMHARIC/1249/save/train.csv already exists, skipping data preparation!
+ 2022-04-06 10:21:43,439 - dvoice_prepare - INFO - results/wav2vec2_ctc_AMHARIC/1249/save/dev.csv already exists, skipping data preparation!
+ 2022-04-06 10:21:43,439 - dvoice_prepare - INFO - results/wav2vec2_ctc_AMHARIC/1249/save/test.csv already exists, skipping data preparation!
+ 2022-04-06 10:21:43,439 - speechbrain.tokenizers.SentencePiece - INFO - Tokenizer is already trained.
+ 2022-04-06 10:21:43,439 - speechbrain.tokenizers.SentencePiece - INFO - ==== Loading Tokenizer ===
+ 2022-04-06 10:21:43,439 - speechbrain.tokenizers.SentencePiece - INFO - Tokenizer path: results/wav2vec2_ctc_AMHARIC/1249/save/224_char.model
+ 2022-04-06 10:21:43,440 - speechbrain.tokenizers.SentencePiece - INFO - Tokenizer vocab_size: 224
+ 2022-04-06 10:21:43,440 - speechbrain.tokenizers.SentencePiece - INFO - Tokenizer type: char
+ 2022-04-06 10:21:43,580 - speechbrain.core - INFO - Info: auto_mix_prec arg from hparam file is used
+ 2022-04-06 10:21:43,580 - speechbrain.core - INFO - Info: ckpt_interval_minutes arg from hparam file is used
+ 2022-04-06 10:21:46,557 - speechbrain.core - INFO - 318.8M trainable parameters in ASR
+ 2022-04-06 10:21:46,562 - speechbrain.utils.checkpoints - INFO - Would load a checkpoint here, but none found yet.
+ 2022-04-06 10:21:46,562 - speechbrain.utils.epoch_loop - INFO - Going into epoch 1
+ 2022-04-06 10:45:30,095 - speechbrain.core - ERROR - Exception:
+ Traceback (most recent call last):
+   File "/data/n.abdoumohamed/DVoice/speechbrain/recipes/DVoice/ASR/CTC/train2.py", line 366, in <module>
+     asr_brain.fit(
+   File "/data/n.abdoumohamed/DVoice/speechbrain/speechbrain/core.py", line 1034, in fit
+     loss = self.fit_batch(batch)
+   File "/data/n.abdoumohamed/DVoice/speechbrain/recipes/DVoice/ASR/CTC/train2.py", line 120, in fit_batch
+     outputs = self.compute_forward(batch, sb.Stage.TRAIN)
+   File "/data/n.abdoumohamed/DVoice/speechbrain/recipes/DVoice/ASR/CTC/train2.py", line 61, in compute_forward
+     feats = self.modules.wav2vec2(wavs)
+   File "/home/n.abdoumohamed/.conda/envs/speechbrain/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
+     return forward_call(*input, **kwargs)
+   File "/data/n.abdoumohamed/DVoice/speechbrain/speechbrain/lobes/models/huggingface_wav2vec.py", line 254, in forward
+     return self.extract_features(wav)
+   File "/data/n.abdoumohamed/DVoice/speechbrain/speechbrain/lobes/models/huggingface_wav2vec.py", line 269, in extract_features
+     out = self.model(wav)[0]
+   File "/home/n.abdoumohamed/.conda/envs/speechbrain/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
+     return forward_call(*input, **kwargs)
+   File "/home/n.abdoumohamed/.conda/envs/speechbrain/lib/python3.9/site-packages/transformers/models/wav2vec2/modeling_wav2vec2.py", line 1253, in forward
+     encoder_outputs = self.encoder(
+   File "/home/n.abdoumohamed/.conda/envs/speechbrain/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
+     return forward_call(*input, **kwargs)
+   File "/home/n.abdoumohamed/.conda/envs/speechbrain/lib/python3.9/site-packages/transformers/models/wav2vec2/modeling_wav2vec2.py", line 846, in forward
+     layer_outputs = layer(
+   File "/home/n.abdoumohamed/.conda/envs/speechbrain/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
+     return forward_call(*input, **kwargs)
+   File "/home/n.abdoumohamed/.conda/envs/speechbrain/lib/python3.9/site-packages/transformers/models/wav2vec2/modeling_wav2vec2.py", line 682, in forward
+     hidden_states, attn_weights, _ = self.attention(
+   File "/home/n.abdoumohamed/.conda/envs/speechbrain/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
+     return forward_call(*input, **kwargs)
+   File "/home/n.abdoumohamed/.conda/envs/speechbrain/lib/python3.9/site-packages/transformers/models/wav2vec2/modeling_wav2vec2.py", line 586, in forward
+     attn_probs = nn.functional.dropout(attn_weights, p=self.dropout, training=self.training)
+   File "/home/n.abdoumohamed/.conda/envs/speechbrain/lib/python3.9/site-packages/torch/nn/functional.py", line 1169, in dropout
+     return _VF.dropout_(input, p, training) if inplace else _VF.dropout(input, p, training)
+ RuntimeError: CUDA out of memory. Tried to allocate 46.00 MiB (GPU 0; 11.91 GiB total capacity; 10.61 GiB already allocated; 5.25 MiB free; 10.96 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF
+ 2022-04-07 01:24:51,494 - speechbrain.core - INFO - Beginning experiment!
+ 2022-04-07 01:24:51,498 - speechbrain.core - INFO - Experiment folder: results/wav2vec2_ctc_AMHARIC/1249
+ 2022-04-07 01:24:52,086 - speechbrain.utils.superpowers - DEBUG - aiohttp==3.8.1
+ aiosignal==1.2.0
+ appdirs==1.4.4
+ async-timeout==4.0.2
+ attrs==21.4.0
+ audioread==2.1.9
+ azure-core==1.21.1
+ azure-storage-blob==12.9.0
+ bcrypt==3.2.0
+ black==19.10b0
+ certifi==2021.10.8
+ cffi==1.15.0
+ cfgv==3.3.1
+ charset-normalizer==2.0.10
+ click==8.0.3
+ cryptography==36.0.1
+ datasets==1.13.3
+ decorator==5.1.1
+ dill==0.3.4
+ distlib==0.3.4
+ entrypoints==0.3
+ ffmpeg==1.4
+ filelock==3.4.2
+ flake8==3.7.9
+ frozenlist==1.3.0
+ fsspec==2022.2.0
+ huggingface-hub==0.2.1
+ HyperPyYAML==1.0.0
+ identify==2.4.4
+ idna==3.3
+ isodate==0.6.1
+ joblib==1.1.0
+ librosa==0.8.1
+ llvmlite==0.38.0
+ mccabe==0.6.1
+ more-itertools==8.12.0
+ msrest==0.6.21
+ multidict==6.0.2
+ multiprocess==0.70.12.2
+ mutagen==1.45.1
+ nodeenv==1.6.0
+ numba==0.55.0
+ numpy==1.21.5
+ oauthlib==3.1.1
+ packaging==21.3
+ pandas==1.3.5
+ paramiko==2.10.3
+ pathspec==0.9.0
+ platformdirs==2.4.1
+ pluggy==0.13.1
+ pooch==1.5.2
+ pre-commit==2.17.0
+ py==1.11.0
+ pyarrow==7.0.0
+ pycodestyle==2.5.0
+ pycparser==2.21
+ pyflakes==2.1.1
+ PyNaCl==1.5.0
+ pyparsing==3.0.6
+ pytest==5.4.1
+ python-dateutil==2.8.2
+ pytz==2021.3
+ PyYAML==6.0
+ regex==2022.1.18
+ requests==2.27.1
+ requests-oauthlib==1.3.0
+ resampy==0.2.2
+ ruamel.yaml==0.17.20
+ ruamel.yaml.clib==0.2.6
+ sacremoses==0.0.47
+ scikit-learn==1.0.2
+ scipy==1.7.3
+ scp==0.14.4
+ sentencepiece==0.1.96
+ six==1.16.0
+ SoundFile==0.10.3.post1
+ threadpoolctl==3.0.0
+ tokenizers==0.10.3
+ toml==0.10.2
+ torch==1.10.1
+ torchaudio==0.10.1
+ tqdm==4.62.3
+ transformers==4.13.0
+ typed-ast==1.5.1
+ typing_extensions==4.0.1
+ urllib3==1.26.8
+ virtualenv==20.13.0
+ wcwidth==0.2.5
+ xxhash==3.0.0
+ yamllint==1.23.0
+ yarl==1.7.2
+
+
+ 2022-04-07 01:24:52,319 - dvoice_prepare - INFO - results/wav2vec2_ctc_AMHARIC/1249/save/train.csv already exists, skipping data preparation!
+ 2022-04-07 01:24:52,319 - dvoice_prepare - INFO - results/wav2vec2_ctc_AMHARIC/1249/save/dev.csv already exists, skipping data preparation!
+ 2022-04-07 01:24:52,319 - dvoice_prepare - INFO - results/wav2vec2_ctc_AMHARIC/1249/save/test.csv already exists, skipping data preparation!
+ 2022-04-07 01:24:52,323 - speechbrain.tokenizers.SentencePiece - INFO - Tokenizer is already trained.
+ 2022-04-07 01:24:52,323 - speechbrain.tokenizers.SentencePiece - INFO - ==== Loading Tokenizer ===
+ 2022-04-07 01:24:52,323 - speechbrain.tokenizers.SentencePiece - INFO - Tokenizer path: results/wav2vec2_ctc_AMHARIC/1249/save/224_char.model
+ 2022-04-07 01:24:52,323 - speechbrain.tokenizers.SentencePiece - INFO - Tokenizer vocab_size: 224
+ 2022-04-07 01:24:52,323 - speechbrain.tokenizers.SentencePiece - INFO - Tokenizer type: char
+ 2022-04-07 01:24:52,988 - speechbrain.core - INFO - Info: auto_mix_prec arg from hparam file is used
+ 2022-04-07 01:24:52,988 - speechbrain.core - INFO - Info: ckpt_interval_minutes arg from hparam file is used
+ 2022-04-07 01:24:56,199 - speechbrain.core - INFO - 318.8M trainable parameters in ASR
+ 2022-04-07 01:24:56,215 - speechbrain.utils.checkpoints - INFO - Would load a checkpoint here, but none found yet.
+ 2022-04-07 01:24:56,215 - speechbrain.utils.epoch_loop - INFO - Going into epoch 1
+ 2022-04-07 01:39:30,997 - speechbrain.utils.train_logger - INFO - epoch: 1, lr_model: 1.00e+00, lr_wav2vec: 1.00e-04 - train loss: 2.16 - valid loss: 6.31e-01, valid CER: 17.94, valid WER: 61.08
+ 2022-04-07 01:41:12,908 - speechbrain.utils.checkpoints - INFO - Saved an end-of-epoch checkpoint in results/wav2vec2_ctc_AMHARIC/1249/save/CKPT+2022-04-07+01-39-31+00
+ 2022-04-07 01:41:13,081 - speechbrain.utils.epoch_loop - INFO - Going into epoch 2
+ 2022-04-07 01:54:26,673 - speechbrain.utils.train_logger - INFO - epoch: 2, lr_model: 1.00e+00, lr_wav2vec: 1.00e-04 - train loss: 7.60e-01 - valid loss: 4.76e-01, valid CER: 13.59, valid WER: 49.70
+ 2022-04-07 01:56:27,270 - speechbrain.utils.checkpoints - INFO - Saved an end-of-epoch checkpoint in results/wav2vec2_ctc_AMHARIC/1249/save/CKPT+2022-04-07+01-54-26+00
+ 2022-04-07 01:56:28,698 - speechbrain.utils.checkpoints - INFO - Deleted checkpoint in results/wav2vec2_ctc_AMHARIC/1249/save/CKPT+2022-04-07+01-39-31+00
+ 2022-04-07 01:56:28,698 - speechbrain.utils.epoch_loop - INFO - Going into epoch 3
+ 2022-04-07 02:09:43,082 - speechbrain.utils.train_logger - INFO - epoch: 3, lr_model: 1.00e+00, lr_wav2vec: 1.00e-04 - train loss: 5.88e-01 - valid loss: 4.04e-01, valid CER: 11.77, valid WER: 43.35
+ 2022-04-07 02:14:21,215 - speechbrain.utils.checkpoints - INFO - Saved an end-of-epoch checkpoint in results/wav2vec2_ctc_AMHARIC/1249/save/CKPT+2022-04-07+02-09-43+00
+ 2022-04-07 02:14:22,071 - speechbrain.utils.checkpoints - INFO - Deleted checkpoint in results/wav2vec2_ctc_AMHARIC/1249/save/CKPT+2022-04-07+01-54-26+00
+ 2022-04-07 02:14:22,071 - speechbrain.utils.epoch_loop - INFO - Going into epoch 4
+ 2022-04-07 02:27:35,021 - speechbrain.utils.train_logger - INFO - epoch: 4, lr_model: 1.00e+00, lr_wav2vec: 1.00e-04 - train loss: 4.97e-01 - valid loss: 3.75e-01, valid CER: 10.61, valid WER: 39.29
+ 2022-04-07 02:28:59,128 - speechbrain.utils.checkpoints - INFO - Saved an end-of-epoch checkpoint in results/wav2vec2_ctc_AMHARIC/1249/save/CKPT+2022-04-07+02-27-35+00
+ 2022-04-07 02:29:00,277 - speechbrain.utils.checkpoints - INFO - Deleted checkpoint in results/wav2vec2_ctc_AMHARIC/1249/save/CKPT+2022-04-07+02-09-43+00
+ 2022-04-07 02:29:00,277 - speechbrain.utils.epoch_loop - INFO - Going into epoch 5
+ 2022-04-07 02:42:13,500 - speechbrain.utils.train_logger - INFO - epoch: 5, lr_model: 1.00e+00, lr_wav2vec: 1.00e-04 - train loss: 4.29e-01 - valid loss: 3.71e-01, valid CER: 10.15, valid WER: 37.38
+ 2022-04-07 02:47:20,435 - speechbrain.utils.checkpoints - INFO - Saved an end-of-epoch checkpoint in results/wav2vec2_ctc_AMHARIC/1249/save/CKPT+2022-04-07+02-42-13+00
+ 2022-04-07 02:47:21,615 - speechbrain.utils.checkpoints - INFO - Deleted checkpoint in results/wav2vec2_ctc_AMHARIC/1249/save/CKPT+2022-04-07+02-27-35+00
+ 2022-04-07 02:47:21,615 - speechbrain.utils.epoch_loop - INFO - Going into epoch 6
+ 2022-04-07 03:00:32,602 - speechbrain.utils.train_logger - INFO - epoch: 6, lr_model: 1.00e+00, lr_wav2vec: 1.00e-04 - train loss: 3.81e-01 - valid loss: 3.54e-01, valid CER: 9.54, valid WER: 35.23
+ 2022-04-07 03:02:35,290 - speechbrain.utils.checkpoints - INFO - Saved an end-of-epoch checkpoint in results/wav2vec2_ctc_AMHARIC/1249/save/CKPT+2022-04-07+03-00-32+00
+ 2022-04-07 03:02:35,924 - speechbrain.utils.checkpoints - INFO - Deleted checkpoint in results/wav2vec2_ctc_AMHARIC/1249/save/CKPT+2022-04-07+02-42-13+00
+ 2022-04-07 03:02:35,925 - speechbrain.utils.epoch_loop - INFO - Going into epoch 7
+ 2022-04-07 03:15:54,749 - speechbrain.utils.train_logger - INFO - epoch: 7, lr_model: 1.00e+00, lr_wav2vec: 1.00e-04 - train loss: 3.39e-01 - valid loss: 3.41e-01, valid CER: 8.98, valid WER: 33.48
+ 2022-04-07 03:17:35,622 - speechbrain.utils.checkpoints - INFO - Saved an end-of-epoch checkpoint in results/wav2vec2_ctc_AMHARIC/1249/save/CKPT+2022-04-07+03-15-54+00
+ 2022-04-07 03:17:36,628 - speechbrain.utils.checkpoints - INFO - Deleted checkpoint in results/wav2vec2_ctc_AMHARIC/1249/save/CKPT+2022-04-07+03-00-32+00
+ 2022-04-07 03:17:36,628 - speechbrain.utils.epoch_loop - INFO - Going into epoch 8
+ 2022-04-07 03:30:53,354 - speechbrain.nnet.schedulers - INFO - Changing lr from 1 to 0.8
+ 2022-04-07 03:30:53,376 - speechbrain.nnet.schedulers - INFO - Changing lr from 0.0001 to 9e-05
+ 2022-04-07 03:30:53,520 - speechbrain.utils.train_logger - INFO - epoch: 8, lr_model: 1.00e+00, lr_wav2vec: 1.00e-04 - train loss: 3.08e-01 - valid loss: 3.57e-01, valid CER: 8.80, valid WER: 32.41
+ 2022-04-07 03:33:47,026 - speechbrain.utils.checkpoints - INFO - Saved an end-of-epoch checkpoint in results/wav2vec2_ctc_AMHARIC/1249/save/CKPT+2022-04-07+03-30-53+00
+ 2022-04-07 03:33:48,209 - speechbrain.utils.checkpoints - INFO - Deleted checkpoint in results/wav2vec2_ctc_AMHARIC/1249/save/CKPT+2022-04-07+03-15-54+00
+ 2022-04-07 03:33:48,209 - speechbrain.utils.epoch_loop - INFO - Going into epoch 9
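
The lr changes at epoch 8 above follow the NewBobScheduler settings in hyperparams.yaml: when the relative improvement of the validation loss falls below improvement_threshold (0.0025), each learning rate is multiplied by its annealing_factor (0.8 for the model, 0.9 for wav2vec). An illustrative sketch of that rule (not SpeechBrain's implementation):

    # newbob_sketch.py - illustrative NewBob-style annealing
    def newbob_step(lr, prev_metric, metric,
                    improvement_threshold=0.0025, annealing_factor=0.8):
        # anneal when the tracked metric (here: valid loss) stops improving
        improvement = (prev_metric - metric) / prev_metric
        return lr * annealing_factor if improvement < improvement_threshold else lr

    # Epoch 8: valid loss worsened from 3.41e-01 to 3.57e-01, so both lrs anneal.
    print(newbob_step(1.0, 0.341, 0.357))                         # 0.8
    print(newbob_step(1e-4, 0.341, 0.357, annealing_factor=0.9))  # ~9e-05
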
+ 2022-04-07 03:46:59,930 - speechbrain.utils.train_logger - INFO - epoch: 9, lr_model: 8.00e-01, lr_wav2vec: 9.00e-05 - train loss: 2.70e-01 - valid loss: 3.46e-01, valid CER: 8.47, valid WER: 31.33
+ 2022-04-07 03:48:26,079 - speechbrain.utils.checkpoints - INFO - Saved an end-of-epoch checkpoint in results/wav2vec2_ctc_AMHARIC/1249/save/CKPT+2022-04-07+03-46-59+00
+ 2022-04-07 03:48:26,901 - speechbrain.utils.checkpoints - INFO - Deleted checkpoint in results/wav2vec2_ctc_AMHARIC/1249/save/CKPT+2022-04-07+03-30-53+00
+ 2022-04-07 03:48:26,901 - speechbrain.utils.epoch_loop - INFO - Going into epoch 10
+ 2022-04-07 04:01:43,511 - speechbrain.nnet.schedulers - INFO - Changing lr from 0.8 to 0.64
+ 2022-04-07 04:01:43,527 - speechbrain.nnet.schedulers - INFO - Changing lr from 9e-05 to 8.1e-05
+ 2022-04-07 04:01:43,843 - speechbrain.utils.train_logger - INFO - epoch: 10, lr_model: 8.00e-01, lr_wav2vec: 9.00e-05 - train loss: 2.45e-01 - valid loss: 3.64e-01, valid CER: 8.30, valid WER: 30.31
+ 2022-04-07 04:03:02,059 - speechbrain.utils.checkpoints - INFO - Saved an end-of-epoch checkpoint in results/wav2vec2_ctc_AMHARIC/1249/save/CKPT+2022-04-07+04-01-43+00
+ 2022-04-07 04:03:02,966 - speechbrain.utils.checkpoints - INFO - Deleted checkpoint in results/wav2vec2_ctc_AMHARIC/1249/save/CKPT+2022-04-07+03-46-59+00
+ 2022-04-07 04:03:02,966 - speechbrain.utils.epoch_loop - INFO - Going into epoch 11
+ 2022-04-07 04:16:16,977 - speechbrain.utils.train_logger - INFO - epoch: 11, lr_model: 6.40e-01, lr_wav2vec: 8.10e-05 - train loss: 2.17e-01 - valid loss: 3.43e-01, valid CER: 8.00, valid WER: 29.91
+ 2022-04-07 04:17:41,125 - speechbrain.utils.checkpoints - INFO - Saved an end-of-epoch checkpoint in results/wav2vec2_ctc_AMHARIC/1249/save/CKPT+2022-04-07+04-16-16+00
+ 2022-04-07 04:17:42,305 - speechbrain.utils.checkpoints - INFO - Deleted checkpoint in results/wav2vec2_ctc_AMHARIC/1249/save/CKPT+2022-04-07+04-01-43+00
+ 2022-04-07 04:17:42,305 - speechbrain.utils.epoch_loop - INFO - Going into epoch 12
+ 2022-04-07 04:30:56,259 - speechbrain.nnet.schedulers - INFO - Changing lr from 0.64 to 0.51
+ 2022-04-07 04:30:56,276 - speechbrain.nnet.schedulers - INFO - Changing lr from 8.1e-05 to 7.3e-05
+ 2022-04-07 04:30:56,372 - speechbrain.utils.train_logger - INFO - epoch: 12, lr_model: 6.40e-01, lr_wav2vec: 8.10e-05 - train loss: 1.98e-01 - valid loss: 3.68e-01, valid CER: 7.93, valid WER: 29.49
+ 2022-04-07 04:33:36,676 - speechbrain.utils.checkpoints - INFO - Saved an end-of-epoch checkpoint in results/wav2vec2_ctc_AMHARIC/1249/save/CKPT+2022-04-07+04-30-56+00
+ 2022-04-07 04:33:38,939 - speechbrain.utils.checkpoints - INFO - Deleted checkpoint in results/wav2vec2_ctc_AMHARIC/1249/save/CKPT+2022-04-07+04-16-16+00
+ 2022-04-07 04:33:38,939 - speechbrain.utils.epoch_loop - INFO - Going into epoch 13
+ 2022-04-07 04:46:50,071 - speechbrain.nnet.schedulers - INFO - Changing lr from 0.51 to 0.41
+ 2022-04-07 04:46:50,283 - speechbrain.nnet.schedulers - INFO - Changing lr from 7.3e-05 to 6.6e-05
+ 2022-04-07 04:46:50,494 - speechbrain.utils.train_logger - INFO - epoch: 13, lr_model: 5.12e-01, lr_wav2vec: 7.29e-05 - train loss: 1.75e-01 - valid loss: 3.94e-01, valid CER: 7.78, valid WER: 29.09
+ 2022-04-07 04:49:19,543 - speechbrain.utils.checkpoints - INFO - Saved an end-of-epoch checkpoint in results/wav2vec2_ctc_AMHARIC/1249/save/CKPT+2022-04-07+04-46-50+00
+ 2022-04-07 04:49:20,921 - speechbrain.utils.checkpoints - INFO - Deleted checkpoint in results/wav2vec2_ctc_AMHARIC/1249/save/CKPT+2022-04-07+04-30-56+00
+ 2022-04-07 04:49:20,921 - speechbrain.utils.epoch_loop - INFO - Going into epoch 14
+ 2022-04-07 05:02:32,796 - speechbrain.nnet.schedulers - INFO - Changing lr from 0.41 to 0.33
+ 2022-04-07 05:02:32,805 - speechbrain.nnet.schedulers - INFO - Changing lr from 6.6e-05 to 5.9e-05
+ 2022-04-07 05:02:32,852 - speechbrain.utils.train_logger - INFO - epoch: 14, lr_model: 4.10e-01, lr_wav2vec: 6.56e-05 - train loss: 1.58e-01 - valid loss: 3.94e-01, valid CER: 7.75, valid WER: 28.90
+ 2022-04-07 05:04:12,624 - speechbrain.utils.checkpoints - INFO - Saved an end-of-epoch checkpoint in results/wav2vec2_ctc_AMHARIC/1249/save/CKPT+2022-04-07+05-02-32+00
+ 2022-04-07 05:04:14,019 - speechbrain.utils.checkpoints - INFO - Deleted checkpoint in results/wav2vec2_ctc_AMHARIC/1249/save/CKPT+2022-04-07+04-46-50+00
+ 2022-04-07 05:04:14,019 - speechbrain.utils.epoch_loop - INFO - Going into epoch 15
+ 2022-04-07 05:17:40,541 - speechbrain.nnet.schedulers - INFO - Changing lr from 0.33 to 0.26
+ 2022-04-07 05:17:40,556 - speechbrain.nnet.schedulers - INFO - Changing lr from 5.9e-05 to 5.3e-05
+ 2022-04-07 05:17:40,606 - speechbrain.utils.train_logger - INFO - epoch: 15, lr_model: 3.28e-01, lr_wav2vec: 5.90e-05 - train loss: 1.37e-01 - valid loss: 4.13e-01, valid CER: 7.64, valid WER: 28.39
+ 2022-04-07 05:19:24,936 - speechbrain.utils.checkpoints - INFO - Saved an end-of-epoch checkpoint in results/wav2vec2_ctc_AMHARIC/1249/save/CKPT+2022-04-07+05-17-40+00
+ 2022-04-07 05:19:25,870 - speechbrain.utils.checkpoints - INFO - Deleted checkpoint in results/wav2vec2_ctc_AMHARIC/1249/save/CKPT+2022-04-07+05-02-32+00
+ 2022-04-07 05:19:25,870 - speechbrain.utils.epoch_loop - INFO - Going into epoch 16
+ 2022-04-07 05:32:43,793 - speechbrain.utils.train_logger - INFO - epoch: 16, lr_model: 2.62e-01, lr_wav2vec: 5.31e-05 - train loss: 1.25e-01 - valid loss: 3.95e-01, valid CER: 7.48, valid WER: 27.94
+ 2022-04-07 05:34:31,869 - speechbrain.utils.checkpoints - INFO - Saved an end-of-epoch checkpoint in results/wav2vec2_ctc_AMHARIC/1249/save/CKPT+2022-04-07+05-32-43+00
+ 2022-04-07 05:34:32,562 - speechbrain.utils.checkpoints - INFO - Deleted checkpoint in results/wav2vec2_ctc_AMHARIC/1249/save/CKPT+2022-04-07+05-17-40+00
+ 2022-04-07 05:34:32,563 - speechbrain.utils.epoch_loop - INFO - Going into epoch 17
+ 2022-04-07 05:47:42,780 - speechbrain.nnet.schedulers - INFO - Changing lr from 0.26 to 0.21
+ 2022-04-07 05:47:42,796 - speechbrain.nnet.schedulers - INFO - Changing lr from 5.3e-05 to 4.8e-05
+ 2022-04-07 05:47:42,879 - speechbrain.utils.train_logger - INFO - epoch: 17, lr_model: 2.62e-01, lr_wav2vec: 5.31e-05 - train loss: 1.20e-01 - valid loss: 4.12e-01, valid CER: 7.36, valid WER: 27.73
+ 2022-04-07 05:50:08,133 - speechbrain.utils.checkpoints - INFO - Saved an end-of-epoch checkpoint in results/wav2vec2_ctc_AMHARIC/1249/save/CKPT+2022-04-07+05-47-42+00
+ 2022-04-07 05:50:11,233 - speechbrain.utils.checkpoints - INFO - Deleted checkpoint in results/wav2vec2_ctc_AMHARIC/1249/save/CKPT+2022-04-07+05-32-43+00
+ 2022-04-07 05:50:11,244 - speechbrain.utils.epoch_loop - INFO - Going into epoch 18
+ 2022-04-07 06:03:30,233 - speechbrain.nnet.schedulers - INFO - Changing lr from 0.21 to 0.17
+ 2022-04-07 06:03:30,246 - speechbrain.nnet.schedulers - INFO - Changing lr from 4.8e-05 to 4.3e-05
+ 2022-04-07 06:03:30,328 - speechbrain.utils.train_logger - INFO - epoch: 18, lr_model: 2.10e-01, lr_wav2vec: 4.78e-05 - train loss: 1.03e-01 - valid loss: 4.31e-01, valid CER: 7.44, valid WER: 27.69
+ 2022-04-07 06:07:59,476 - speechbrain.utils.checkpoints - INFO - Saved an end-of-epoch checkpoint in results/wav2vec2_ctc_AMHARIC/1249/save/CKPT+2022-04-07+06-03-30+00
+ 2022-04-07 06:08:02,491 - speechbrain.utils.checkpoints - INFO - Deleted checkpoint in results/wav2vec2_ctc_AMHARIC/1249/save/CKPT+2022-04-07+05-47-42+00
+ 2022-04-07 06:08:02,503 - speechbrain.utils.epoch_loop - INFO - Going into epoch 19
+ 2022-04-07 06:21:15,279 - speechbrain.nnet.schedulers - INFO - Changing lr from 0.17 to 0.13
+ 2022-04-07 06:21:15,296 - speechbrain.nnet.schedulers - INFO - Changing lr from 4.3e-05 to 3.9e-05
+ 2022-04-07 06:21:15,414 - speechbrain.utils.train_logger - INFO - epoch: 19, lr_model: 1.68e-01, lr_wav2vec: 4.30e-05 - train loss: 9.82e-02 - valid loss: 4.35e-01, valid CER: 7.28, valid WER: 27.08
+ 2022-04-07 06:23:34,904 - speechbrain.utils.checkpoints - INFO - Saved an end-of-epoch checkpoint in results/wav2vec2_ctc_AMHARIC/1249/save/CKPT+2022-04-07+06-21-15+00
+ 2022-04-07 06:23:36,249 - speechbrain.utils.checkpoints - INFO - Deleted checkpoint in results/wav2vec2_ctc_AMHARIC/1249/save/CKPT+2022-04-07+06-03-30+00
+ 2022-04-07 06:23:36,250 - speechbrain.utils.epoch_loop - INFO - Going into epoch 20
+ 2022-04-07 06:36:49,810 - speechbrain.nnet.schedulers - INFO - Changing lr from 0.13 to 0.11
+ 2022-04-07 06:36:49,823 - speechbrain.nnet.schedulers - INFO - Changing lr from 3.9e-05 to 3.5e-05
+ 2022-04-07 06:36:49,890 - speechbrain.utils.train_logger - INFO - epoch: 20, lr_model: 1.34e-01, lr_wav2vec: 3.87e-05 - train loss: 8.78e-02 - valid loss: 4.42e-01, valid CER: 7.27, valid WER: 27.18
+ 2022-04-07 06:39:22,101 - speechbrain.utils.checkpoints - INFO - Saved an end-of-epoch checkpoint in results/wav2vec2_ctc_AMHARIC/1249/save/CKPT+2022-04-07+06-36-49+00
+ 2022-04-07 06:39:22,945 - speechbrain.utils.epoch_loop - INFO - Going into epoch 21
+ 2022-04-07 06:52:36,778 - speechbrain.nnet.schedulers - INFO - Changing lr from 0.11 to 0.086
+ 2022-04-07 06:52:36,782 - speechbrain.nnet.schedulers - INFO - Changing lr from 3.5e-05 to 3.1e-05
+ 2022-04-07 06:52:36,852 - speechbrain.utils.train_logger - INFO - epoch: 21, lr_model: 1.07e-01, lr_wav2vec: 3.49e-05 - train loss: 8.27e-02 - valid loss: 4.72e-01, valid CER: 7.16, valid WER: 26.79
+ 2022-04-07 06:55:10,567 - speechbrain.utils.checkpoints - INFO - Saved an end-of-epoch checkpoint in results/wav2vec2_ctc_AMHARIC/1249/save/CKPT+2022-04-07+06-52-36+00
+ 2022-04-07 06:55:12,223 - speechbrain.utils.checkpoints - INFO - Deleted checkpoint in results/wav2vec2_ctc_AMHARIC/1249/save/CKPT+2022-04-07+06-21-15+00
+ 2022-04-07 06:55:12,905 - speechbrain.utils.checkpoints - INFO - Deleted checkpoint in results/wav2vec2_ctc_AMHARIC/1249/save/CKPT+2022-04-07+06-36-49+00
+ 2022-04-07 06:55:12,905 - speechbrain.utils.epoch_loop - INFO - Going into epoch 22
+ 2022-04-07 07:08:24,492 - speechbrain.nnet.schedulers - INFO - Changing lr from 0.086 to 0.069
+ 2022-04-07 07:08:24,497 - speechbrain.nnet.schedulers - INFO - Changing lr from 3.1e-05 to 2.8e-05
+ 2022-04-07 07:08:24,542 - speechbrain.utils.train_logger - INFO - epoch: 22, lr_model: 8.59e-02, lr_wav2vec: 3.14e-05 - train loss: 7.39e-02 - valid loss: 4.88e-01, valid CER: 7.03, valid WER: 26.28
+ 2022-04-07 07:10:26,930 - speechbrain.utils.checkpoints - INFO - Saved an end-of-epoch checkpoint in results/wav2vec2_ctc_AMHARIC/1249/save/CKPT+2022-04-07+07-08-24+00
+ 2022-04-07 07:10:29,254 - speechbrain.utils.checkpoints - INFO - Deleted checkpoint in results/wav2vec2_ctc_AMHARIC/1249/save/CKPT+2022-04-07+06-52-36+00
+ 2022-04-07 07:10:29,254 - speechbrain.utils.epoch_loop - INFO - Going into epoch 23
+ 2022-04-07 07:23:43,676 - speechbrain.nnet.schedulers - INFO - Changing lr from 0.069 to 0.055
+ 2022-04-07 07:23:43,692 - speechbrain.nnet.schedulers - INFO - Changing lr from 2.8e-05 to 2.5e-05
+ 2022-04-07 07:23:43,814 - speechbrain.utils.train_logger - INFO - epoch: 23, lr_model: 6.87e-02, lr_wav2vec: 2.82e-05 - train loss: 7.24e-02 - valid loss: 4.92e-01, valid CER: 6.95, valid WER: 26.03
+ 2022-04-07 07:29:42,385 - speechbrain.utils.checkpoints - INFO - Saved an end-of-epoch checkpoint in results/wav2vec2_ctc_AMHARIC/1249/save/CKPT+2022-04-07+07-23-43+00
+ 2022-04-07 07:29:44,066 - speechbrain.utils.checkpoints - INFO - Deleted checkpoint in results/wav2vec2_ctc_AMHARIC/1249/save/CKPT+2022-04-07+07-08-24+00
+ 2022-04-07 07:29:44,066 - speechbrain.utils.epoch_loop - INFO - Going into epoch 24
+ 2022-04-07 07:43:02,440 - speechbrain.utils.train_logger - INFO - epoch: 24, lr_model: 5.50e-02, lr_wav2vec: 2.54e-05 - train loss: 6.76e-02 - valid loss: 4.80e-01, valid CER: 6.89, valid WER: 26.11
+ 2022-04-07 07:48:09,873 - speechbrain.utils.checkpoints - INFO - Saved an end-of-epoch checkpoint in results/wav2vec2_ctc_AMHARIC/1249/save/CKPT+2022-04-07+07-43-02+00
+ 2022-04-07 07:48:12,476 - speechbrain.utils.epoch_loop - INFO - Going into epoch 25
+ 2022-04-07 08:01:25,251 - speechbrain.nnet.schedulers - INFO - Changing lr from 0.055 to 0.044
+ 2022-04-07 08:01:25,297 - speechbrain.nnet.schedulers - INFO - Changing lr from 2.5e-05 to 2.3e-05
+ 2022-04-07 08:01:25,429 - speechbrain.utils.train_logger - INFO - epoch: 25, lr_model: 5.50e-02, lr_wav2vec: 2.54e-05 - train loss: 6.21e-02 - valid loss: 5.01e-01, valid CER: 6.88, valid WER: 25.88
+ 2022-04-07 08:03:25,750 - speechbrain.utils.checkpoints - INFO - Saved an end-of-epoch checkpoint in results/wav2vec2_ctc_AMHARIC/1249/save/CKPT+2022-04-07+08-01-25+00
+ 2022-04-07 08:03:27,595 - speechbrain.utils.checkpoints - INFO - Deleted checkpoint in results/wav2vec2_ctc_AMHARIC/1249/save/CKPT+2022-04-07+07-43-02+00
+ 2022-04-07 08:03:28,215 - speechbrain.utils.checkpoints - INFO - Deleted checkpoint in results/wav2vec2_ctc_AMHARIC/1249/save/CKPT+2022-04-07+07-23-43+00
+ 2022-04-07 08:03:28,215 - speechbrain.utils.epoch_loop - INFO - Going into epoch 26
+ 2022-04-07 08:16:35,211 - speechbrain.utils.train_logger - INFO - epoch: 26, lr_model: 4.40e-02, lr_wav2vec: 2.29e-05 - train loss: 5.94e-02 - valid loss: 4.89e-01, valid CER: 6.90, valid WER: 26.08
+ 2022-04-07 08:19:37,126 - speechbrain.utils.checkpoints - INFO - Saved an end-of-epoch checkpoint in results/wav2vec2_ctc_AMHARIC/1249/save/CKPT+2022-04-07+08-16-35+00
+ 2022-04-07 08:19:39,166 - speechbrain.utils.epoch_loop - INFO - Going into epoch 27
+ 2022-04-07 08:32:51,788 - speechbrain.nnet.schedulers - INFO - Changing lr from 0.044 to 0.035
+ 2022-04-07 08:32:51,994 - speechbrain.nnet.schedulers - INFO - Changing lr from 2.3e-05 to 2.1e-05
+ 2022-04-07 08:32:52,086 - speechbrain.utils.train_logger - INFO - epoch: 27, lr_model: 4.40e-02, lr_wav2vec: 2.29e-05 - train loss: 5.54e-02 - valid loss: 5.09e-01, valid CER: 6.87, valid WER: 26.04
+ 2022-04-07 08:35:48,324 - speechbrain.utils.checkpoints - INFO - Saved an end-of-epoch checkpoint in results/wav2vec2_ctc_AMHARIC/1249/save/CKPT+2022-04-07+08-32-52+00
+ 2022-04-07 08:35:51,566 - speechbrain.utils.checkpoints - INFO - Deleted checkpoint in results/wav2vec2_ctc_AMHARIC/1249/save/CKPT+2022-04-07+08-16-35+00
+ 2022-04-07 08:35:51,582 - speechbrain.utils.epoch_loop - INFO - Going into epoch 28
+ 2022-04-07 08:48:53,259 - speechbrain.utils.train_logger - INFO - epoch: 28, lr_model: 3.52e-02, lr_wav2vec: 2.06e-05 - train loss: 5.55e-02 - valid loss: 5.01e-01, valid CER: 6.76, valid WER: 25.67
+ 2022-04-07 08:50:45,339 - speechbrain.utils.checkpoints - INFO - Saved an end-of-epoch checkpoint in results/wav2vec2_ctc_AMHARIC/1249/save/CKPT+2022-04-07+08-48-53+00
+ 2022-04-07 08:50:46,363 - speechbrain.utils.checkpoints - INFO - Deleted checkpoint in results/wav2vec2_ctc_AMHARIC/1249/save/CKPT+2022-04-07+08-32-52+00
+ 2022-04-07 08:50:47,117 - speechbrain.utils.checkpoints - INFO - Deleted checkpoint in results/wav2vec2_ctc_AMHARIC/1249/save/CKPT+2022-04-07+08-01-25+00
+ 2022-04-07 08:50:47,117 - speechbrain.utils.epoch_loop - INFO - Going into epoch 29
+ 2022-04-07 09:03:47,837 - speechbrain.nnet.schedulers - INFO - Changing lr from 0.035 to 0.028
+ 2022-04-07 09:03:47,840 - speechbrain.nnet.schedulers - INFO - Changing lr from 2.1e-05 to 1.9e-05
+ 2022-04-07 09:03:47,887 - speechbrain.utils.train_logger - INFO - epoch: 29, lr_model: 3.52e-02, lr_wav2vec: 2.06e-05 - train loss: 5.44e-02 - valid loss: 5.10e-01, valid CER: 6.72, valid WER: 25.62
+ 2022-04-07 09:06:30,053 - speechbrain.utils.checkpoints - INFO - Saved an end-of-epoch checkpoint in results/wav2vec2_ctc_AMHARIC/1249/save/CKPT+2022-04-07+09-03-47+00
+ 2022-04-07 09:06:32,452 - speechbrain.utils.checkpoints - INFO - Deleted checkpoint in results/wav2vec2_ctc_AMHARIC/1249/save/CKPT+2022-04-07+08-48-53+00
+ 2022-04-07 09:06:32,452 - speechbrain.utils.epoch_loop - INFO - Going into epoch 30
+ 2022-04-07 09:19:28,245 - speechbrain.nnet.schedulers - INFO - Changing lr from 0.028 to 0.023
708
+ 2022-04-07 09:19:28,266 - speechbrain.nnet.schedulers - INFO - Changing lr from 1.9e-05 to 1.7e-05
709
+ 2022-04-07 09:19:28,342 - speechbrain.utils.train_logger - INFO - epoch: 30, lr_model: 2.81e-02, lr_wav2vec: 1.85e-05 - train loss: 5.01e-02 - valid loss: 5.29e-01, valid CER: 6.71, valid WER: 25.50
710
+ 2022-04-07 09:21:20,512 - speechbrain.utils.checkpoints - INFO - Saved an end-of-epoch checkpoint in results/wav2vec2_ctc_AMHARIC/1249/save/CKPT+2022-04-07+09-19-28+00
711
+ 2022-04-07 09:21:22,686 - speechbrain.utils.checkpoints - INFO - Deleted checkpoint in results/wav2vec2_ctc_AMHARIC/1249/save/CKPT+2022-04-07+09-03-47+00
712
+ 2022-04-07 09:21:22,696 - speechbrain.utils.checkpoints - INFO - Loading a checkpoint from results/wav2vec2_ctc_AMHARIC/1249/save/CKPT+2022-04-07+09-19-28+00
713
+ 2022-04-07 09:21:42,982 - root - DEBUG - SaveableDataLoader was requested to load a checkpoint, but the DataLoader has already been iterated. The DataLoader file will be ignored. This is normal in evaluation, when a checkpoint is loaded just to retrieve the best model.
714
+ 2022-04-07 09:24:40,071 - speechbrain.utils.train_logger - INFO - Epoch loaded: 30 - test loss: 5.24e-01, test CER: 6.57, test WER: 24.92
715
+ 2022-04-07 09:25:00,438 - speechbrain.core - INFO - Beginning experiment!
716
+ 2022-04-07 09:25:00,447 - speechbrain.core - INFO - Experiment folder: results/wav2vec2_ctc_AMHARIC/1249
717
+ 2022-04-07 09:25:01,071 - speechbrain.utils.superpowers - DEBUG - aiohttp==3.8.1
718
+ aiosignal==1.2.0
719
+ appdirs==1.4.4
720
+ async-timeout==4.0.2
721
+ attrs==21.4.0
722
+ audioread==2.1.9
723
+ azure-core==1.21.1
724
+ azure-storage-blob==12.9.0
725
+ bcrypt==3.2.0
726
+ black==19.10b0
727
+ certifi==2021.10.8
728
+ cffi==1.15.0
729
+ cfgv==3.3.1
730
+ charset-normalizer==2.0.10
731
+ click==8.0.3
732
+ cryptography==36.0.1
733
+ datasets==1.13.3
734
+ decorator==5.1.1
735
+ dill==0.3.4
736
+ distlib==0.3.4
737
+ entrypoints==0.3
738
+ ffmpeg==1.4
739
+ filelock==3.4.2
740
+ flake8==3.7.9
741
+ frozenlist==1.3.0
742
+ fsspec==2022.2.0
743
+ huggingface-hub==0.2.1
744
+ HyperPyYAML==1.0.0
745
+ identify==2.4.4
746
+ idna==3.3
747
+ isodate==0.6.1
748
+ joblib==1.1.0
749
+ librosa==0.8.1
750
+ llvmlite==0.38.0
751
+ mccabe==0.6.1
752
+ more-itertools==8.12.0
753
+ msrest==0.6.21
754
+ multidict==6.0.2
755
+ multiprocess==0.70.12.2
756
+ mutagen==1.45.1
757
+ nodeenv==1.6.0
758
+ numba==0.55.0
759
+ numpy==1.21.5
760
+ oauthlib==3.1.1
761
+ packaging==21.3
762
+ pandas==1.3.5
763
+ paramiko==2.10.3
764
+ pathspec==0.9.0
765
+ platformdirs==2.4.1
766
+ pluggy==0.13.1
767
+ pooch==1.5.2
768
+ pre-commit==2.17.0
769
+ py==1.11.0
770
+ pyarrow==7.0.0
771
+ pycodestyle==2.5.0
772
+ pycparser==2.21
773
+ pyflakes==2.1.1
774
+ PyNaCl==1.5.0
775
+ pyparsing==3.0.6
776
+ pytest==5.4.1
777
+ python-dateutil==2.8.2
778
+ pytz==2021.3
779
+ PyYAML==6.0
780
+ regex==2022.1.18
781
+ requests==2.27.1
782
+ requests-oauthlib==1.3.0
783
+ resampy==0.2.2
784
+ ruamel.yaml==0.17.20
785
+ ruamel.yaml.clib==0.2.6
786
+ sacremoses==0.0.47
787
+ scikit-learn==1.0.2
788
+ scipy==1.7.3
789
+ scp==0.14.4
790
+ sentencepiece==0.1.96
791
+ six==1.16.0
792
+ SoundFile==0.10.3.post1
793
+ threadpoolctl==3.0.0
794
+ tokenizers==0.10.3
795
+ toml==0.10.2
796
+ torch==1.10.1
797
+ torchaudio==0.10.1
798
+ tqdm==4.62.3
799
+ transformers==4.13.0
800
+ typed-ast==1.5.1
801
+ typing_extensions==4.0.1
802
+ urllib3==1.26.8
803
+ virtualenv==20.13.0
804
+ wcwidth==0.2.5
805
+ xxhash==3.0.0
806
+ yamllint==1.23.0
807
+ yarl==1.7.2
808
+
809
+
810
+ 2022-04-07 09:25:01,407 - dvoice_prepare - INFO - results/wav2vec2_ctc_AMHARIC/1249/save/train.csv already exists, skipping data preparation!
811
+ 2022-04-07 09:25:01,407 - dvoice_prepare - INFO - results/wav2vec2_ctc_AMHARIC/1249/save/dev.csv already exists, skipping data preparation!
812
+ 2022-04-07 09:25:01,407 - dvoice_prepare - INFO - results/wav2vec2_ctc_AMHARIC/1249/save/test.csv already exists, skipping data preparation!
813
+ 2022-04-07 09:25:01,413 - speechbrain.tokenizers.SentencePiece - INFO - Tokenizer is already trained.
814
+ 2022-04-07 09:25:01,413 - speechbrain.tokenizers.SentencePiece - INFO - ==== Loading Tokenizer ===
815
+ 2022-04-07 09:25:01,413 - speechbrain.tokenizers.SentencePiece - INFO - Tokenizer path: results/wav2vec2_ctc_AMHARIC/1249/save/224_char.model
816
+ 2022-04-07 09:25:01,413 - speechbrain.tokenizers.SentencePiece - INFO - Tokenizer vocab_size: 224
817
+ 2022-04-07 09:25:01,413 - speechbrain.tokenizers.SentencePiece - INFO - Tokenizer type: char
818
+ 2022-04-07 09:25:01,591 - speechbrain.core - INFO - Info: auto_mix_prec arg from hparam file is used
819
+ 2022-04-07 09:25:01,591 - speechbrain.core - INFO - Info: ckpt_interval_minutes arg from hparam file is used
820
+ 2022-04-07 09:25:05,095 - speechbrain.core - INFO - 318.8M trainable parameters in ASR
821
+ 2022-04-07 09:25:05,382 - speechbrain.utils.checkpoints - INFO - Loading a checkpoint from results/wav2vec2_ctc_AMHARIC/1249/save/CKPT+2022-04-07+09-19-28+00
822
+ 2022-04-07 09:25:08,583 - speechbrain.utils.checkpoints - INFO - Loading a checkpoint from results/wav2vec2_ctc_AMHARIC/1249/save/CKPT+2022-04-07+09-19-28+00
823
+ 2022-04-07 09:26:16,796 - speechbrain.utils.train_logger - INFO - Epoch loaded: 30 - test loss: 5.24e-01, test CER: 6.57, test WER: 24.92
824
+ 2022-05-27 08:17:50,151 - speechbrain.core - INFO - Beginning experiment!
825
+ 2022-05-27 08:17:50,151 - speechbrain.core - INFO - Experiment folder: results/wav2vec2_ctc_AMHARIC/1249
826
+ 2022-05-27 08:17:54,273 - speechbrain.utils.superpowers - DEBUG - aiohttp==3.8.1
827
+ aiosignal==1.2.0
828
+ appdirs==1.4.4
829
+ async-timeout==4.0.2
830
+ attrs==21.4.0
831
+ audioread==2.1.9
832
+ audiosegment==0.23.0
833
+ azure-core==1.21.1
834
+ azure-storage-blob==12.9.0
835
+ bcrypt==3.2.0
836
+ black==19.10b0
837
+ certifi==2021.10.8
838
+ cffi==1.15.0
839
+ cfgv==3.3.1
840
+ charset-normalizer==2.0.10
841
+ click==8.0.3
842
+ cryptography==36.0.1
843
+ datasets==1.13.3
844
+ decorator==5.1.1
845
+ dill==0.3.4
846
+ distlib==0.3.4
847
+ entrypoints==0.3
848
+ ffmpeg==1.4
849
+ filelock==3.4.2
850
+ flake8==3.7.9
851
+ frozenlist==1.3.0
852
+ fsspec==2022.2.0
853
+ huggingface-hub==0.5.1
854
+ HyperPyYAML==1.0.1
855
+ identify==2.4.4
856
+ idna==3.3
857
+ isodate==0.6.1
858
+ joblib==1.1.0
859
+ librosa==0.8.1
860
+ llvmlite==0.38.0
861
+ mccabe==0.6.1
862
+ more-itertools==8.12.0
863
+ msrest==0.6.21
864
+ multidict==6.0.2
865
+ multiprocess==0.70.12.2
866
+ mutagen==1.45.1
867
+ nodeenv==1.6.0
868
+ numba==0.55.0
869
+ numpy==1.21.5
870
+ oauthlib==3.1.1
871
+ packaging==21.3
872
+ pandas==1.3.5
873
+ paramiko==2.10.3
874
+ pathspec==0.9.0
875
+ platformdirs==2.4.1
876
+ pluggy==0.13.1
877
+ pooch==1.5.2
878
+ pre-commit==2.17.0
879
+ py==1.11.0
880
+ pyarrow==7.0.0
881
+ pycodestyle==2.5.0
882
+ pycparser==2.21
883
+ pydub==0.25.1
884
+ pyflakes==2.1.1
885
+ PyNaCl==1.5.0
886
+ pyparsing==3.0.6
887
+ pytest==5.4.1
888
+ python-dateutil==2.8.2
889
+ pytz==2021.3
890
+ PyYAML==6.0
891
+ regex==2022.1.18
892
+ requests==2.27.1
893
+ requests-oauthlib==1.3.0
894
+ resampy==0.2.2
895
+ ruamel.yaml==0.17.21
896
+ ruamel.yaml.clib==0.2.6
897
+ sacremoses==0.0.53
898
+ scikit-learn==1.0.2
899
+ scipy==1.7.3
900
+ scp==0.14.4
901
+ sentencepiece==0.1.96
902
+ six==1.16.0
903
+ SoundFile==0.10.3.post1
904
+ speechbrain==0.5.11
905
+ threadpoolctl==3.0.0
906
+ tokenizers==0.12.1
907
+ toml==0.10.2
908
+ torch==1.11.0
909
+ torchaudio==0.11.0
910
+ tqdm==4.62.3
911
+ transformers==4.18.0
912
+ typed-ast==1.5.1
913
+ typing_extensions==4.0.1
914
+ urllib3==1.26.8
915
+ virtualenv==20.13.0
916
+ wcwidth==0.2.5
917
+ webrtcvad==2.0.10
918
+ xxhash==3.0.0
919
+ yamllint==1.23.0
920
+ yarl==1.7.2
921
+ youtube-dl==2021.12.17
922
+
923
+
924
+ 2022-05-27 08:19:01,150 - dvoice_prepare - INFO - results/wav2vec2_ctc_AMHARIC/1249/save/train.csv already exists, skipping data preparation!
925
+ 2022-05-27 08:19:01,151 - dvoice_prepare - INFO - results/wav2vec2_ctc_AMHARIC/1249/save/dev.csv already exists, skipping data preparation!
926
+ 2022-05-27 08:19:01,151 - dvoice_prepare - INFO - results/wav2vec2_ctc_AMHARIC/1249/save/test.csv already exists, skipping data preparation!
927
+ 2022-05-27 08:19:01,160 - speechbrain.tokenizers.SentencePiece - INFO - Tokenizer is already trained.
928
+ 2022-05-27 08:19:01,160 - speechbrain.tokenizers.SentencePiece - INFO - ==== Loading Tokenizer ===
929
+ 2022-05-27 08:19:01,160 - speechbrain.tokenizers.SentencePiece - INFO - Tokenizer path: results/wav2vec2_ctc_AMHARIC/1249/save/224_char.model
930
+ 2022-05-27 08:19:01,160 - speechbrain.tokenizers.SentencePiece - INFO - Tokenizer vocab_size: 224
931
+ 2022-05-27 08:19:01,160 - speechbrain.tokenizers.SentencePiece - INFO - Tokenizer type: char
932
+ 2022-05-27 08:19:01,425 - speechbrain.core - INFO - Info: auto_mix_prec arg from hparam file is used
933
+ 2022-05-27 08:19:01,425 - speechbrain.core - INFO - Info: ckpt_interval_minutes arg from hparam file is used
934
+ 2022-05-27 08:19:09,868 - speechbrain.core - INFO - 318.8M trainable parameters in ASR
935
+ 2022-05-27 08:19:09,899 - speechbrain.utils.checkpoints - INFO - Loading a checkpoint from results/wav2vec2_ctc_AMHARIC/1249/save/CKPT+2022-04-07+09-19-28+00
936
+ 2022-05-27 08:20:01,646 - speechbrain.utils.checkpoints - INFO - Loading a checkpoint from results/wav2vec2_ctc_AMHARIC/1249/save/CKPT+2022-04-07+09-19-28+00
937
+ 2022-05-27 08:20:59,252 - speechbrain.utils.train_logger - INFO - Epoch loaded: 30 - test loss: 5.24e-01, test CER: 6.57, test WER: 24.92
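
Note on the "Changing lr" entries above: they come from SpeechBrain's NewBob-style schedulers, which multiply a learning rate by a fixed annealing factor whenever the validation loss has not improved enough. The factors can be read off the logged sequences (1.0 -> 0.8 -> 0.64 -> ... for the model, 1e-4 -> 9e-5 -> 8.1e-5 -> ... for wav2vec), i.e. 0.8 and 0.9. A minimal sketch of that rule, assuming a simple relative-improvement test (the actual threshold and patience are set in the hparams file, not visible in this log):

# Hypothetical sketch of NewBob-style annealing; the 0.8 / 0.9 factors are
# read off the log above, the improvement threshold is an assumed placeholder.
class NewBobLike:
    def __init__(self, lr, annealing_factor, improvement_threshold=0.0025):
        self.lr = lr
        self.annealing_factor = annealing_factor
        self.improvement_threshold = improvement_threshold
        self.prev_loss = None

    def __call__(self, valid_loss):
        old_lr = self.lr
        if self.prev_loss is not None:
            improvement = (self.prev_loss - valid_loss) / self.prev_loss
            if improvement < self.improvement_threshold:
                # Not enough progress on the validation loss: anneal.
                self.lr = old_lr * self.annealing_factor
        self.prev_loss = valid_loss
        return old_lr, self.lr

model_sched = NewBobLike(lr=1.0, annealing_factor=0.8)     # 1.0, 0.8, 0.64, ...
wav2vec_sched = NewBobLike(lr=1e-4, annealing_factor=0.9)  # 1e-4, 9e-5, 8.1e-5, ...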
Training/train.py ADDED
@@ -0,0 +1,380 @@
+ #!/usr/bin/env python3
+ """Recipe for training a sequence-to-sequence ASR system on DVoice data
+ (adapted from the SpeechBrain CommonVoice recipe).
+ The system employs a wav2vec2 encoder and a CTC decoder.
+ Decoding is performed with greedy decoding (will be extended to beam search).
+
+ To run this recipe, do the following:
+ > python train.py hparams/train_amharic.yaml
+
+ With the default hyperparameters, the system employs a pretrained wav2vec2 encoder.
+ The wav2vec2 model to fine-tune is given in the hparams file and may depend
+ on the language.
+
+ The neural network is trained with CTC on sub-word units estimated with
+ Byte-Pair Encoding (BPE).
+
+ The experiment file is flexible enough to support a large variety of
+ different systems. By properly changing the parameter files, you can try
+ different encoders, decoders, tokens (e.g., characters instead of BPE),
+ training languages (all DVoice languages), and many
+ other possible variations.
+
+ Authors
+  * Titouan Parcollet 2021
+ """
+ import sys
+
+ # Make the local SpeechBrain checkout importable.
+ sys.path.append('/data/n.abdoumohamed/DVoice/speechbrain')
+
+ import torch
+ import logging
+ import speechbrain as sb
+ import torchaudio
+ from hyperpyyaml import load_hyperpyyaml
+ from speechbrain.tokenizers.SentencePiece import SentencePiece
+ from speechbrain.utils.data_utils import undo_padding
+ from speechbrain.utils.distributed import run_on_main
+
+ logger = logging.getLogger(__name__)
+
+
+ # Define training procedure
+ class ASR(sb.core.Brain):
+     def compute_forward(self, batch, stage):
+         """Forward computations from the waveform batches to the output probabilities."""
+         batch = batch.to(self.device)
+         wavs, wav_lens = batch.sig
+         tokens_bos, _ = batch.tokens_bos
+         wavs, wav_lens = wavs.to(self.device), wav_lens.to(self.device)
+
+         if stage == sb.Stage.TRAIN:
+             if hasattr(self.hparams, "augmentation"):
+                 wavs = self.hparams.augmentation(wavs, wav_lens)
+
+         # Forward pass
+         feats = self.modules.wav2vec2(wavs)
+         x = self.modules.enc(feats)
+         logits = self.modules.ctc_lin(x)
+         p_ctc = self.hparams.log_softmax(logits)
+
+         return p_ctc, wav_lens
+
+     def compute_objectives(self, predictions, batch, stage):
+         """Computes the loss (CTC) given predictions and targets."""
+         p_ctc, wav_lens = predictions
+
+         ids = batch.id
+         tokens_eos, tokens_eos_lens = batch.tokens_eos
+         tokens, tokens_lens = batch.tokens
+
+         loss = self.hparams.ctc_cost(p_ctc, tokens, wav_lens, tokens_lens)
+
+         if stage != sb.Stage.TRAIN:
+             # Decode token terms to words
+             sequence = sb.decoders.ctc_greedy_decode(
+                 p_ctc, wav_lens, blank_id=self.hparams.blank_index
+             )
+
+             predicted_words = self.tokenizer(sequence, task="decode_from_list")
+
+             # Convert indices to words
+             target_words = undo_padding(tokens, tokens_lens)
+             target_words = self.tokenizer(target_words, task="decode_from_list")
+
+             self.wer_metric.append(ids, predicted_words, target_words)
+             self.cer_metric.append(ids, predicted_words, target_words)
+
+         return loss
+
+     def fit_batch(self, batch):
+         """Trains the parameters given a single input batch"""
+         if self.auto_mix_prec:
+             if not self.hparams.wav2vec2.freeze:
+                 self.wav2vec_optimizer.zero_grad()
+             self.model_optimizer.zero_grad()
+
+             with torch.cuda.amp.autocast():
+                 outputs = self.compute_forward(batch, sb.Stage.TRAIN)
+                 loss = self.compute_objectives(outputs, batch, sb.Stage.TRAIN)
+
+             self.scaler.scale(loss).backward()
+             if not self.hparams.wav2vec2.freeze:
+                 self.scaler.unscale_(self.wav2vec_optimizer)
+             self.scaler.unscale_(self.model_optimizer)
+
+             if self.check_gradients(loss):
+                 if not self.hparams.wav2vec2.freeze:
+                     self.scaler.step(self.wav2vec_optimizer)
+                 self.scaler.step(self.model_optimizer)
+
+             self.scaler.update()
+         else:
+             outputs = self.compute_forward(batch, sb.Stage.TRAIN)
+
+             loss = self.compute_objectives(outputs, batch, sb.Stage.TRAIN)
+             loss.backward()
+
+             if self.check_gradients(loss):
+                 if not self.hparams.wav2vec2.freeze:
+                     self.wav2vec_optimizer.step()
+                 self.model_optimizer.step()
+
+             if not self.hparams.wav2vec2.freeze:
+                 self.wav2vec_optimizer.zero_grad()
+             self.model_optimizer.zero_grad()
+
+         return loss.detach()
+
+     def evaluate_batch(self, batch, stage):
+         """Computations needed for validation/test batches"""
+         predictions = self.compute_forward(batch, stage=stage)
+         with torch.no_grad():
+             loss = self.compute_objectives(predictions, batch, stage=stage)
+         return loss.detach()
+
+     def on_stage_start(self, stage, epoch):
+         """Gets called at the beginning of each epoch"""
+         if stage != sb.Stage.TRAIN:
+             self.cer_metric = self.hparams.cer_computer()
+             self.wer_metric = self.hparams.error_rate_computer()
+
+     def on_stage_end(self, stage, stage_loss, epoch):
+         """Gets called at the end of an epoch."""
+         # Compute/store important stats
+         stage_stats = {"loss": stage_loss}
+         if stage == sb.Stage.TRAIN:
+             self.train_stats = stage_stats
+         else:
+             stage_stats["CER"] = self.cer_metric.summarize("error_rate")
+             stage_stats["WER"] = self.wer_metric.summarize("error_rate")
+
+         # Perform end-of-iteration things, like annealing, logging, etc.
+         if stage == sb.Stage.VALID:
+             old_lr_model, new_lr_model = self.hparams.lr_annealing_model(
+                 stage_stats["loss"]
+             )
+             old_lr_wav2vec, new_lr_wav2vec = self.hparams.lr_annealing_wav2vec(
+                 stage_stats["loss"]
+             )
+             sb.nnet.schedulers.update_learning_rate(
+                 self.model_optimizer, new_lr_model
+             )
+             if not self.hparams.wav2vec2.freeze:
+                 sb.nnet.schedulers.update_learning_rate(
+                     self.wav2vec_optimizer, new_lr_wav2vec
+                 )
+             self.hparams.train_logger.log_stats(
+                 stats_meta={
+                     "epoch": epoch,
+                     "lr_model": old_lr_model,
+                     "lr_wav2vec": old_lr_wav2vec,
+                 },
+                 train_stats=self.train_stats,
+                 valid_stats=stage_stats,
+             )
+             self.checkpointer.save_and_keep_only(
+                 meta={"WER": stage_stats["WER"]}, min_keys=["WER"],
+             )
+         elif stage == sb.Stage.TEST:
+             self.hparams.train_logger.log_stats(
+                 stats_meta={"Epoch loaded": self.hparams.epoch_counter.current},
+                 test_stats=stage_stats,
+             )
+             with open(self.hparams.wer_file, "w") as w:
+                 self.wer_metric.write_stats(w)
+
+     def init_optimizers(self):
+         "Initializes the wav2vec2 optimizer and model optimizer"
+         # If the wav2vec encoder is unfrozen, we create its optimizer
+         if not self.hparams.wav2vec2.freeze:
+             self.wav2vec_optimizer = self.hparams.wav2vec_opt_class(
+                 self.modules.wav2vec2.parameters()
+             )
+             if self.checkpointer is not None:
+                 self.checkpointer.add_recoverable(
+                     "wav2vec_opt", self.wav2vec_optimizer
+                 )
+
+         self.model_optimizer = self.hparams.model_opt_class(
+             self.hparams.model.parameters()
+         )
+
+         if self.checkpointer is not None:
+             self.checkpointer.add_recoverable("modelopt", self.model_optimizer)
+
+
+ # Define custom data procedure
+ def dataio_prepare(hparams, tokenizer):
+     """This function prepares the datasets to be used in the brain class.
+     It also defines the data processing pipeline through user-defined functions."""
+
+     # 1. Define datasets
+     data_folder = hparams["data_folder"]
+
+     train_data = sb.dataio.dataset.DynamicItemDataset.from_csv(
+         csv_path=hparams["train_csv"], replacements={"data_root": data_folder},
+     )
+
+     if hparams["sorting"] == "ascending":
+         # We sort the training data to speed up training and get better results.
+         train_data = train_data.filtered_sorted(
+             sort_key="duration",
+             key_max_value={"duration": hparams["avoid_if_longer_than"]},
+         )
+         # When sorting, do not shuffle in the dataloader; otherwise sorting is pointless.
+         hparams["dataloader_options"]["shuffle"] = False
+
+     elif hparams["sorting"] == "descending":
+         train_data = train_data.filtered_sorted(
+             sort_key="duration",
+             reverse=True,
+             key_max_value={"duration": hparams["avoid_if_longer_than"]},
+         )
+         # When sorting, do not shuffle in the dataloader; otherwise sorting is pointless.
+         hparams["dataloader_options"]["shuffle"] = False
+
+     elif hparams["sorting"] == "random":
+         pass
+
+     else:
+         raise NotImplementedError(
+             "sorting must be random, ascending or descending"
+         )
+
+     valid_data = sb.dataio.dataset.DynamicItemDataset.from_csv(
+         csv_path=hparams["valid_csv"], replacements={"data_root": data_folder},
+     )
+     # We also sort the validation data so it is faster to validate
+     valid_data = valid_data.filtered_sorted(sort_key="duration")
+
+     test_data = sb.dataio.dataset.DynamicItemDataset.from_csv(
+         csv_path=hparams["test_csv"], replacements={"data_root": data_folder},
+     )
+     # We also sort the test data so it is faster to evaluate
+     test_data = test_data.filtered_sorted(sort_key="duration")
+
+     datasets = [train_data, valid_data, test_data]
+
+     # 2. Define audio pipeline:
+     @sb.utils.data_pipeline.takes("wav")
+     @sb.utils.data_pipeline.provides("sig")
+     def audio_pipeline(wav):
+         info = torchaudio.info(wav)
+         sig = sb.dataio.dataio.read_audio(wav)
+         resampled = torchaudio.transforms.Resample(
+             info.sample_rate, hparams["sample_rate"],
+         )(sig)
+         return resampled
+
+     sb.dataio.dataset.add_dynamic_item(datasets, audio_pipeline)
+
+     # 3. Define text pipeline:
+     @sb.utils.data_pipeline.takes("wrd")
+     @sb.utils.data_pipeline.provides(
+         "tokens_list", "tokens_bos", "tokens_eos", "tokens"
+     )
+     def text_pipeline(wrd):
+         tokens_list = tokenizer.sp.encode_as_ids(wrd)
+         yield tokens_list
+         tokens_bos = torch.LongTensor([hparams["bos_index"]] + tokens_list)
+         yield tokens_bos
+         tokens_eos = torch.LongTensor(tokens_list + [hparams["eos_index"]])
+         yield tokens_eos
+         tokens = torch.LongTensor(tokens_list)
+         yield tokens
+
+     sb.dataio.dataset.add_dynamic_item(datasets, text_pipeline)
+
+     # 4. Set output:
+     sb.dataio.dataset.set_output_keys(
+         datasets, ["id", "sig", "tokens_bos", "tokens_eos", "tokens"],
+     )
+     return train_data, valid_data, test_data
+
+
+ if __name__ == "__main__":
+
+     # Load hyperparameters file with command-line overrides
+     hparams_file, run_opts, overrides = sb.parse_arguments(sys.argv[1:])
+     with open(hparams_file) as fin:
+         hparams = load_hyperpyyaml(fin, overrides)
+
+     # If distributed_launch=True, create a ddp_group
+     # with the right communication protocol.
+     sb.utils.distributed.ddp_init_group(run_opts)
+
+     # Dataset preparation (parsing DVoice)
+     from dvoice_prepare import prepare_dvoice  # noqa
+
+     # Create experiment directory
+     sb.create_experiment_directory(
+         experiment_directory=hparams["output_folder"],
+         hyperparams_to_save=hparams_file,
+         overrides=overrides,
+     )
+
+     # Due to DDP, we do the preparation ONLY on the main python process
+     run_on_main(
+         prepare_dvoice,
+         kwargs={
+             "data_folder": hparams["data_folder"],
+             "save_folder": hparams["save_folder"],
+             "train_csv_file": hparams["train_csv_file"],
+             "dev_csv_file": hparams["dev_csv_file"],
+             "test_csv_file": hparams["test_csv_file"],
+             "accented_letters": hparams["accented_letters"],
+             "language": hparams["language"],
+             "skip_prep": hparams["skip_prep"],
+         },
+     )
+
+     # Defining the tokenizer and loading it
+     tokenizer = SentencePiece(
+         model_dir=hparams["save_folder"],
+         vocab_size=hparams["output_neurons"],
+         annotation_train=hparams["train_csv"],
+         annotation_read="wrd",
+         model_type=hparams["token_type"],
+         character_coverage=hparams["character_coverage"],
+     )
+
+     # Create the dataset objects as well as tokenization and encoding :-D
+     train_data, valid_data, test_data = dataio_prepare(hparams, tokenizer)
+
+     # Trainer initialization
+     asr_brain = ASR(
+         modules=hparams["modules"],
+         hparams=hparams,
+         run_opts=run_opts,
+         checkpointer=hparams["checkpointer"],
+     )
+
+     # Adding objects to trainer.
+     asr_brain.tokenizer = tokenizer
+
+     # Training
+     asr_brain.fit(
+         asr_brain.hparams.epoch_counter,
+         train_data,
+         valid_data,
+         train_loader_kwargs=hparams["dataloader_options"],
+         valid_loader_kwargs=hparams["test_dataloader_options"],
+     )
+
+     # Test
+     asr_brain.hparams.wer_file = hparams["output_folder"] + "/wer_test.txt"
+     asr_brain.evaluate(
+         test_data,
+         min_key="WER",
+         test_loader_kwargs=hparams["test_dataloader_options"],
+     )
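
As the docstring says, decoding here is greedy: sb.decoders.ctc_greedy_decode takes the most likely token at every frame, collapses consecutive repeats, and removes CTC blanks. A single-utterance sketch of that idea (illustrative only; SpeechBrain's implementation is batched and length-aware):

import torch

def greedy_ctc_decode_one(p_ctc: torch.Tensor, blank_id: int) -> list:
    """Toy greedy CTC decoding for one utterance.

    p_ctc: (time, vocab) log-probabilities; blank_id: index of the CTC blank.
    """
    best_path = p_ctc.argmax(dim=-1).tolist()    # most likely token per frame
    decoded, prev = [], None
    for tok in best_path:
        if tok != prev and tok != blank_id:      # collapse repeats, drop blanks
            decoded.append(tok)
        prev = tok
    return decoded

# e.g. a best path [b, 5, 5, b, 7, 7] (b = blank) collapses to [5, 7].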
Training/train2.py ADDED
@@ -0,0 +1,372 @@
+ """Recipe for training a sequence-to-sequence ASR system on DVoice data
+ (adapted from the SpeechBrain CommonVoice recipe).
+ The system employs a wav2vec2 encoder and a CTC decoder.
+ Decoding is performed with greedy decoding (will be extended to beam search).
+
+ To run this recipe, do the following:
+ > python train2.py hparams/train_amharic.yaml
+
+ With the default hyperparameters, the system employs a pretrained wav2vec2 encoder.
+ The wav2vec2 model to fine-tune is given in the hparams file and may depend
+ on the language.
+
+ The neural network is trained with CTC on sub-word units estimated with
+ Byte-Pair Encoding (BPE).
+
+ The experiment file is flexible enough to support a large variety of
+ different systems. By properly changing the parameter files, you can try
+ different encoders, decoders, tokens (e.g., characters instead of BPE),
+ training languages (all DVoice languages), and many
+ other possible variations.
+
+ Authors
+  * Titouan Parcollet 2021
+ """
+ import sys
+
+ import torch
+ import logging
+ import speechbrain as sb
+ import torchaudio
+ from hyperpyyaml import load_hyperpyyaml
+ from speechbrain.tokenizers.SentencePiece import SentencePiece
+ from speechbrain.utils.data_utils import undo_padding
+ from speechbrain.utils.distributed import run_on_main
+
+ logger = logging.getLogger(__name__)
+
+
+ # Define training procedure
+ class ASR(sb.core.Brain):
+     def compute_forward(self, batch, stage):
+         """Forward computations from the waveform batches to the output probabilities."""
+         batch = batch.to(self.device)
+         wavs, wav_lens = batch.sig
+         tokens_bos, _ = batch.tokens_bos
+         wavs, wav_lens = wavs.to(self.device), wav_lens.to(self.device)
+
+         if stage == sb.Stage.TRAIN:
+             if hasattr(self.hparams, "augmentation"):
+                 wavs = self.hparams.augmentation(wavs, wav_lens)
+
+         # Forward pass
+         feats = self.modules.wav2vec2(wavs)
+         x = self.modules.enc(feats)
+         logits = self.modules.ctc_lin(x)
+         p_ctc = self.hparams.log_softmax(logits)
+
+         return p_ctc, wav_lens
+
+     def compute_objectives(self, predictions, batch, stage):
+         """Computes the loss (CTC) given predictions and targets."""
+         p_ctc, wav_lens = predictions
+
+         ids = batch.id
+         tokens_eos, tokens_eos_lens = batch.tokens_eos
+         tokens, tokens_lens = batch.tokens
+
+         loss = self.hparams.ctc_cost(p_ctc, tokens, wav_lens, tokens_lens)
+
+         if stage != sb.Stage.TRAIN:
+             # Decode token terms to words
+             sequence = sb.decoders.ctc_greedy_decode(
+                 p_ctc, wav_lens, blank_id=self.hparams.blank_index
+             )
+
+             predicted_words = self.tokenizer(sequence, task="decode_from_list")
+
+             # Convert indices to words
+             target_words = undo_padding(tokens, tokens_lens)
+             target_words = self.tokenizer(target_words, task="decode_from_list")
+
+             self.wer_metric.append(ids, predicted_words, target_words)
+             self.cer_metric.append(ids, predicted_words, target_words)
+
+         return loss
+
+     def fit_batch(self, batch):
+         """Trains the parameters given a single input batch"""
+         if self.auto_mix_prec:
+             if not self.hparams.wav2vec2.freeze:
+                 self.wav2vec_optimizer.zero_grad()
+             self.model_optimizer.zero_grad()
+
+             with torch.cuda.amp.autocast():
+                 outputs = self.compute_forward(batch, sb.Stage.TRAIN)
+                 loss = self.compute_objectives(outputs, batch, sb.Stage.TRAIN)
+
+             self.scaler.scale(loss).backward()
+             if not self.hparams.wav2vec2.freeze:
+                 self.scaler.unscale_(self.wav2vec_optimizer)
+             self.scaler.unscale_(self.model_optimizer)
+
+             if self.check_gradients(loss):
+                 if not self.hparams.wav2vec2.freeze:
+                     self.scaler.step(self.wav2vec_optimizer)
+                 self.scaler.step(self.model_optimizer)
+
+             self.scaler.update()
+         else:
+             outputs = self.compute_forward(batch, sb.Stage.TRAIN)
+
+             loss = self.compute_objectives(outputs, batch, sb.Stage.TRAIN)
+             loss.backward()
+
+             if self.check_gradients(loss):
+                 if not self.hparams.wav2vec2.freeze:
+                     self.wav2vec_optimizer.step()
+                 self.model_optimizer.step()
+
+             if not self.hparams.wav2vec2.freeze:
+                 self.wav2vec_optimizer.zero_grad()
+             self.model_optimizer.zero_grad()
+
+         return loss.detach()
+
+     def evaluate_batch(self, batch, stage):
+         """Computations needed for validation/test batches"""
+         predictions = self.compute_forward(batch, stage=stage)
+         with torch.no_grad():
+             loss = self.compute_objectives(predictions, batch, stage=stage)
+         return loss.detach()
+
+     def on_stage_start(self, stage, epoch):
+         """Gets called at the beginning of each epoch"""
+         if stage != sb.Stage.TRAIN:
+             self.cer_metric = self.hparams.cer_computer()
+             self.wer_metric = self.hparams.error_rate_computer()
+
+     def on_stage_end(self, stage, stage_loss, epoch):
+         """Gets called at the end of an epoch."""
+         # Compute/store important stats
+         stage_stats = {"loss": stage_loss}
+         if stage == sb.Stage.TRAIN:
+             self.train_stats = stage_stats
+         else:
+             stage_stats["CER"] = self.cer_metric.summarize("error_rate")
+             stage_stats["WER"] = self.wer_metric.summarize("error_rate")
+
+         # Perform end-of-iteration things, like annealing, logging, etc.
+         if stage == sb.Stage.VALID:
+             old_lr_model, new_lr_model = self.hparams.lr_annealing_model(
+                 stage_stats["loss"]
+             )
+             old_lr_wav2vec, new_lr_wav2vec = self.hparams.lr_annealing_wav2vec(
+                 stage_stats["loss"]
+             )
+             sb.nnet.schedulers.update_learning_rate(
+                 self.model_optimizer, new_lr_model
+             )
+             if not self.hparams.wav2vec2.freeze:
+                 sb.nnet.schedulers.update_learning_rate(
+                     self.wav2vec_optimizer, new_lr_wav2vec
+                 )
+             self.hparams.train_logger.log_stats(
+                 stats_meta={
+                     "epoch": epoch,
+                     "lr_model": old_lr_model,
+                     "lr_wav2vec": old_lr_wav2vec,
+                 },
+                 train_stats=self.train_stats,
+                 valid_stats=stage_stats,
+             )
+             self.checkpointer.save_and_keep_only(
+                 meta={"WER": stage_stats["WER"]}, min_keys=["WER"],
+             )
+         elif stage == sb.Stage.TEST:
+             self.hparams.train_logger.log_stats(
+                 stats_meta={"Epoch loaded": self.hparams.epoch_counter.current},
+                 test_stats=stage_stats,
+             )
+             with open(self.hparams.wer_file, "w") as w:
+                 self.wer_metric.write_stats(w)
+
+     def init_optimizers(self):
+         "Initializes the wav2vec2 optimizer and model optimizer"
+         # If the wav2vec encoder is unfrozen, we create its optimizer
+         if not self.hparams.wav2vec2.freeze:
+             self.wav2vec_optimizer = self.hparams.wav2vec_opt_class(
+                 self.modules.wav2vec2.parameters()
+             )
+             if self.checkpointer is not None:
+                 self.checkpointer.add_recoverable(
+                     "wav2vec_opt", self.wav2vec_optimizer
+                 )
+
+         self.model_optimizer = self.hparams.model_opt_class(
+             self.hparams.model.parameters()
+         )
+
+         if self.checkpointer is not None:
+             self.checkpointer.add_recoverable("modelopt", self.model_optimizer)
+
+
+ # Define custom data procedure
+ def dataio_prepare(hparams, tokenizer):
+     """This function prepares the datasets to be used in the brain class.
+     It also defines the data processing pipeline through user-defined functions."""
+
+     # 1. Define datasets
+     data_folder = hparams["data_folder"]
+
+     train_data = sb.dataio.dataset.DynamicItemDataset.from_csv(
+         csv_path=hparams["train_csv"], replacements={"data_root": data_folder},
+     )
+
+     if hparams["sorting"] == "ascending":
+         # We sort the training data to speed up training and get better results.
+         train_data = train_data.filtered_sorted(
+             sort_key="duration",
+             key_max_value={"duration": hparams["avoid_if_longer_than"]},
+         )
+         # When sorting, do not shuffle in the dataloader; otherwise sorting is pointless.
+         hparams["dataloader_options"]["shuffle"] = False
+
+     elif hparams["sorting"] == "descending":
+         train_data = train_data.filtered_sorted(
+             sort_key="duration",
+             reverse=True,
+             key_max_value={"duration": hparams["avoid_if_longer_than"]},
+         )
+         # When sorting, do not shuffle in the dataloader; otherwise sorting is pointless.
+         hparams["dataloader_options"]["shuffle"] = False
+
+     elif hparams["sorting"] == "random":
+         pass
+
+     else:
+         raise NotImplementedError(
+             "sorting must be random, ascending or descending"
+         )
+
+     valid_data = sb.dataio.dataset.DynamicItemDataset.from_csv(
+         csv_path=hparams["valid_csv"], replacements={"data_root": data_folder},
+     )
+     # We also sort the validation data so it is faster to validate
+     valid_data = valid_data.filtered_sorted(sort_key="duration")
+
+     test_data = sb.dataio.dataset.DynamicItemDataset.from_csv(
+         csv_path=hparams["test_csv"], replacements={"data_root": data_folder},
+     )
+     # We also sort the test data so it is faster to evaluate
+     test_data = test_data.filtered_sorted(sort_key="duration")
+
+     datasets = [train_data, valid_data, test_data]
+
+     # 2. Define audio pipeline:
+     @sb.utils.data_pipeline.takes("wav")
+     @sb.utils.data_pipeline.provides("sig")
+     def audio_pipeline(wav):
+         info = torchaudio.info(wav)
+         sig = sb.dataio.dataio.read_audio(wav)
+         resampled = torchaudio.transforms.Resample(
+             info.sample_rate, hparams["sample_rate"],
+         )(sig)
+         return resampled
+
+     sb.dataio.dataset.add_dynamic_item(datasets, audio_pipeline)
+
+     # 3. Define text pipeline:
+     @sb.utils.data_pipeline.takes("wrd")
+     @sb.utils.data_pipeline.provides(
+         "tokens_list", "tokens_bos", "tokens_eos", "tokens"
+     )
+     def text_pipeline(wrd):
+         tokens_list = tokenizer.sp.encode_as_ids(wrd)
+         yield tokens_list
+         tokens_bos = torch.LongTensor([hparams["bos_index"]] + tokens_list)
+         yield tokens_bos
+         tokens_eos = torch.LongTensor(tokens_list + [hparams["eos_index"]])
+         yield tokens_eos
+         tokens = torch.LongTensor(tokens_list)
+         yield tokens
+
+     sb.dataio.dataset.add_dynamic_item(datasets, text_pipeline)
+
+     # 4. Set output:
+     sb.dataio.dataset.set_output_keys(
+         datasets, ["id", "sig", "tokens_bos", "tokens_eos", "tokens"],
+     )
+     return train_data, valid_data, test_data
+
+
+ if __name__ == "__main__":
+
+     # Load hyperparameters file with command-line overrides
+     hparams_file, run_opts, overrides = sb.parse_arguments(sys.argv[1:])
+     with open(hparams_file) as fin:
+         hparams = load_hyperpyyaml(fin, overrides)
+
+     # If distributed_launch=True, create a ddp_group
+     # with the right communication protocol.
+     sb.utils.distributed.ddp_init_group(run_opts)
+
+     # Dataset preparation (parsing DVoice)
+     from dvoice_prepare import prepare_dvoice  # noqa
+
+     # Create experiment directory
+     sb.create_experiment_directory(
+         experiment_directory=hparams["output_folder"],
+         hyperparams_to_save=hparams_file,
+         overrides=overrides,
+     )
+
+     # Due to DDP, we do the preparation ONLY on the main python process
+     run_on_main(
+         prepare_dvoice,
+         kwargs={
+             "data_folder": hparams["data_folder"],
+             "save_folder": hparams["save_folder"],
+             "train_csv_file": hparams["train_csv_file"],
+             "dev_csv_file": hparams["dev_csv_file"],
+             "test_csv_file": hparams["test_csv_file"],
+             "accented_letters": hparams["accented_letters"],
+             "language": hparams["language"],
+             "skip_prep": hparams["skip_prep"],
+         },
+     )
+
+     # Defining the tokenizer and loading it
+     tokenizer = SentencePiece(
+         model_dir=hparams["save_folder"],
+         vocab_size=hparams["output_neurons"],
+         annotation_train=hparams["train_csv"],
+         annotation_read="wrd",
+         model_type=hparams["token_type"],
+         character_coverage=hparams["character_coverage"],
+     )
+
+     # Create the dataset objects as well as tokenization and encoding :-D
+     train_data, valid_data, test_data = dataio_prepare(hparams, tokenizer)
+
+     # Trainer initialization
+     asr_brain = ASR(
+         modules=hparams["modules"],
+         hparams=hparams,
+         run_opts=run_opts,
+         checkpointer=hparams["checkpointer"],
+     )
+
+     # Adding objects to trainer.
+     asr_brain.tokenizer = tokenizer
+
+     # Training
+     asr_brain.fit(
+         asr_brain.hparams.epoch_counter,
+         train_data,
+         valid_data,
+         train_loader_kwargs=hparams["dataloader_options"],
+         valid_loader_kwargs=hparams["test_dataloader_options"],
+     )
+
+     # Test
+     asr_brain.hparams.wer_file = hparams["output_folder"] + "/wer_test.txt"
+     asr_brain.evaluate(
+         test_data,
+         min_key="WER",
+         test_loader_kwargs=hparams["test_dataloader_options"],
+     )
Training/train_log.txt ADDED
@@ -0,0 +1,33 @@
+ epoch: 1, lr_model: 1.00e+00, lr_wav2vec: 1.00e-04 - train loss: 2.16 - valid loss: 6.31e-01, valid CER: 17.94, valid WER: 61.08
+ epoch: 2, lr_model: 1.00e+00, lr_wav2vec: 1.00e-04 - train loss: 7.60e-01 - valid loss: 4.76e-01, valid CER: 13.59, valid WER: 49.70
+ epoch: 3, lr_model: 1.00e+00, lr_wav2vec: 1.00e-04 - train loss: 5.88e-01 - valid loss: 4.04e-01, valid CER: 11.77, valid WER: 43.35
+ epoch: 4, lr_model: 1.00e+00, lr_wav2vec: 1.00e-04 - train loss: 4.97e-01 - valid loss: 3.75e-01, valid CER: 10.61, valid WER: 39.29
+ epoch: 5, lr_model: 1.00e+00, lr_wav2vec: 1.00e-04 - train loss: 4.29e-01 - valid loss: 3.71e-01, valid CER: 10.15, valid WER: 37.38
+ epoch: 6, lr_model: 1.00e+00, lr_wav2vec: 1.00e-04 - train loss: 3.81e-01 - valid loss: 3.54e-01, valid CER: 9.54, valid WER: 35.23
+ epoch: 7, lr_model: 1.00e+00, lr_wav2vec: 1.00e-04 - train loss: 3.39e-01 - valid loss: 3.41e-01, valid CER: 8.98, valid WER: 33.48
+ epoch: 8, lr_model: 1.00e+00, lr_wav2vec: 1.00e-04 - train loss: 3.08e-01 - valid loss: 3.57e-01, valid CER: 8.80, valid WER: 32.41
+ epoch: 9, lr_model: 8.00e-01, lr_wav2vec: 9.00e-05 - train loss: 2.70e-01 - valid loss: 3.46e-01, valid CER: 8.47, valid WER: 31.33
+ epoch: 10, lr_model: 8.00e-01, lr_wav2vec: 9.00e-05 - train loss: 2.45e-01 - valid loss: 3.64e-01, valid CER: 8.30, valid WER: 30.31
+ epoch: 11, lr_model: 6.40e-01, lr_wav2vec: 8.10e-05 - train loss: 2.17e-01 - valid loss: 3.43e-01, valid CER: 8.00, valid WER: 29.91
+ epoch: 12, lr_model: 6.40e-01, lr_wav2vec: 8.10e-05 - train loss: 1.98e-01 - valid loss: 3.68e-01, valid CER: 7.93, valid WER: 29.49
+ epoch: 13, lr_model: 5.12e-01, lr_wav2vec: 7.29e-05 - train loss: 1.75e-01 - valid loss: 3.94e-01, valid CER: 7.78, valid WER: 29.09
+ epoch: 14, lr_model: 4.10e-01, lr_wav2vec: 6.56e-05 - train loss: 1.58e-01 - valid loss: 3.94e-01, valid CER: 7.75, valid WER: 28.90
+ epoch: 15, lr_model: 3.28e-01, lr_wav2vec: 5.90e-05 - train loss: 1.37e-01 - valid loss: 4.13e-01, valid CER: 7.64, valid WER: 28.39
+ epoch: 16, lr_model: 2.62e-01, lr_wav2vec: 5.31e-05 - train loss: 1.25e-01 - valid loss: 3.95e-01, valid CER: 7.48, valid WER: 27.94
+ epoch: 17, lr_model: 2.62e-01, lr_wav2vec: 5.31e-05 - train loss: 1.20e-01 - valid loss: 4.12e-01, valid CER: 7.36, valid WER: 27.73
+ epoch: 18, lr_model: 2.10e-01, lr_wav2vec: 4.78e-05 - train loss: 1.03e-01 - valid loss: 4.31e-01, valid CER: 7.44, valid WER: 27.69
+ epoch: 19, lr_model: 1.68e-01, lr_wav2vec: 4.30e-05 - train loss: 9.82e-02 - valid loss: 4.35e-01, valid CER: 7.28, valid WER: 27.08
+ epoch: 20, lr_model: 1.34e-01, lr_wav2vec: 3.87e-05 - train loss: 8.78e-02 - valid loss: 4.42e-01, valid CER: 7.27, valid WER: 27.18
+ epoch: 21, lr_model: 1.07e-01, lr_wav2vec: 3.49e-05 - train loss: 8.27e-02 - valid loss: 4.72e-01, valid CER: 7.16, valid WER: 26.79
+ epoch: 22, lr_model: 8.59e-02, lr_wav2vec: 3.14e-05 - train loss: 7.39e-02 - valid loss: 4.88e-01, valid CER: 7.03, valid WER: 26.28
+ epoch: 23, lr_model: 6.87e-02, lr_wav2vec: 2.82e-05 - train loss: 7.24e-02 - valid loss: 4.92e-01, valid CER: 6.95, valid WER: 26.03
+ epoch: 24, lr_model: 5.50e-02, lr_wav2vec: 2.54e-05 - train loss: 6.76e-02 - valid loss: 4.80e-01, valid CER: 6.89, valid WER: 26.11
+ epoch: 25, lr_model: 5.50e-02, lr_wav2vec: 2.54e-05 - train loss: 6.21e-02 - valid loss: 5.01e-01, valid CER: 6.88, valid WER: 25.88
+ epoch: 26, lr_model: 4.40e-02, lr_wav2vec: 2.29e-05 - train loss: 5.94e-02 - valid loss: 4.89e-01, valid CER: 6.90, valid WER: 26.08
+ epoch: 27, lr_model: 4.40e-02, lr_wav2vec: 2.29e-05 - train loss: 5.54e-02 - valid loss: 5.09e-01, valid CER: 6.87, valid WER: 26.04
+ epoch: 28, lr_model: 3.52e-02, lr_wav2vec: 2.06e-05 - train loss: 5.55e-02 - valid loss: 5.01e-01, valid CER: 6.76, valid WER: 25.67
+ epoch: 29, lr_model: 3.52e-02, lr_wav2vec: 2.06e-05 - train loss: 5.44e-02 - valid loss: 5.10e-01, valid CER: 6.72, valid WER: 25.62
+ epoch: 30, lr_model: 2.81e-02, lr_wav2vec: 1.85e-05 - train loss: 5.01e-02 - valid loss: 5.29e-01, valid CER: 6.71, valid WER: 25.50
+ Epoch loaded: 30 - test loss: 5.24e-01, test CER: 6.57, test WER: 24.92
+ Epoch loaded: 30 - test loss: 5.24e-01, test CER: 6.57, test WER: 24.92
+ Epoch loaded: 30 - test loss: 5.24e-01, test CER: 6.57, test WER: 24.92
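
For reference, the CER and WER columns are character- and word-error rates: 100 x (substitutions + insertions + deletions) / reference length, obtained from a Levenshtein alignment between hypothesis and reference. A minimal word-level sketch (illustrative only; the recipe itself relies on SpeechBrain's ErrorRateStats metrics):

def edit_distance(ref, hyp):
    """Token-level Levenshtein distance via dynamic programming."""
    prev_row = list(range(len(hyp) + 1))
    for i, r in enumerate(ref, 1):
        row = [i]
        for j, h in enumerate(hyp, 1):
            row.append(min(prev_row[j] + 1,              # deletion
                           row[j - 1] + 1,               # insertion
                           prev_row[j - 1] + (r != h)))  # substitution / match
        prev_row = row
    return prev_row[-1]

def wer(ref: str, hyp: str) -> float:
    """Word error rate in percent, relative to the reference length."""
    ref_words, hyp_words = ref.split(), hyp.split()
    return 100.0 * edit_distance(ref_words, hyp_words) / len(ref_words)

# CER is the same computation over characters instead of words.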
Training/wer_test.txt ADDED
The diff for this file is too large to render. See raw diff
 
graphs.py ADDED
@@ -0,0 +1,36 @@
+ import wandb
+ import re
+
+ # Initialize the WandB run used to visualize the training metrics.
+ wandb.init(project="training-amharic-stt-visualizations", name="metrics_visualization")
+
+ # Path to the training log file
+ log_file = "Training/train_log.txt"
+
+
+ def parse_logs(log_file):
+     """Parses the training log and yields the metrics of each epoch as a dict."""
+     with open(log_file, "r") as f:
+         for line in f:
+             # Match the epoch-summary log format with a regex.
+             match = re.match(
+                 r"epoch: (?P<epoch>\d+), lr_model: (?P<lr_model>[0-9.e+-]+), lr_wav2vec: (?P<lr_wav2vec>[0-9.e+-]+) - "
+                 r"train loss: (?P<train_loss>[0-9.e+-]+) - valid loss: (?P<valid_loss>[0-9.e+-]+), "
+                 r"valid CER: (?P<valid_CER>[0-9.e+-]+), valid WER: (?P<valid_WER>[0-9.e+-]+)",
+                 line.strip()
+             )
+             if match:
+                 # Values containing '.' or 'e' are floats; the rest (the epoch) are ints.
+                 metrics = {key: float(value) if '.' in value or 'e' in value else int(value)
+                            for key, value in match.groupdict().items()}
+                 yield metrics
+
+
+ # Parse the log and send one WandB step per epoch.
+ for metrics in parse_logs(log_file):
+     wandb.log(metrics)
+
+ # Finish the WandB run.
+ wandb.finish()
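
Assuming wandb is installed and authenticated (wandb login), running "python graphs.py" from the repository root replays each epoch of Training/train_log.txt as one WandB step. For the first epoch line of the log, parse_logs yields:

{'epoch': 1, 'lr_model': 1.0, 'lr_wav2vec': 0.0001, 'train_loss': 2.16,
 'valid_loss': 0.631, 'valid_CER': 17.94, 'valid_WER': 61.08}

The three trailing "Epoch loaded: 30 ..." lines do not match the regex and are silently skipped.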