# Generated 2022-05-27 from:
# /data/n.abdoumohamed/dvoice-africa/speechbrain/recipes/DVoice/ASR/CTC/hparams/train_amharic.yaml
# yamllint disable
# ################################
# Model: wav2vec2 + DNN + CTC
# Augmentation: SpecAugment
# Authors: Titouan Parcollet 2021
# ################################
# Seed needs to be set at top of yaml, before objects with parameters are made
seed: 1249
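# hyperpyyaml evaluates the line below while loading this file, i.e. it calls
# torch.manual_seed(1249) before any module is instantiated.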
__set_seed: !!python/object/apply:torch.manual_seed [1249]
output_folder: results/wav2vec2_ctc_AMHARIC/1249
wer_file: results/wav2vec2_ctc_AMHARIC/1249/wer.txt
save_folder: results/wav2vec2_ctc_AMHARIC/1249/save
train_log: results/wav2vec2_ctc_AMHARIC/1249/train_log.txt
# HuggingFace hub reference for the multilingual XLSR-53 wav2vec 2.0 model.
wav2vec2_hub: facebook/wav2vec2-large-xlsr-53
# Data files
data_folder: ASR/AMHARIC/data # e.g., /localscratch/cv-corpus-5.1-2020-06-22/fr
train_csv_file: ASR/AMHARIC/data/train.csv # Standard CommonVoice .csv files
dev_csv_file: ASR/AMHARIC/data/dev.csv # Standard CommonVoice .csv files
test_csv_file: ASR/AMHARIC/data/test.csv # Standard CommonVoice .csv files
accented_letters: true
language: amharic
train_csv: results/wav2vec2_ctc_AMHARIC/save/train.csv
valid_csv: results/wav2vec2_ctc_AMHARIC/save/dev.csv
test_csv: results/wav2vec2_ctc_AMHARIC/save/test.csv
skip_prep: false # Skip data preparation
data_augmentation: false # Skip data augmentation
# We remove utterances longer than 15s in the train/dev/test sets, as
# longer sentences likely correspond to "open microphones".
avoid_if_longer_than: 15.0
# Training parameters
number_of_epochs: 30
number_of_ctc_epochs: 15
lr: 1.0
lr_wav2vec: 0.0001
ctc_weight: 0.3
sorting: ascending
auto_mix_prec: false
sample_rate: 16000
ckpt_interval_minutes: 30 # save checkpoint every N min
# With data_parallel batch_size is split into N jobs
# With DDP batch_size is multiplied by N jobs
# batch_size is set to 4 per GPU here so that the model fits in 16GB of VRAM
batch_size: 4
test_batch_size: 4
dataloader_options:
batch_size: 4
num_workers: 2
test_dataloader_options:
batch_size: 4
num_workers: 2
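# These option dicts are passed through to torch.utils.data.DataLoader when
# SpeechBrain builds the train/valid/test loaders.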
# BPE parameters
token_type: char # ["unigram", "bpe", "char"]
character_coverage: 1.0
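# Consumed by the recipe's SentencePiece tokenizer; with token_type: char the
# vocabulary is character-level, a common choice for the Amharic (Ge'ez) script.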
# Model parameters
activation: !name:torch.nn.LeakyReLU
wav2vec_output_dim: 1024
dnn_neurons: 1024
freeze_wav2vec: false
# Outputs
output_neurons: 224 # Tokenizer vocabulary size, index(blank) = 0
# Decoding parameters
# Be sure that the bos and eos index match with the BPEs ones
blank_index: 0
bos_index: 1
eos_index: 2
min_decode_ratio: 0.0
max_decode_ratio: 1.0
beam_size: 80
eos_threshold: 1.5
using_max_attn_shift: true
max_attn_shift: 140
ctc_weight_decode: 0.0
temperature: 1.50
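# Note: the attention-decoder fields above (beam_size, eos_threshold,
# using_max_attn_shift, ...) come from the seq2seq template; a pure CTC
# recipe typically decodes greedily and ignores them.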
#
# Functions and classes
#
epoch_counter: &id007 !new:speechbrain.utils.epoch_loop.EpochCounter
limit: 30
augmentation: !new:speechbrain.lobes.augment.TimeDomainSpecAugment
sample_rate: 16000
speeds: [95, 100, 105]
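# Speed perturbation at 95/100/105 % of the original rate; presumably only
# applied when data_augmentation above is true.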
enc: &id002 !new:speechbrain.nnet.containers.Sequential
input_shape: [null, null, 1024]
linear1: !name:speechbrain.nnet.linear.Linear
n_neurons: 1024
bias: true
bn1: !name:speechbrain.nnet.normalization.BatchNorm1d
activation: !new:torch.nn.LeakyReLU
drop: !new:torch.nn.Dropout
p: 0.15
linear2: !name:speechbrain.nnet.linear.Linear
n_neurons: 1024
bias: true
bn2: !name:speechbrain.nnet.normalization.BatchNorm1d
activation2: !new:torch.nn.LeakyReLU
drop2: !new:torch.nn.Dropout
p: 0.15
linear3: !name:speechbrain.nnet.linear.Linear
n_neurons: 1024
bias: true
bn3: !name:speechbrain.nnet.normalization.BatchNorm1d
activation3: !new:torch.nn.LeakyReLU
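# enc: a 3-layer DNN (1024 units each) on top of the 1024-dim wav2vec2
# features; every layer is followed by BatchNorm1d and LeakyReLU, with 15 %
# dropout after the first two blocks.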
wav2vec2: &id001 !new:speechbrain.lobes.models.huggingface_wav2vec.HuggingFaceWav2Vec2
source: facebook/wav2vec2-large-xlsr-53
output_norm: true
freeze: false
save_path: results/wav2vec2_ctc_AMHARIC/1249/save/wav2vec2_checkpoint
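# freeze: false means the XLSR-53 encoder is fine-tuned jointly with the DNN,
# under its own optimizer (wav2vec_opt_class) and learning rate (lr_wav2vec).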
#####
# Uncomment this block if you prefer to use a Fairseq pretrained model instead
# of a HuggingFace one. Here, we provide an URL that is obtained from the
# Fairseq github for the multilingual XLSR.
#
#wav2vec2_url: https://dl.fbaipublicfiles.com/fairseq/wav2vec/xlsr_53_56k.pt
#wav2vec2: !new:speechbrain.lobes.models.fairseq_wav2vec.FairseqWav2Vec2
# pretrained_path: !ref <wav2vec2_url>
# output_norm: True
# freeze: False
# save_path: !ref <save_folder>/wav2vec2_checkpoint/model.pt
#####
ctc_lin: &id003 !new:speechbrain.nnet.linear.Linear
input_size: 1024
n_neurons: 224
log_softmax: !new:speechbrain.nnet.activations.Softmax
apply_log: true
ctc_cost: !name:speechbrain.nnet.losses.ctc_loss
blank_index: 0
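# CTC head: ctc_lin projects the 1024-dim encoder output to the 224 output
# tokens, log_softmax turns the logits into log-probabilities, and ctc_cost
# computes the CTC loss with blank index 0.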
modules:
wav2vec2: *id001
enc: *id002
ctc_lin: *id003
model: &id004 !new:torch.nn.ModuleList
- [*id002, *id003]
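# Wrapping enc and ctc_lin in one ModuleList lets a single optimizer
# (model_opt_class below) update both, while wav2vec2 is optimized separately.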
model_opt_class: !name:torch.optim.Adadelta
lr: 1.0
rho: 0.95
eps: 1.e-8
wav2vec_opt_class: !name:torch.optim.Adam
lr: 0.0001
lr_annealing_model: &id005 !new:speechbrain.nnet.schedulers.NewBobScheduler
initial_value: 1.0
improvement_threshold: 0.0025
annealing_factor: 0.8
patient: 0
lr_annealing_wav2vec: &id006 !new:speechbrain.nnet.schedulers.NewBobScheduler
initial_value: 0.0001
improvement_threshold: 0.0025
annealing_factor: 0.9
patient: 0
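# NewBob annealing: each learning rate is multiplied by its annealing_factor
# whenever the relative improvement between epochs falls below
# improvement_threshold; patient: 0 means annealing can trigger immediately.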
checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
checkpoints_dir: results/wav2vec2_ctc_AMHARIC/1249/save
recoverables:
wav2vec2: *id001
model: *id004
scheduler_model: *id005
scheduler_wav2vec: *id006
counter: *id007
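# All recoverables are checkpointed together (every ckpt_interval_minutes
# during training), so an interrupted run resumes with model, schedulers,
# and epoch counter in sync.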
train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
save_file: results/wav2vec2_ctc_AMHARIC/1249/train_log.txt
error_rate_computer: !name:speechbrain.utils.metric_stats.ErrorRateStats
cer_computer: !name:speechbrain.utils.metric_stats.ErrorRateStats
split_tokens: true
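#
# Usage sketch (illustrative, not part of the recipe): a generated YAML like
# this one is loaded by the recipe's train script with hyperpyyaml, roughly:
#
#   from hyperpyyaml import load_hyperpyyaml
#
#   with open("hparams/train_amharic.yaml") as fin:
#       hparams = load_hyperpyyaml(fin)
#
#   # hparams is a dict; tagged entries such as hparams["modules"] are
#   # already-instantiated Python objects.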