|
05/22/2023 13:20:20 - WARNING - __main__ - Process rank: 0, device: cuda:0, n_gpu: 2, distributed training: True, 16-bits training: True
|
05/22/2023 13:20:20 - INFO - __main__ - Training/evaluation parameters Seq2SeqTrainingArguments( |
|
_n_gpu=2, |
|
adafactor=False, |
|
adam_beta1=0.9, |
|
adam_beta2=0.999, |
|
adam_epsilon=1e-08, |
|
auto_find_batch_size=False, |
|
bf16=False, |
|
bf16_full_eval=False, |
|
data_seed=None, |
|
dataloader_drop_last=False, |
|
dataloader_num_workers=0, |
|
dataloader_pin_memory=True, |
|
ddp_backend=None, |
|
ddp_bucket_cap_mb=None, |
|
ddp_find_unused_parameters=None, |
|
ddp_timeout=1800, |
|
debug=[], |
|
deepspeed=None, |
|
disable_tqdm=False, |
|
do_eval=True, |
|
do_predict=False, |
|
do_train=True, |
|
eval_accumulation_steps=None, |
|
eval_delay=0, |
|
eval_steps=1000, |
|
evaluation_strategy=steps, |
|
fp16=True, |
|
fp16_backend=auto, |
|
fp16_full_eval=False, |
|
fp16_opt_level=O1, |
|
fsdp=[], |
|
fsdp_config={'fsdp_min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False}, |
|
fsdp_min_num_params=0, |
|
fsdp_transformer_layer_cls_to_wrap=None, |
|
full_determinism=False, |
|
generation_config=None, |
|
generation_max_length=225, |
|
generation_num_beams=None, |
|
gradient_accumulation_steps=8, |
|
gradient_checkpointing=True, |
|
greater_is_better=False, |
|
group_by_length=False, |
|
half_precision_backend=auto, |
|
hub_model_id=None, |
|
hub_private_repo=False, |
|
hub_strategy=every_save, |
|
hub_token=<HUB_TOKEN>, |
|
ignore_data_skip=False, |
|
include_inputs_for_metrics=False, |
|
jit_mode_eval=False, |
|
label_names=None, |
|
label_smoothing_factor=0.0, |
|
learning_rate=1.75e-05, |
|
length_column_name=input_length, |
|
load_best_model_at_end=True, |
|
local_rank=0, |
|
log_level=passive, |
|
log_level_replica=warning, |
|
log_on_each_node=True, |
|
logging_dir=./runs/May22_13-20-19_crimv3mgpu016, |
|
logging_first_step=False, |
|
logging_nan_inf_filter=True, |
|
logging_steps=25, |
|
logging_strategy=steps, |
|
lr_scheduler_type=linear, |
|
max_grad_norm=1.0, |
|
max_steps=-1, |
|
metric_for_best_model=wer, |
|
mp_parameters=, |
|
no_cuda=False, |
|
num_train_epochs=30.0, |
|
optim=adamw_hf, |
|
optim_args=None, |
|
output_dir=./, |
|
overwrite_output_dir=True, |
|
past_index=-1, |
|
per_device_eval_batch_size=32, |
|
per_device_train_batch_size=32, |
|
predict_with_generate=True, |
|
prediction_loss_only=False, |
|
push_to_hub=True, |
|
push_to_hub_model_id=None, |
|
push_to_hub_organization=None, |
|
push_to_hub_token=<PUSH_TO_HUB_TOKEN>, |
|
ray_scope=last, |
|
remove_unused_columns=True, |
|
report_to=['wandb'], |
|
resume_from_checkpoint=None, |
|
run_name=./, |
|
save_on_each_node=False, |
|
save_safetensors=False, |
|
save_steps=1000, |
|
save_strategy=steps, |
|
save_total_limit=None, |
|
seed=42, |
|
sharded_ddp=[], |
|
skip_memory_metrics=True, |
|
sortish_sampler=False, |
|
tf32=None, |
|
torch_compile=False, |
|
torch_compile_backend=None, |
|
torch_compile_mode=None, |
|
torchdynamo=None, |
|
tpu_metrics_debug=False, |
|
tpu_num_cores=None, |
|
use_ipex=False, |
|
use_legacy_prediction_loop=False, |
|
use_mps_device=False, |
|
warmup_ratio=0.0, |
|
warmup_steps=4000, |
|
weight_decay=0.0, |
|
xpu_backend=None, |
|
) |
|
05/22/2023 13:20:20 - INFO - __main__ - Training/evaluation parameters Seq2SeqTrainingArguments( |
|
_n_gpu=2, |
|
adafactor=False, |
|
adam_beta1=0.9, |
|
adam_beta2=0.999, |
|
adam_epsilon=1e-08, |
|
auto_find_batch_size=False, |
|
bf16=False, |
|
bf16_full_eval=False, |
|
data_seed=None, |
|
dataloader_drop_last=False, |
|
dataloader_num_workers=0, |
|
dataloader_pin_memory=True, |
|
ddp_backend=None, |
|
ddp_bucket_cap_mb=None, |
|
ddp_find_unused_parameters=None, |
|
ddp_timeout=1800, |
|
debug=[], |
|
deepspeed=None, |
|
disable_tqdm=False, |
|
do_eval=True, |
|
do_predict=False, |
|
do_train=True, |
|
eval_accumulation_steps=None, |
|
eval_delay=0, |
|
eval_steps=1000, |
|
evaluation_strategy=steps, |
|
fp16=True, |
|
fp16_backend=auto, |
|
fp16_full_eval=False, |
|
fp16_opt_level=O1, |
|
fsdp=[], |
|
fsdp_config={'fsdp_min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False}, |
|
fsdp_min_num_params=0, |
|
fsdp_transformer_layer_cls_to_wrap=None, |
|
full_determinism=False, |
|
generation_config=None, |
|
generation_max_length=225, |
|
generation_num_beams=None, |
|
gradient_accumulation_steps=8, |
|
gradient_checkpointing=True, |
|
greater_is_better=False, |
|
group_by_length=False, |
|
half_precision_backend=auto, |
|
hub_model_id=None, |
|
hub_private_repo=False, |
|
hub_strategy=every_save, |
|
hub_token=<HUB_TOKEN>, |
|
ignore_data_skip=False, |
|
include_inputs_for_metrics=False, |
|
jit_mode_eval=False, |
|
label_names=None, |
|
label_smoothing_factor=0.0, |
|
learning_rate=1.75e-05, |
|
length_column_name=input_length, |
|
load_best_model_at_end=True, |
|
local_rank=0, |
|
log_level=passive, |
|
log_level_replica=warning, |
|
log_on_each_node=True, |
|
logging_dir=./runs/May22_13-20-19_crimv3mgpu016, |
|
logging_first_step=False, |
|
logging_nan_inf_filter=True, |
|
logging_steps=25, |
|
logging_strategy=steps, |
|
lr_scheduler_type=linear, |
|
max_grad_norm=1.0, |
|
max_steps=-1, |
|
metric_for_best_model=wer, |
|
mp_parameters=, |
|
no_cuda=False, |
|
num_train_epochs=30.0, |
|
optim=adamw_hf, |
|
optim_args=None, |
|
output_dir=./, |
|
overwrite_output_dir=True, |
|
past_index=-1, |
|
per_device_eval_batch_size=32, |
|
per_device_train_batch_size=32, |
|
predict_with_generate=True, |
|
prediction_loss_only=False, |
|
push_to_hub=True, |
|
push_to_hub_model_id=None, |
|
push_to_hub_organization=None, |
|
push_to_hub_token=<PUSH_TO_HUB_TOKEN>, |
|
ray_scope=last, |
|
remove_unused_columns=True, |
|
report_to=['wandb'], |
|
resume_from_checkpoint=None, |
|
run_name=./, |
|
save_on_each_node=False, |
|
save_safetensors=False, |
|
save_steps=1000, |
|
save_strategy=steps, |
|
save_total_limit=None, |
|
seed=42, |
|
sharded_ddp=[], |
|
skip_memory_metrics=True, |
|
sortish_sampler=False, |
|
tf32=None, |
|
torch_compile=False, |
|
torch_compile_backend=None, |
|
torch_compile_mode=None, |
|
torchdynamo=None, |
|
tpu_metrics_debug=False, |
|
tpu_num_cores=None, |
|
use_ipex=False, |
|
use_legacy_prediction_loop=False, |
|
use_mps_device=False, |
|
warmup_ratio=0.0, |
|
warmup_steps=4000, |
|
weight_decay=0.0, |
|
xpu_backend=None, |
|
) |
|
[INFO|configuration_utils.py:669] 2023-05-22 13:20:37,583 >> loading configuration file config.json from cache at /home/local/QCRI/dizham/.cache/huggingface/hub/models--openai--whisper-small/snapshots/f6744499d1eba717bcf4d6be735e3d386ffb60ad/config.json |
|
[INFO|configuration_utils.py:725] 2023-05-22 13:20:37,651 >> Model config WhisperConfig { |
|
"_name_or_path": "openai/whisper-small", |
|
"activation_dropout": 0.0, |
|
"activation_function": "gelu", |
|
"apply_spec_augment": false, |
|
"architectures": [ |
|
"WhisperForConditionalGeneration" |
|
], |
|
"attention_dropout": 0.0, |
|
"begin_suppress_tokens": [ |
|
220, |
|
50257 |
|
], |
|
"bos_token_id": 50257, |
|
"classifier_proj_size": 256, |
|
"d_model": 768, |
|
"decoder_attention_heads": 12, |
|
"decoder_ffn_dim": 3072, |
|
"decoder_layerdrop": 0.0, |
|
"decoder_layers": 12, |
|
"decoder_start_token_id": 50258, |
|
"dropout": 0.0, |
|
"encoder_attention_heads": 12, |
|
"encoder_ffn_dim": 3072, |
|
"encoder_layerdrop": 0.0, |
|
"encoder_layers": 12, |
|
"eos_token_id": 50257, |
|
"forced_decoder_ids": [ |
|
[ |
|
1, |
|
50259 |
|
], |
|
[ |
|
2, |
|
50359 |
|
], |
|
[ |
|
3, |
|
50363 |
|
] |
|
], |
|
"init_std": 0.02, |
|
"is_encoder_decoder": true, |
|
"mask_feature_length": 10, |
|
"mask_feature_min_masks": 0, |
|
"mask_feature_prob": 0.0, |
|
"mask_time_length": 10, |
|
"mask_time_min_masks": 2, |
|
"mask_time_prob": 0.05, |
|
"max_length": 448, |
|
"max_source_positions": 1500, |
|
"max_target_positions": 448, |
|
"model_type": "whisper", |
|
"num_hidden_layers": 12, |
|
"num_mel_bins": 80, |
|
"pad_token_id": 50257, |
|
"scale_embedding": false, |
|
"suppress_tokens": [ |
|
1, |
|
2, |
|
7, |
|
8, |
|
9, |
|
10, |
|
14, |
|
25, |
|
26, |
|
27, |
|
28, |
|
29, |
|
31, |
|
58, |
|
59, |
|
60, |
|
61, |
|
62, |
|
63, |
|
90, |
|
91, |
|
92, |
|
93, |
|
359, |
|
503, |
|
522, |
|
542, |
|
873, |
|
893, |
|
902, |
|
918, |
|
922, |
|
931, |
|
1350, |
|
1853, |
|
1982, |
|
2460, |
|
2627, |
|
3246, |
|
3253, |
|
3268, |
|
3536, |
|
3846, |
|
3961, |
|
4183, |
|
4667, |
|
6585, |
|
6647, |
|
7273, |
|
9061, |
|
9383, |
|
10428, |
|
10929, |
|
11938, |
|
12033, |
|
12331, |
|
12562, |
|
13793, |
|
14157, |
|
14635, |
|
15265, |
|
15618, |
|
16553, |
|
16604, |
|
18362, |
|
18956, |
|
20075, |
|
21675, |
|
22520, |
|
26130, |
|
26161, |
|
26435, |
|
28279, |
|
29464, |
|
31650, |
|
32302, |
|
32470, |
|
36865, |
|
42863, |
|
47425, |
|
49870, |
|
50254, |
|
50258, |
|
50360, |
|
50361, |
|
50362 |
|
], |
|
"torch_dtype": "float32", |
|
"transformers_version": "4.29.0.dev0", |
|
"use_cache": true, |
|
"use_weighted_layer_sum": false, |
|
"vocab_size": 51865 |
|
} |
|
|
|
[INFO|feature_extraction_utils.py:469] 2023-05-22 13:20:37,926 >> loading configuration file preprocessor_config.json from cache at /home/local/QCRI/dizham/.cache/huggingface/hub/models--openai--whisper-small/snapshots/f6744499d1eba717bcf4d6be735e3d386ffb60ad/preprocessor_config.json |
|
[INFO|feature_extraction_utils.py:511] 2023-05-22 13:20:37,955 >> Feature extractor WhisperFeatureExtractor { |
|
"chunk_length": 30, |
|
"feature_extractor_type": "WhisperFeatureExtractor", |
|
"feature_size": 80, |
|
"hop_length": 160, |
|
"n_fft": 400, |
|
"n_samples": 480000, |
|
"nb_max_frames": 3000, |
|
"padding_side": "right", |
|
"padding_value": 0.0, |
|
"processor_class": "WhisperProcessor", |
|
"return_attention_mask": false, |
|
"sampling_rate": 16000 |
|
} |
|
|
|
[INFO|tokenization_utils_base.py:1810] 2023-05-22 13:20:38,269 >> loading file vocab.json from cache at /home/local/QCRI/dizham/.cache/huggingface/hub/models--openai--whisper-small/snapshots/f6744499d1eba717bcf4d6be735e3d386ffb60ad/vocab.json |
|
[INFO|tokenization_utils_base.py:1810] 2023-05-22 13:20:38,269 >> loading file tokenizer.json from cache at /home/local/QCRI/dizham/.cache/huggingface/hub/models--openai--whisper-small/snapshots/f6744499d1eba717bcf4d6be735e3d386ffb60ad/tokenizer.json |
|
[INFO|tokenization_utils_base.py:1810] 2023-05-22 13:20:38,269 >> loading file merges.txt from cache at /home/local/QCRI/dizham/.cache/huggingface/hub/models--openai--whisper-small/snapshots/f6744499d1eba717bcf4d6be735e3d386ffb60ad/merges.txt |
|
[INFO|tokenization_utils_base.py:1810] 2023-05-22 13:20:38,269 >> loading file normalizer.json from cache at /home/local/QCRI/dizham/.cache/huggingface/hub/models--openai--whisper-small/snapshots/f6744499d1eba717bcf4d6be735e3d386ffb60ad/normalizer.json |
|
[INFO|tokenization_utils_base.py:1810] 2023-05-22 13:20:38,269 >> loading file added_tokens.json from cache at /home/local/QCRI/dizham/.cache/huggingface/hub/models--openai--whisper-small/snapshots/f6744499d1eba717bcf4d6be735e3d386ffb60ad/added_tokens.json |
|
[INFO|tokenization_utils_base.py:1810] 2023-05-22 13:20:38,269 >> loading file special_tokens_map.json from cache at /home/local/QCRI/dizham/.cache/huggingface/hub/models--openai--whisper-small/snapshots/f6744499d1eba717bcf4d6be735e3d386ffb60ad/special_tokens_map.json |
|
[INFO|tokenization_utils_base.py:1810] 2023-05-22 13:20:38,269 >> loading file tokenizer_config.json from cache at /home/local/QCRI/dizham/.cache/huggingface/hub/models--openai--whisper-small/snapshots/f6744499d1eba717bcf4d6be735e3d386ffb60ad/tokenizer_config.json |
|
[INFO|modeling_utils.py:2542] 2023-05-22 13:20:38,651 >> loading weights file pytorch_model.bin from cache at /home/local/QCRI/dizham/.cache/huggingface/hub/models--openai--whisper-small/snapshots/f6744499d1eba717bcf4d6be735e3d386ffb60ad/pytorch_model.bin |
|
[INFO|configuration_utils.py:577] 2023-05-22 13:20:47,050 >> Generate config GenerationConfig { |
|
"_from_model_config": true, |
|
"begin_suppress_tokens": [ |
|
220, |
|
50257 |
|
], |
|
"bos_token_id": 50257, |
|
"decoder_start_token_id": 50258, |
|
"eos_token_id": 50257, |
|
"max_length": 448, |
|
"pad_token_id": 50257, |
|
"transformers_version": "4.29.0.dev0", |
|
"use_cache": false |
|
} |
|
|
|
[INFO|modeling_utils.py:3211] 2023-05-22 13:20:49,666 >> All model checkpoint weights were used when initializing WhisperForConditionalGeneration. |
|
|
|
[INFO|modeling_utils.py:3219] 2023-05-22 13:20:49,666 >> All the weights of WhisperForConditionalGeneration were initialized from the model checkpoint at openai/whisper-small. |
|
If your task is similar to the task the model of the checkpoint was trained on, you can already use WhisperForConditionalGeneration for predictions without further training. |
|
[INFO|configuration_utils.py:539] 2023-05-22 13:20:50,330 >> loading configuration file generation_config.json from cache at /home/local/QCRI/dizham/.cache/huggingface/hub/models--openai--whisper-small/snapshots/f6744499d1eba717bcf4d6be735e3d386ffb60ad/generation_config.json |
|
[INFO|configuration_utils.py:577] 2023-05-22 13:20:50,331 >> Generate config GenerationConfig { |
|
"begin_suppress_tokens": [ |
|
220, |
|
50257 |
|
], |
|
"bos_token_id": 50257, |
|
"decoder_start_token_id": 50258, |
|
"eos_token_id": 50257, |
|
"forced_decoder_ids": [ |
|
[ |
|
1, |
|
null |
|
], |
|
[ |
|
2, |
|
50359 |
|
] |
|
], |
|
"is_multilingual": true, |
|
"lang_to_id": { |
|
"<|af|>": 50327, |
|
"<|am|>": 50334, |
|
"<|ar|>": 50272, |
|
"<|as|>": 50350, |
|
"<|az|>": 50304, |
|
"<|ba|>": 50355, |
|
"<|be|>": 50330, |
|
"<|bg|>": 50292, |
|
"<|bn|>": 50302, |
|
"<|bo|>": 50347, |
|
"<|br|>": 50309, |
|
"<|bs|>": 50315, |
|
"<|ca|>": 50270, |
|
"<|cs|>": 50283, |
|
"<|cy|>": 50297, |
|
"<|da|>": 50285, |
|
"<|de|>": 50261, |
|
"<|el|>": 50281, |
|
"<|en|>": 50259, |
|
"<|es|>": 50262, |
|
"<|et|>": 50307, |
|
"<|eu|>": 50310, |
|
"<|fa|>": 50300, |
|
"<|fi|>": 50277, |
|
"<|fo|>": 50338, |
|
"<|fr|>": 50265, |
|
"<|gl|>": 50319, |
|
"<|gu|>": 50333, |
|
"<|haw|>": 50352, |
|
"<|ha|>": 50354, |
|
"<|he|>": 50279, |
|
"<|hi|>": 50276, |
|
"<|hr|>": 50291, |
|
"<|ht|>": 50339, |
|
"<|hu|>": 50286, |
|
"<|hy|>": 50312, |
|
"<|id|>": 50275, |
|
"<|is|>": 50311, |
|
"<|it|>": 50274, |
|
"<|ja|>": 50266, |
|
"<|jw|>": 50356, |
|
"<|ka|>": 50329, |
|
"<|kk|>": 50316, |
|
"<|km|>": 50323, |
|
"<|kn|>": 50306, |
|
"<|ko|>": 50264, |
|
"<|la|>": 50294, |
|
"<|lb|>": 50345, |
|
"<|ln|>": 50353, |
|
"<|lo|>": 50336, |
|
"<|lt|>": 50293, |
|
"<|lv|>": 50301, |
|
"<|mg|>": 50349, |
|
"<|mi|>": 50295, |
|
"<|mk|>": 50308, |
|
"<|ml|>": 50296, |
|
"<|mn|>": 50314, |
|
"<|mr|>": 50320, |
|
"<|ms|>": 50282, |
|
"<|mt|>": 50343, |
|
"<|my|>": 50346, |
|
"<|ne|>": 50313, |
|
"<|nl|>": 50271, |
|
"<|nn|>": 50342, |
|
"<|no|>": 50288, |
|
"<|oc|>": 50328, |
|
"<|pa|>": 50321, |
|
"<|pl|>": 50269, |
|
"<|ps|>": 50340, |
|
"<|pt|>": 50267, |
|
"<|ro|>": 50284, |
|
"<|ru|>": 50263, |
|
"<|sa|>": 50344, |
|
"<|sd|>": 50332, |
|
"<|si|>": 50322, |
|
"<|sk|>": 50298, |
|
"<|sl|>": 50305, |
|
"<|sn|>": 50324, |
|
"<|so|>": 50326, |
|
"<|sq|>": 50317, |
|
"<|sr|>": 50303, |
|
"<|su|>": 50357, |
|
"<|sv|>": 50273, |
|
"<|sw|>": 50318, |
|
"<|ta|>": 50287, |
|
"<|te|>": 50299, |
|
"<|tg|>": 50331, |
|
"<|th|>": 50289, |
|
"<|tk|>": 50341, |
|
"<|tl|>": 50348, |
|
"<|tr|>": 50268, |
|
"<|tt|>": 50351, |
|
"<|uk|>": 50280, |
|
"<|ur|>": 50290, |
|
"<|uz|>": 50337, |
|
"<|vi|>": 50278, |
|
"<|yi|>": 50335, |
|
"<|yo|>": 50325, |
|
"<|zh|>": 50260 |
|
}, |
|
"max_initial_timestamp_index": 1, |
|
"max_length": 448, |
|
"no_timestamps_token_id": 50363, |
|
"pad_token_id": 50257, |
|
"return_timestamps": false, |
|
"suppress_tokens": [ |
|
1, |
|
2, |
|
7, |
|
8, |
|
9, |
|
10, |
|
14, |
|
25, |
|
26, |
|
27, |
|
28, |
|
29, |
|
31, |
|
58, |
|
59, |
|
60, |
|
61, |
|
62, |
|
63, |
|
90, |
|
91, |
|
92, |
|
93, |
|
359, |
|
503, |
|
522, |
|
542, |
|
873, |
|
893, |
|
902, |
|
918, |
|
922, |
|
931, |
|
1350, |
|
1853, |
|
1982, |
|
2460, |
|
2627, |
|
3246, |
|
3253, |
|
3268, |
|
3536, |
|
3846, |
|
3961, |
|
4183, |
|
4667, |
|
6585, |
|
6647, |
|
7273, |
|
9061, |
|
9383, |
|
10428, |
|
10929, |
|
11938, |
|
12033, |
|
12331, |
|
12562, |
|
13793, |
|
14157, |
|
14635, |
|
15265, |
|
15618, |
|
16553, |
|
16604, |
|
18362, |
|
18956, |
|
20075, |
|
21675, |
|
22520, |
|
26130, |
|
26161, |
|
26435, |
|
28279, |
|
29464, |
|
31650, |
|
32302, |
|
32470, |
|
36865, |
|
42863, |
|
47425, |
|
49870, |
|
50254, |
|
50258, |
|
50358, |
|
50359, |
|
50360, |
|
50361, |
|
50362 |
|
], |
|
"task_to_id": { |
|
"transcribe": 50359, |
|
"translate": 50358 |
|
}, |
|
"transformers_version": "4.29.0.dev0" |
|
} |
|
|
|
[INFO|feature_extraction_utils.py:369] 2023-05-22 13:20:52,959 >> Feature extractor saved in ./preprocessor_config.json |
|
[INFO|tokenization_utils_base.py:2181] 2023-05-22 13:20:52,962 >> tokenizer config file saved in ./tokenizer_config.json |
|
[INFO|tokenization_utils_base.py:2188] 2023-05-22 13:20:52,965 >> Special tokens file saved in ./special_tokens_map.json |
|
[INFO|configuration_utils.py:458] 2023-05-22 13:20:53,103 >> Configuration saved in ./config.json |
|
[INFO|image_processing_utils.py:307] 2023-05-22 13:20:53,104 >> loading configuration file ./preprocessor_config.json |
|
[INFO|feature_extraction_utils.py:467] 2023-05-22 13:20:53,134 >> loading configuration file ./preprocessor_config.json |
|
[INFO|feature_extraction_utils.py:511] 2023-05-22 13:20:53,135 >> Feature extractor WhisperFeatureExtractor { |
|
"chunk_length": 30, |
|
"feature_extractor_type": "WhisperFeatureExtractor", |
|
"feature_size": 80, |
|
"hop_length": 160, |
|
"n_fft": 400, |
|
"n_samples": 480000, |
|
"nb_max_frames": 3000, |
|
"padding_side": "right", |
|
"padding_value": 0.0, |
|
"processor_class": "WhisperProcessor", |
|
"return_attention_mask": false, |
|
"sampling_rate": 16000 |
|
} |
|
|
|
[INFO|tokenization_utils_base.py:1808] 2023-05-22 13:20:53,136 >> loading file vocab.json |
|
[INFO|tokenization_utils_base.py:1808] 2023-05-22 13:20:53,136 >> loading file tokenizer.json |
|
[INFO|tokenization_utils_base.py:1808] 2023-05-22 13:20:53,136 >> loading file merges.txt |
|
[INFO|tokenization_utils_base.py:1808] 2023-05-22 13:20:53,136 >> loading file normalizer.json |
|
[INFO|tokenization_utils_base.py:1808] 2023-05-22 13:20:53,136 >> loading file added_tokens.json |
|
[INFO|tokenization_utils_base.py:1808] 2023-05-22 13:20:53,136 >> loading file special_tokens_map.json |
|
[INFO|tokenization_utils_base.py:1808] 2023-05-22 13:20:53,136 >> loading file tokenizer_config.json |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,201 >> Adding <|startoftranscript|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,201 >> Adding <|en|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,202 >> Adding <|zh|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,202 >> Adding <|de|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,202 >> Adding <|es|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,202 >> Adding <|ru|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,202 >> Adding <|ko|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,202 >> Adding <|fr|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,202 >> Adding <|ja|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,202 >> Adding <|pt|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,202 >> Adding <|tr|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,202 >> Adding <|pl|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,202 >> Adding <|ca|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,202 >> Adding <|nl|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,202 >> Adding <|ar|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,202 >> Adding <|sv|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,202 >> Adding <|it|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,202 >> Adding <|id|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,202 >> Adding <|hi|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,202 >> Adding <|fi|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,202 >> Adding <|vi|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,202 >> Adding <|he|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,202 >> Adding <|uk|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,202 >> Adding <|el|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,202 >> Adding <|ms|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,202 >> Adding <|cs|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,202 >> Adding <|ro|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,202 >> Adding <|da|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,202 >> Adding <|hu|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,202 >> Adding <|ta|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,202 >> Adding <|no|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,202 >> Adding <|th|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,202 >> Adding <|ur|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,202 >> Adding <|hr|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,202 >> Adding <|bg|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,202 >> Adding <|lt|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,202 >> Adding <|la|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,202 >> Adding <|mi|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,202 >> Adding <|ml|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,202 >> Adding <|cy|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,202 >> Adding <|sk|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,202 >> Adding <|te|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,203 >> Adding <|fa|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,203 >> Adding <|lv|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,203 >> Adding <|bn|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,203 >> Adding <|sr|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,203 >> Adding <|az|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,203 >> Adding <|sl|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,203 >> Adding <|kn|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,203 >> Adding <|et|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,203 >> Adding <|mk|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,203 >> Adding <|br|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,203 >> Adding <|eu|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,203 >> Adding <|is|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,203 >> Adding <|hy|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,203 >> Adding <|ne|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,203 >> Adding <|mn|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,203 >> Adding <|bs|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,203 >> Adding <|kk|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,203 >> Adding <|sq|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,203 >> Adding <|sw|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,203 >> Adding <|gl|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,203 >> Adding <|mr|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,203 >> Adding <|pa|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,203 >> Adding <|si|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,203 >> Adding <|km|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,203 >> Adding <|sn|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,203 >> Adding <|yo|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,203 >> Adding <|so|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,203 >> Adding <|af|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,203 >> Adding <|oc|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,203 >> Adding <|ka|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,203 >> Adding <|be|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,203 >> Adding <|tg|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,203 >> Adding <|sd|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,203 >> Adding <|gu|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,203 >> Adding <|am|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,203 >> Adding <|yi|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,203 >> Adding <|lo|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,203 >> Adding <|uz|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,203 >> Adding <|fo|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,203 >> Adding <|ht|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,203 >> Adding <|ps|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,204 >> Adding <|tk|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,204 >> Adding <|nn|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,204 >> Adding <|mt|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,204 >> Adding <|sa|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,204 >> Adding <|lb|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,204 >> Adding <|my|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,204 >> Adding <|bo|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,204 >> Adding <|tl|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,204 >> Adding <|mg|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,204 >> Adding <|as|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,204 >> Adding <|tt|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,204 >> Adding <|haw|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,204 >> Adding <|ln|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,204 >> Adding <|ha|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,204 >> Adding <|ba|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,204 >> Adding <|jw|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,204 >> Adding <|su|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,204 >> Adding <|translate|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,204 >> Adding <|transcribe|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,204 >> Adding <|startoflm|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,204 >> Adding <|startofprev|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,204 >> Adding <|nocaptions|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-22 13:20:53,204 >> Adding <|notimestamps|> to the vocabulary |
|
/home/local/QCRI/dizham/kanari/whisper/whisper-small-ar/./ is already a clone of https://huggingface.co/danielizham/whisper-small-ar. Make sure you pull the latest changes with `repo.git_pull()`. |
|
05/22/2023 13:21:02 - WARNING - huggingface_hub.repository - /home/local/QCRI/dizham/kanari/whisper/whisper-small-ar/./ is already a clone of https://huggingface.co/danielizham/whisper-small-ar. Make sure you pull the latest changes with `repo.git_pull()`. |
|
Traceback (most recent call last): |
|
File "/home/local/QCRI/dizham/miniconda3/envs/whisper/lib/python3.9/site-packages/huggingface_hub/repository.py", line 987, in git_pull |
|
result = run_subprocess(command, self.local_dir) |
|
File "/home/local/QCRI/dizham/miniconda3/envs/whisper/lib/python3.9/site-packages/huggingface_hub/utils/_subprocess.py", line 83, in run_subprocess |
|
return subprocess.run( |
|
File "/home/local/QCRI/dizham/miniconda3/envs/whisper/lib/python3.9/subprocess.py", line 528, in run |
|
raise CalledProcessError(retcode, process.args, |
|
subprocess.CalledProcessError: Command '['git', 'pull']' returned non-zero exit status 128. |
|
|
|
During handling of the above exception, another exception occurred: |
|
|
|
Traceback (most recent call last): |
|
File "/home/local/QCRI/dizham/kanari/whisper/whisper-small-ar/run_speech_recognition_seq2seq_streaming.py", line 629, in <module> |
|
main() |
|
File "/home/local/QCRI/dizham/kanari/whisper/whisper-small-ar/run_speech_recognition_seq2seq_streaming.py", line 560, in main |
|
trainer = Seq2SeqTrainer( |
|
File "/home/local/QCRI/dizham/miniconda3/envs/whisper/lib/python3.9/site-packages/transformers/trainer_seq2seq.py", line 56, in __init__ |
|
super().__init__( |
|
File "/home/local/QCRI/dizham/miniconda3/envs/whisper/lib/python3.9/site-packages/transformers/trainer.py", line 551, in __init__ |
|
self.init_git_repo(at_init=True) |
|
File "/home/local/QCRI/dizham/miniconda3/envs/whisper/lib/python3.9/site-packages/transformers/trainer.py", line 3516, in init_git_repo |
|
self.repo.git_pull() |
|
File "/home/local/QCRI/dizham/miniconda3/envs/whisper/lib/python3.9/site-packages/huggingface_hub/repository.py", line 990, in git_pull |
|
raise EnvironmentError(exc.stderr) |
|
OSError: error: cannot pull with rebase: You have unstaged changes. |
|
error: please commit or stash them. |
|
|
|
|