|
05/10/2023 09:41:07 - WARNING - __main__ - Process rank: 0, device: cuda:0, n_gpu: 2distributed training: True, 16-bits training: True |
|
05/10/2023 09:41:07 - INFO - __main__ - Training/evaluation parameters Seq2SeqTrainingArguments( |
|
_n_gpu=2, |
|
adafactor=False, |
|
adam_beta1=0.9, |
|
adam_beta2=0.999, |
|
adam_epsilon=1e-08, |
|
auto_find_batch_size=False, |
|
bf16=False, |
|
bf16_full_eval=False, |
|
data_seed=None, |
|
dataloader_drop_last=False, |
|
dataloader_num_workers=0, |
|
dataloader_pin_memory=True, |
|
ddp_backend=None, |
|
ddp_bucket_cap_mb=None, |
|
ddp_find_unused_parameters=None, |
|
ddp_timeout=1800, |
|
debug=[], |
|
deepspeed=None, |
|
disable_tqdm=False, |
|
do_eval=True, |
|
do_predict=False, |
|
do_train=True, |
|
eval_accumulation_steps=None, |
|
eval_delay=0, |
|
eval_steps=1000, |
|
evaluation_strategy=steps, |
|
fp16=True, |
|
fp16_backend=auto, |
|
fp16_full_eval=False, |
|
fp16_opt_level=O1, |
|
fsdp=[], |
|
fsdp_config={'fsdp_min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False}, |
|
fsdp_min_num_params=0, |
|
fsdp_transformer_layer_cls_to_wrap=None, |
|
full_determinism=False, |
|
generation_config=None, |
|
generation_max_length=225, |
|
generation_num_beams=None, |
|
gradient_accumulation_steps=2, |
|
gradient_checkpointing=True, |
|
greater_is_better=False, |
|
group_by_length=False, |
|
half_precision_backend=auto, |
|
hub_model_id=None, |
|
hub_private_repo=False, |
|
hub_strategy=every_save, |
|
hub_token=<HUB_TOKEN>, |
|
ignore_data_skip=False, |
|
include_inputs_for_metrics=False, |
|
jit_mode_eval=False, |
|
label_names=None, |
|
label_smoothing_factor=0.0, |
|
learning_rate=1e-05, |
|
length_column_name=input_length, |
|
load_best_model_at_end=True, |
|
local_rank=0, |
|
log_level=passive, |
|
log_level_replica=warning, |
|
log_on_each_node=True, |
|
logging_dir=./runs/May10_09-41-06_crimv3mgpu016, |
|
logging_first_step=False, |
|
logging_nan_inf_filter=True, |
|
logging_steps=25, |
|
logging_strategy=steps, |
|
lr_scheduler_type=linear, |
|
max_grad_norm=1.0, |
|
max_steps=5000, |
|
metric_for_best_model=wer, |
|
mp_parameters=, |
|
no_cuda=False, |
|
num_train_epochs=3.0, |
|
optim=adamw_hf, |
|
optim_args=None, |
|
output_dir=./, |
|
overwrite_output_dir=True, |
|
past_index=-1, |
|
per_device_eval_batch_size=32, |
|
per_device_train_batch_size=32, |
|
predict_with_generate=True, |
|
prediction_loss_only=False, |
|
push_to_hub=True, |
|
push_to_hub_model_id=None, |
|
push_to_hub_organization=None, |
|
push_to_hub_token=<PUSH_TO_HUB_TOKEN>, |
|
ray_scope=last, |
|
remove_unused_columns=True, |
|
report_to=['wandb'], |
|
resume_from_checkpoint=None, |
|
run_name=./, |
|
save_on_each_node=False, |
|
save_safetensors=False, |
|
save_steps=1000, |
|
save_strategy=steps, |
|
save_total_limit=None, |
|
seed=42, |
|
sharded_ddp=[], |
|
skip_memory_metrics=True, |
|
sortish_sampler=False, |
|
tf32=None, |
|
torch_compile=False, |
|
torch_compile_backend=None, |
|
torch_compile_mode=None, |
|
torchdynamo=None, |
|
tpu_metrics_debug=False, |
|
tpu_num_cores=None, |
|
use_ipex=False, |
|
use_legacy_prediction_loop=False, |
|
use_mps_device=False, |
|
warmup_ratio=0.0, |
|
warmup_steps=500, |
|
weight_decay=0.0, |
|
xpu_backend=None, |
|
) |
|
05/10/2023 09:41:07 - INFO - __main__ - Training/evaluation parameters Seq2SeqTrainingArguments( |
|
_n_gpu=2, |
|
adafactor=False, |
|
adam_beta1=0.9, |
|
adam_beta2=0.999, |
|
adam_epsilon=1e-08, |
|
auto_find_batch_size=False, |
|
bf16=False, |
|
bf16_full_eval=False, |
|
data_seed=None, |
|
dataloader_drop_last=False, |
|
dataloader_num_workers=0, |
|
dataloader_pin_memory=True, |
|
ddp_backend=None, |
|
ddp_bucket_cap_mb=None, |
|
ddp_find_unused_parameters=None, |
|
ddp_timeout=1800, |
|
debug=[], |
|
deepspeed=None, |
|
disable_tqdm=False, |
|
do_eval=True, |
|
do_predict=False, |
|
do_train=True, |
|
eval_accumulation_steps=None, |
|
eval_delay=0, |
|
eval_steps=1000, |
|
evaluation_strategy=steps, |
|
fp16=True, |
|
fp16_backend=auto, |
|
fp16_full_eval=False, |
|
fp16_opt_level=O1, |
|
fsdp=[], |
|
fsdp_config={'fsdp_min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False}, |
|
fsdp_min_num_params=0, |
|
fsdp_transformer_layer_cls_to_wrap=None, |
|
full_determinism=False, |
|
generation_config=None, |
|
generation_max_length=225, |
|
generation_num_beams=None, |
|
gradient_accumulation_steps=2, |
|
gradient_checkpointing=True, |
|
greater_is_better=False, |
|
group_by_length=False, |
|
half_precision_backend=auto, |
|
hub_model_id=None, |
|
hub_private_repo=False, |
|
hub_strategy=every_save, |
|
hub_token=<HUB_TOKEN>, |
|
ignore_data_skip=False, |
|
include_inputs_for_metrics=False, |
|
jit_mode_eval=False, |
|
label_names=None, |
|
label_smoothing_factor=0.0, |
|
learning_rate=1e-05, |
|
length_column_name=input_length, |
|
load_best_model_at_end=True, |
|
local_rank=0, |
|
log_level=passive, |
|
log_level_replica=warning, |
|
log_on_each_node=True, |
|
logging_dir=./runs/May10_09-41-06_crimv3mgpu016, |
|
logging_first_step=False, |
|
logging_nan_inf_filter=True, |
|
logging_steps=25, |
|
logging_strategy=steps, |
|
lr_scheduler_type=linear, |
|
max_grad_norm=1.0, |
|
max_steps=5000, |
|
metric_for_best_model=wer, |
|
mp_parameters=, |
|
no_cuda=False, |
|
num_train_epochs=3.0, |
|
optim=adamw_hf, |
|
optim_args=None, |
|
output_dir=./, |
|
overwrite_output_dir=True, |
|
past_index=-1, |
|
per_device_eval_batch_size=32, |
|
per_device_train_batch_size=32, |
|
predict_with_generate=True, |
|
prediction_loss_only=False, |
|
push_to_hub=True, |
|
push_to_hub_model_id=None, |
|
push_to_hub_organization=None, |
|
push_to_hub_token=<PUSH_TO_HUB_TOKEN>, |
|
ray_scope=last, |
|
remove_unused_columns=True, |
|
report_to=['wandb'], |
|
resume_from_checkpoint=None, |
|
run_name=./, |
|
save_on_each_node=False, |
|
save_safetensors=False, |
|
save_steps=1000, |
|
save_strategy=steps, |
|
save_total_limit=None, |
|
seed=42, |
|
sharded_ddp=[], |
|
skip_memory_metrics=True, |
|
sortish_sampler=False, |
|
tf32=None, |
|
torch_compile=False, |
|
torch_compile_backend=None, |
|
torch_compile_mode=None, |
|
torchdynamo=None, |
|
tpu_metrics_debug=False, |
|
tpu_num_cores=None, |
|
use_ipex=False, |
|
use_legacy_prediction_loop=False, |
|
use_mps_device=False, |
|
warmup_ratio=0.0, |
|
warmup_steps=500, |
|
weight_decay=0.0, |
|
xpu_backend=None, |
|
) |
|
[INFO|configuration_utils.py:669] 2023-05-10 09:41:19,641 >> loading configuration file config.json from cache at /home/local/QCRI/dizham/.cache/huggingface/hub/models--openai--whisper-small/snapshots/f6744499d1eba717bcf4d6be735e3d386ffb60ad/config.json |
|
[INFO|configuration_utils.py:725] 2023-05-10 09:41:19,653 >> Model config WhisperConfig { |
|
"_name_or_path": "openai/whisper-small", |
|
"activation_dropout": 0.0, |
|
"activation_function": "gelu", |
|
"apply_spec_augment": false, |
|
"architectures": [ |
|
"WhisperForConditionalGeneration" |
|
], |
|
"attention_dropout": 0.0, |
|
"begin_suppress_tokens": [ |
|
220, |
|
50257 |
|
], |
|
"bos_token_id": 50257, |
|
"classifier_proj_size": 256, |
|
"d_model": 768, |
|
"decoder_attention_heads": 12, |
|
"decoder_ffn_dim": 3072, |
|
"decoder_layerdrop": 0.0, |
|
"decoder_layers": 12, |
|
"decoder_start_token_id": 50258, |
|
"dropout": 0.0, |
|
"encoder_attention_heads": 12, |
|
"encoder_ffn_dim": 3072, |
|
"encoder_layerdrop": 0.0, |
|
"encoder_layers": 12, |
|
"eos_token_id": 50257, |
|
"forced_decoder_ids": [ |
|
[ |
|
1, |
|
50259 |
|
], |
|
[ |
|
2, |
|
50359 |
|
], |
|
[ |
|
3, |
|
50363 |
|
] |
|
], |
|
"init_std": 0.02, |
|
"is_encoder_decoder": true, |
|
"mask_feature_length": 10, |
|
"mask_feature_min_masks": 0, |
|
"mask_feature_prob": 0.0, |
|
"mask_time_length": 10, |
|
"mask_time_min_masks": 2, |
|
"mask_time_prob": 0.05, |
|
"max_length": 448, |
|
"max_source_positions": 1500, |
|
"max_target_positions": 448, |
|
"model_type": "whisper", |
|
"num_hidden_layers": 12, |
|
"num_mel_bins": 80, |
|
"pad_token_id": 50257, |
|
"scale_embedding": false, |
|
"suppress_tokens": [ |
|
1, |
|
2, |
|
7, |
|
8, |
|
9, |
|
10, |
|
14, |
|
25, |
|
26, |
|
27, |
|
28, |
|
29, |
|
31, |
|
58, |
|
59, |
|
60, |
|
61, |
|
62, |
|
63, |
|
90, |
|
91, |
|
92, |
|
93, |
|
359, |
|
503, |
|
522, |
|
542, |
|
873, |
|
893, |
|
902, |
|
918, |
|
922, |
|
931, |
|
1350, |
|
1853, |
|
1982, |
|
2460, |
|
2627, |
|
3246, |
|
3253, |
|
3268, |
|
3536, |
|
3846, |
|
3961, |
|
4183, |
|
4667, |
|
6585, |
|
6647, |
|
7273, |
|
9061, |
|
9383, |
|
10428, |
|
10929, |
|
11938, |
|
12033, |
|
12331, |
|
12562, |
|
13793, |
|
14157, |
|
14635, |
|
15265, |
|
15618, |
|
16553, |
|
16604, |
|
18362, |
|
18956, |
|
20075, |
|
21675, |
|
22520, |
|
26130, |
|
26161, |
|
26435, |
|
28279, |
|
29464, |
|
31650, |
|
32302, |
|
32470, |
|
36865, |
|
42863, |
|
47425, |
|
49870, |
|
50254, |
|
50258, |
|
50360, |
|
50361, |
|
50362 |
|
], |
|
"torch_dtype": "float32", |
|
"transformers_version": "4.29.0.dev0", |
|
"use_cache": true, |
|
"use_weighted_layer_sum": false, |
|
"vocab_size": 51865 |
|
} |
|
|
|
[INFO|feature_extraction_utils.py:469] 2023-05-10 09:41:19,843 >> loading configuration file preprocessor_config.json from cache at /home/local/QCRI/dizham/.cache/huggingface/hub/models--openai--whisper-small/snapshots/f6744499d1eba717bcf4d6be735e3d386ffb60ad/preprocessor_config.json |
|
[INFO|feature_extraction_utils.py:511] 2023-05-10 09:41:19,849 >> Feature extractor WhisperFeatureExtractor { |
|
"chunk_length": 30, |
|
"feature_extractor_type": "WhisperFeatureExtractor", |
|
"feature_size": 80, |
|
"hop_length": 160, |
|
"n_fft": 400, |
|
"n_samples": 480000, |
|
"nb_max_frames": 3000, |
|
"padding_side": "right", |
|
"padding_value": 0.0, |
|
"processor_class": "WhisperProcessor", |
|
"return_attention_mask": false, |
|
"sampling_rate": 16000 |
|
} |
|
|
|
[INFO|tokenization_utils_base.py:1810] 2023-05-10 09:41:20,054 >> loading file vocab.json from cache at /home/local/QCRI/dizham/.cache/huggingface/hub/models--openai--whisper-small/snapshots/f6744499d1eba717bcf4d6be735e3d386ffb60ad/vocab.json |
|
[INFO|tokenization_utils_base.py:1810] 2023-05-10 09:41:20,054 >> loading file tokenizer.json from cache at /home/local/QCRI/dizham/.cache/huggingface/hub/models--openai--whisper-small/snapshots/f6744499d1eba717bcf4d6be735e3d386ffb60ad/tokenizer.json |
|
[INFO|tokenization_utils_base.py:1810] 2023-05-10 09:41:20,054 >> loading file merges.txt from cache at /home/local/QCRI/dizham/.cache/huggingface/hub/models--openai--whisper-small/snapshots/f6744499d1eba717bcf4d6be735e3d386ffb60ad/merges.txt |
|
[INFO|tokenization_utils_base.py:1810] 2023-05-10 09:41:20,054 >> loading file normalizer.json from cache at /home/local/QCRI/dizham/.cache/huggingface/hub/models--openai--whisper-small/snapshots/f6744499d1eba717bcf4d6be735e3d386ffb60ad/normalizer.json |
|
[INFO|tokenization_utils_base.py:1810] 2023-05-10 09:41:20,054 >> loading file added_tokens.json from cache at /home/local/QCRI/dizham/.cache/huggingface/hub/models--openai--whisper-small/snapshots/f6744499d1eba717bcf4d6be735e3d386ffb60ad/added_tokens.json |
|
[INFO|tokenization_utils_base.py:1810] 2023-05-10 09:41:20,054 >> loading file special_tokens_map.json from cache at /home/local/QCRI/dizham/.cache/huggingface/hub/models--openai--whisper-small/snapshots/f6744499d1eba717bcf4d6be735e3d386ffb60ad/special_tokens_map.json |
|
[INFO|tokenization_utils_base.py:1810] 2023-05-10 09:41:20,054 >> loading file tokenizer_config.json from cache at /home/local/QCRI/dizham/.cache/huggingface/hub/models--openai--whisper-small/snapshots/f6744499d1eba717bcf4d6be735e3d386ffb60ad/tokenizer_config.json |
|
[INFO|modeling_utils.py:2542] 2023-05-10 09:41:20,144 >> loading weights file pytorch_model.bin from cache at /home/local/QCRI/dizham/.cache/huggingface/hub/models--openai--whisper-small/snapshots/f6744499d1eba717bcf4d6be735e3d386ffb60ad/pytorch_model.bin |
|
[INFO|configuration_utils.py:577] 2023-05-10 09:41:20,754 >> Generate config GenerationConfig { |
|
"_from_model_config": true, |
|
"begin_suppress_tokens": [ |
|
220, |
|
50257 |
|
], |
|
"bos_token_id": 50257, |
|
"decoder_start_token_id": 50258, |
|
"eos_token_id": 50257, |
|
"max_length": 448, |
|
"pad_token_id": 50257, |
|
"transformers_version": "4.29.0.dev0", |
|
"use_cache": false |
|
} |
|
|
|
[INFO|modeling_utils.py:3211] 2023-05-10 09:41:23,296 >> All model checkpoint weights were used when initializing WhisperForConditionalGeneration. |
|
|
|
[INFO|modeling_utils.py:3219] 2023-05-10 09:41:23,296 >> All the weights of WhisperForConditionalGeneration were initialized from the model checkpoint at openai/whisper-small. |
|
If your task is similar to the task the model of the checkpoint was trained on, you can already use WhisperForConditionalGeneration for predictions without further training. |
|
[INFO|configuration_utils.py:539] 2023-05-10 09:41:23,501 >> loading configuration file generation_config.json from cache at /home/local/QCRI/dizham/.cache/huggingface/hub/models--openai--whisper-small/snapshots/f6744499d1eba717bcf4d6be735e3d386ffb60ad/generation_config.json |
|
[INFO|configuration_utils.py:577] 2023-05-10 09:41:23,502 >> Generate config GenerationConfig { |
|
"begin_suppress_tokens": [ |
|
220, |
|
50257 |
|
], |
|
"bos_token_id": 50257, |
|
"decoder_start_token_id": 50258, |
|
"eos_token_id": 50257, |
|
"forced_decoder_ids": [ |
|
[ |
|
1, |
|
null |
|
], |
|
[ |
|
2, |
|
50359 |
|
] |
|
], |
|
"is_multilingual": true, |
|
"lang_to_id": { |
|
"<|af|>": 50327, |
|
"<|am|>": 50334, |
|
"<|ar|>": 50272, |
|
"<|as|>": 50350, |
|
"<|az|>": 50304, |
|
"<|ba|>": 50355, |
|
"<|be|>": 50330, |
|
"<|bg|>": 50292, |
|
"<|bn|>": 50302, |
|
"<|bo|>": 50347, |
|
"<|br|>": 50309, |
|
"<|bs|>": 50315, |
|
"<|ca|>": 50270, |
|
"<|cs|>": 50283, |
|
"<|cy|>": 50297, |
|
"<|da|>": 50285, |
|
"<|de|>": 50261, |
|
"<|el|>": 50281, |
|
"<|en|>": 50259, |
|
"<|es|>": 50262, |
|
"<|et|>": 50307, |
|
"<|eu|>": 50310, |
|
"<|fa|>": 50300, |
|
"<|fi|>": 50277, |
|
"<|fo|>": 50338, |
|
"<|fr|>": 50265, |
|
"<|gl|>": 50319, |
|
"<|gu|>": 50333, |
|
"<|haw|>": 50352, |
|
"<|ha|>": 50354, |
|
"<|he|>": 50279, |
|
"<|hi|>": 50276, |
|
"<|hr|>": 50291, |
|
"<|ht|>": 50339, |
|
"<|hu|>": 50286, |
|
"<|hy|>": 50312, |
|
"<|id|>": 50275, |
|
"<|is|>": 50311, |
|
"<|it|>": 50274, |
|
"<|ja|>": 50266, |
|
"<|jw|>": 50356, |
|
"<|ka|>": 50329, |
|
"<|kk|>": 50316, |
|
"<|km|>": 50323, |
|
"<|kn|>": 50306, |
|
"<|ko|>": 50264, |
|
"<|la|>": 50294, |
|
"<|lb|>": 50345, |
|
"<|ln|>": 50353, |
|
"<|lo|>": 50336, |
|
"<|lt|>": 50293, |
|
"<|lv|>": 50301, |
|
"<|mg|>": 50349, |
|
"<|mi|>": 50295, |
|
"<|mk|>": 50308, |
|
"<|ml|>": 50296, |
|
"<|mn|>": 50314, |
|
"<|mr|>": 50320, |
|
"<|ms|>": 50282, |
|
"<|mt|>": 50343, |
|
"<|my|>": 50346, |
|
"<|ne|>": 50313, |
|
"<|nl|>": 50271, |
|
"<|nn|>": 50342, |
|
"<|no|>": 50288, |
|
"<|oc|>": 50328, |
|
"<|pa|>": 50321, |
|
"<|pl|>": 50269, |
|
"<|ps|>": 50340, |
|
"<|pt|>": 50267, |
|
"<|ro|>": 50284, |
|
"<|ru|>": 50263, |
|
"<|sa|>": 50344, |
|
"<|sd|>": 50332, |
|
"<|si|>": 50322, |
|
"<|sk|>": 50298, |
|
"<|sl|>": 50305, |
|
"<|sn|>": 50324, |
|
"<|so|>": 50326, |
|
"<|sq|>": 50317, |
|
"<|sr|>": 50303, |
|
"<|su|>": 50357, |
|
"<|sv|>": 50273, |
|
"<|sw|>": 50318, |
|
"<|ta|>": 50287, |
|
"<|te|>": 50299, |
|
"<|tg|>": 50331, |
|
"<|th|>": 50289, |
|
"<|tk|>": 50341, |
|
"<|tl|>": 50348, |
|
"<|tr|>": 50268, |
|
"<|tt|>": 50351, |
|
"<|uk|>": 50280, |
|
"<|ur|>": 50290, |
|
"<|uz|>": 50337, |
|
"<|vi|>": 50278, |
|
"<|yi|>": 50335, |
|
"<|yo|>": 50325, |
|
"<|zh|>": 50260 |
|
}, |
|
"max_initial_timestamp_index": 1, |
|
"max_length": 448, |
|
"no_timestamps_token_id": 50363, |
|
"pad_token_id": 50257, |
|
"return_timestamps": false, |
|
"suppress_tokens": [ |
|
1, |
|
2, |
|
7, |
|
8, |
|
9, |
|
10, |
|
14, |
|
25, |
|
26, |
|
27, |
|
28, |
|
29, |
|
31, |
|
58, |
|
59, |
|
60, |
|
61, |
|
62, |
|
63, |
|
90, |
|
91, |
|
92, |
|
93, |
|
359, |
|
503, |
|
522, |
|
542, |
|
873, |
|
893, |
|
902, |
|
918, |
|
922, |
|
931, |
|
1350, |
|
1853, |
|
1982, |
|
2460, |
|
2627, |
|
3246, |
|
3253, |
|
3268, |
|
3536, |
|
3846, |
|
3961, |
|
4183, |
|
4667, |
|
6585, |
|
6647, |
|
7273, |
|
9061, |
|
9383, |
|
10428, |
|
10929, |
|
11938, |
|
12033, |
|
12331, |
|
12562, |
|
13793, |
|
14157, |
|
14635, |
|
15265, |
|
15618, |
|
16553, |
|
16604, |
|
18362, |
|
18956, |
|
20075, |
|
21675, |
|
22520, |
|
26130, |
|
26161, |
|
26435, |
|
28279, |
|
29464, |
|
31650, |
|
32302, |
|
32470, |
|
36865, |
|
42863, |
|
47425, |
|
49870, |
|
50254, |
|
50258, |
|
50358, |
|
50359, |
|
50360, |
|
50361, |
|
50362 |
|
], |
|
"task_to_id": { |
|
"transcribe": 50359, |
|
"translate": 50358 |
|
}, |
|
"transformers_version": "4.29.0.dev0" |
|
} |
|
|
|
[INFO|feature_extraction_utils.py:369] 2023-05-10 09:41:24,661 >> Feature extractor saved in ./preprocessor_config.json |
|
[INFO|tokenization_utils_base.py:2181] 2023-05-10 09:41:24,666 >> tokenizer config file saved in ./tokenizer_config.json |
|
[INFO|tokenization_utils_base.py:2188] 2023-05-10 09:41:24,671 >> Special tokens file saved in ./special_tokens_map.json |
|
[INFO|configuration_utils.py:458] 2023-05-10 09:41:24,826 >> Configuration saved in ./config.json |
|
[INFO|image_processing_utils.py:307] 2023-05-10 09:41:24,827 >> loading configuration file ./preprocessor_config.json |
|
[INFO|feature_extraction_utils.py:467] 2023-05-10 09:41:24,834 >> loading configuration file ./preprocessor_config.json |
|
[INFO|feature_extraction_utils.py:511] 2023-05-10 09:41:24,835 >> Feature extractor WhisperFeatureExtractor { |
|
"chunk_length": 30, |
|
"feature_extractor_type": "WhisperFeatureExtractor", |
|
"feature_size": 80, |
|
"hop_length": 160, |
|
"n_fft": 400, |
|
"n_samples": 480000, |
|
"nb_max_frames": 3000, |
|
"padding_side": "right", |
|
"padding_value": 0.0, |
|
"processor_class": "WhisperProcessor", |
|
"return_attention_mask": false, |
|
"sampling_rate": 16000 |
|
} |
|
|
|
[INFO|tokenization_utils_base.py:1808] 2023-05-10 09:41:24,836 >> loading file vocab.json |
|
[INFO|tokenization_utils_base.py:1808] 2023-05-10 09:41:24,836 >> loading file tokenizer.json |
|
[INFO|tokenization_utils_base.py:1808] 2023-05-10 09:41:24,836 >> loading file merges.txt |
|
[INFO|tokenization_utils_base.py:1808] 2023-05-10 09:41:24,836 >> loading file normalizer.json |
|
[INFO|tokenization_utils_base.py:1808] 2023-05-10 09:41:24,836 >> loading file added_tokens.json |
|
[INFO|tokenization_utils_base.py:1808] 2023-05-10 09:41:24,836 >> loading file special_tokens_map.json |
|
[INFO|tokenization_utils_base.py:1808] 2023-05-10 09:41:24,836 >> loading file tokenizer_config.json |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,898 >> Adding <|startoftranscript|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,898 >> Adding <|en|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,898 >> Adding <|zh|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,898 >> Adding <|de|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,898 >> Adding <|es|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,898 >> Adding <|ru|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,898 >> Adding <|ko|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,898 >> Adding <|fr|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,898 >> Adding <|ja|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,898 >> Adding <|pt|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,898 >> Adding <|tr|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,898 >> Adding <|pl|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,898 >> Adding <|ca|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,898 >> Adding <|nl|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,898 >> Adding <|ar|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,898 >> Adding <|sv|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,898 >> Adding <|it|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,898 >> Adding <|id|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,898 >> Adding <|hi|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,898 >> Adding <|fi|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,898 >> Adding <|vi|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,898 >> Adding <|he|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,898 >> Adding <|uk|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,898 >> Adding <|el|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,898 >> Adding <|ms|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,898 >> Adding <|cs|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,898 >> Adding <|ro|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,898 >> Adding <|da|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,898 >> Adding <|hu|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,898 >> Adding <|ta|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,898 >> Adding <|no|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,898 >> Adding <|th|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|ur|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|hr|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|bg|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|lt|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|la|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|mi|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|ml|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|cy|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|sk|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|te|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|fa|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|lv|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|bn|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|sr|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|az|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|sl|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|kn|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|et|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|mk|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|br|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|eu|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|is|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|hy|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|ne|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|mn|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|bs|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|kk|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|sq|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|sw|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|gl|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|mr|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|pa|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|si|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|km|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|sn|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|yo|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|so|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|af|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|oc|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|ka|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|be|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,899 >> Adding <|tg|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,900 >> Adding <|sd|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,900 >> Adding <|gu|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,900 >> Adding <|am|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,900 >> Adding <|yi|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,900 >> Adding <|lo|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,900 >> Adding <|uz|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,900 >> Adding <|fo|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,900 >> Adding <|ht|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,900 >> Adding <|ps|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,900 >> Adding <|tk|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,900 >> Adding <|nn|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,900 >> Adding <|mt|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,900 >> Adding <|sa|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,900 >> Adding <|lb|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,900 >> Adding <|my|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,900 >> Adding <|bo|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,900 >> Adding <|tl|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,900 >> Adding <|mg|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,900 >> Adding <|as|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,900 >> Adding <|tt|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,900 >> Adding <|haw|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,900 >> Adding <|ln|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,900 >> Adding <|ha|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,900 >> Adding <|ba|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,900 >> Adding <|jw|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,900 >> Adding <|su|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,900 >> Adding <|translate|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,900 >> Adding <|transcribe|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,900 >> Adding <|startoflm|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,900 >> Adding <|startofprev|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,900 >> Adding <|nocaptions|> to the vocabulary |
|
[INFO|tokenization_utils.py:426] 2023-05-10 09:41:24,900 >> Adding <|notimestamps|> to the vocabulary |
|
/home/local/QCRI/dizham/kanari/whisper/whisper-small-es/./ is already a clone of https://huggingface.co/danielizham/whisper-small-es. Make sure you pull the latest changes with `repo.git_pull()`. |
|
05/10/2023 09:41:27 - WARNING - huggingface_hub.repository - /home/local/QCRI/dizham/kanari/whisper/whisper-small-es/./ is already a clone of https://huggingface.co/danielizham/whisper-small-es. Make sure you pull the latest changes with `repo.git_pull()`. |
|
[INFO|trainer.py:565] 2023-05-10 09:41:30,128 >> max_steps is given, it will override any value given in num_train_epochs |
|
[INFO|trainer.py:622] 2023-05-10 09:41:30,129 >> Using cuda_amp half precision backend |
|
/home/local/QCRI/dizham/miniconda3/envs/whisper/lib/python3.9/site-packages/transformers/optimization.py:407: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning |
|
warnings.warn( |
|
[INFO|trainer.py:1771] 2023-05-10 09:41:30,142 >> ***** Running training ***** |
|
[INFO|trainer.py:1772] 2023-05-10 09:41:30,142 >> Num examples = 640,000 |
|
[INFO|trainer.py:1773] 2023-05-10 09:41:30,142 >> Num Epochs = 9,223,372,036,854,775,807 |
|
[INFO|trainer.py:1774] 2023-05-10 09:41:30,142 >> Instantaneous batch size per device = 32 |
|
[INFO|trainer.py:1775] 2023-05-10 09:41:30,142 >> Total train batch size (w. parallel, distributed & accumulation) = 128 |
|
[INFO|trainer.py:1776] 2023-05-10 09:41:30,142 >> Gradient Accumulation steps = 2 |
|
[INFO|trainer.py:1777] 2023-05-10 09:41:30,142 >> Total optimization steps = 5,000 |
|
[INFO|trainer.py:1778] 2023-05-10 09:41:30,143 >> Number of trainable parameters = 241,734,912 |
|
[INFO|integrations.py:720] 2023-05-10 09:41:30,144 >> Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true" |
|
wandb: Currently logged in as: danielizham. Use `wandb login --relogin` to force relogin |
|
wandb: Tracking run with wandb version 0.15.2 |
|
wandb: Run data is saved locally in /home/local/QCRI/dizham/kanari/whisper/whisper-small-es/wandb/run-20230510_094132-lvsln7ks |
|
wandb: Run `wandb offline` to turn off syncing. |
|
wandb: Syncing run astral-silence-4 |
|
wandb: βοΈ View project at https://wandb.ai/danielizham/huggingface |
|
wandb: π View run at https://wandb.ai/danielizham/huggingface/runs/lvsln7ks |
|
0%| | 0/5000 [00:00<?, ?it/s] |
|
Reading metadata...: 0it [00:00, ?it/s][A |
|
Reading metadata...: 1it [00:02, 2.78s/it][A |
|
Reading metadata...: 14937it [00:02, 7305.51it/s][A |
|
Reading metadata...: 23723it [00:05, 4371.05it/s][A |
|
Reading metadata...: 37962it [00:05, 8709.27it/s][A |
|
Reading metadata...: 46554it [00:08, 5784.42it/s][A |
|
Reading metadata...: 59039it [00:10, 5814.78it/s][A |
|
Reading metadata...: 73124it [00:10, 9223.48it/s][A |
|
Reading metadata...: 80356it [00:11, 10494.41it/s][A |
|
Reading metadata...: 94298it [00:11, 16128.19it/s][A |
|
Reading metadata...: 102493it [00:13, 8675.48it/s][A |
|
Reading metadata...: 114047it [00:13, 11209.00it/s][A |
|
Reading metadata...: 127830it [00:14, 16692.31it/s][A |
|
Reading metadata...: 135411it [00:14, 17563.84it/s][A |
|
Reading metadata...: 149333it [00:14, 25797.66it/s][A |
|
Reading metadata...: 157683it [00:16, 10642.17it/s][A |
|
Reading metadata...: 168342it [00:17, 13126.73it/s][A |
|
Reading metadata...: 180109it [00:17, 18417.11it/s][A |
|
Reading metadata...: 187150it [00:20, 7507.64it/s] [A |
|
Reading metadata...: 201989it [00:20, 12006.07it/s][A |
|
Reading metadata...: 209942it [00:22, 7783.60it/s] [A |
|
Reading metadata...: 221914it [00:23, 9303.36it/s][A
Reading metadata...: 230467it [00:23, 9864.38it/s] |
|
|
|
Reading metadata...: 0it [00:00, ?it/s][A |
|
Reading metadata...: 1it [00:06, 6.97s/it][A
Reading metadata...: 15520it [00:07, 2194.89it/s] |
|
[INFO|trainer_utils.py:693] 2023-05-10 09:43:41,176 >> The following columns in the training set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`, you can safely ignore this message. |
|
/home/local/QCRI/dizham/miniconda3/envs/whisper/lib/python3.9/site-packages/torch/nn/parallel/_functions.py:68: UserWarning: Was asked to gather along dimension 0, but all input tensors were scalars; will instead unsqueeze and return a vector. |
|
warnings.warn('Was asked to gather along dimension 0, but all ' |
|
0%| | 1/5000 [02:24<200:03:10, 144.07s/it]
0%| | 2/5000 [02:54<107:22:57, 77.35s/it]
0%| | 3/5000 [03:22<76:05:16, 54.82s/it]
0%| | 4/5000 [03:52<62:10:56, 44.81s/it]
0%| | 5/5000 [04:19<53:36:03, 38.63s/it]
0%| | 6/5000 [04:49<49:12:44, 35.48s/it]
0%| | 7/5000 [05:16<45:26:55, 32.77s/it]
0%| | 8/5000 [05:45<43:41:35, 31.51s/it]
0%| | 9/5000 [06:12<41:42:07, 30.08s/it]
0%| | 10/5000 [06:41<41:26:32, 29.90s/it]
0%| | 11/5000 [07:08<40:12:15, 29.01s/it]
0%| | 12/5000 [07:39<40:55:33, 29.54s/it]
0%| | 13/5000 [08:09<41:07:16, 29.68s/it]
0%| | 14/5000 [08:36<40:00:34, 28.89s/it]
0%| | 15/5000 [09:06<40:32:59, 29.28s/it]
0%| | 16/5000 [09:33<39:37:45, 28.62s/it]
0%| | 17/5000 [10:03<40:16:19, 29.09s/it]
0%| | 18/5000 [10:39<43:07:28, 31.16s/it]
0%| | 19/5000 [11:06<41:22:59, 29.91s/it]
0%| | 20/5000 [11:37<41:28:17, 29.98s/it]
0%| | 21/5000 [12:04<40:16:22, 29.12s/it]
0%| | 22/5000 [12:34<40:42:41, 29.44s/it]
0%| | 23/5000 [13:01<39:32:57, 28.61s/it]
0%| | 24/5000 [13:30<40:01:18, 28.95s/it]
0%| | 25/5000 [13:57<39:16:32, 28.42s/it]
0%| | 25/5000 [13:57<39:16:32, 28.42s/it]
1%| | 26/5000 [14:25<38:51:21, 28.12s/it]
1%| | 27/5000 [14:55<39:32:56, 28.63s/it]
1%| | 28/5000 [15:23<39:13:02, 28.40s/it]
1%| | 29/5000 [15:50<38:58:56, 28.23s/it]
1%| | 30/5000 [16:19<38:59:29, 28.24s/it]
1%| | 31/5000 [16:48<39:33:35, 28.66s/it]
1%| | 32/5000 [17:17<39:24:55, 28.56s/it]
1%| | 33/5000 [17:50<41:29:21, 30.07s/it]
1%| | 34/5000 [18:20<41:10:06, 29.84s/it]
1%| | 35/5000 [18:51<42:01:33, 30.47s/it]
1%| | 36/5000 [19:19<40:41:05, 29.51s/it]
1%| | 37/5000 [19:46<39:54:21, 28.95s/it]
1%| | 38/5000 [20:14<39:24:32, 28.59s/it]
1%| | 39/5000 [20:41<38:45:04, 28.12s/it]
1%| | 40/5000 [21:11<39:39:26, 28.78s/it]
1%| | 41/5000 [21:39<39:08:27, 28.41s/it]
1%| | 42/5000 [22:06<38:41:21, 28.09s/it]
1%| | 43/5000 [22:42<41:44:40, 30.32s/it]
1%| | 44/5000 [23:09<40:21:48, 29.32s/it]
1%| | 45/5000 [23:36<39:31:21, 28.71s/it]
1%| | 46/5000 [24:04<38:58:42, 28.33s/it]
1%| | 47/5000 [24:31<38:38:47, 28.09s/it]
1%| | 48/5000 [25:02<39:48:27, 28.94s/it]
1%| | 49/5000 [25:31<39:52:07, 28.99s/it]
1%| | 50/5000 [26:04<41:27:08, 30.15s/it]
1%| | 50/5000 [26:04<41:27:08, 30.15s/it]
1%| | 51/5000 [26:31<40:16:34, 29.30s/it]
1%| | 52/5000 [27:01<40:28:01, 29.44s/it]
1%| | 53/5000 [27:29<39:51:16, 29.00s/it]
1%| | 54/5000 [27:57<39:29:05, 28.74s/it]
1%| | 55/5000 [28:25<39:07:40, 28.49s/it]
1%| | 56/5000 [29:03<42:57:48, 31.28s/it]
1%| | 57/5000 [29:30<41:09:56, 29.98s/it]
1%| | 58/5000 [30:01<41:46:03, 30.43s/it]
1%| | 59/5000 [30:29<40:26:51, 29.47s/it]
1%| | 60/5000 [30:57<40:00:21, 29.15s/it]
1%| | 61/5000 [31:24<39:11:00, 28.56s/it]
1%| | 62/5000 [31:51<38:36:45, 28.15s/it]
1%|β | 63/5000 [32:19<38:13:12, 27.87s/it]
1%|β | 64/5000 [32:45<37:33:23, 27.39s/it]
1%|β | 65/5000 [33:12<37:29:43, 27.35s/it]
1%|β | 66/5000 [33:40<37:30:33, 27.37s/it]
1%|β | 67/5000 [34:07<37:35:52, 27.44s/it]
1%|β | 68/5000 [34:35<37:44:18, 27.55s/it]
1%|β | 69/5000 [35:05<38:34:41, 28.17s/it]
1%|β | 70/5000 [35:33<38:41:01, 28.25s/it]
1%|β | 71/5000 [36:04<39:45:54, 29.04s/it]
1%|β | 72/5000 [36:33<39:57:03, 29.18s/it]
1%|β | 73/5000 [37:01<39:17:44, 28.71s/it]
1%|β | 74/5000 [37:35<41:24:44, 30.26s/it]
2%|β | 75/5000 [38:05<41:31:52, 30.36s/it]
2%|β | 75/5000 [38:05<41:31:52, 30.36s/it]
2%|β | 76/5000 [38:34<40:37:48, 29.71s/it]
2%|β | 77/5000 [39:03<40:31:44, 29.64s/it]
2%|β | 78/5000 [39:33<40:48:16, 29.84s/it]
2%|β | 79/5000 [40:07<42:19:07, 30.96s/it]
2%|β | 80/5000 [40:34<40:46:52, 29.84s/it]
2%|β | 81/5000 [41:03<40:32:12, 29.67s/it]
2%|β | 82/5000 [41:31<39:46:41, 29.12s/it]
2%|β | 83/5000 [42:05<41:31:05, 30.40s/it]
2%|β | 84/5000 [42:32<40:02:43, 29.33s/it]
2%|β | 85/5000 [43:01<39:59:18, 29.29s/it]
2%|β | 86/5000 [43:28<38:59:26, 28.56s/it]
2%|β | 87/5000 [44:06<42:53:09, 31.42s/it]
2%|β | 88/5000 [44:43<45:26:03, 33.30s/it]
2%|β | 89/5000 [45:10<42:50:12, 31.40s/it]
2%|β | 90/5000 [45:41<42:30:02, 31.16s/it]
2%|β | 91/5000 [46:08<40:59:26, 30.06s/it]
2%|β | 92/5000 [46:41<42:10:18, 30.93s/it]
2%|β | 93/5000 [47:13<42:35:51, 31.25s/it]
2%|β | 94/5000 [47:41<40:53:55, 30.01s/it]
2%|β | 95/5000 [48:10<40:45:16, 29.91s/it]
2%|β | 96/5000 [48:37<39:36:05, 29.07s/it]
2%|β | 97/5000 [49:07<39:53:30, 29.29s/it]
2%|β | 98/5000 [49:35<39:22:36, 28.92s/it]
2%|β | 99/5000 [50:05<39:34:07, 29.07s/it]
2%|β | 100/5000 [50:33<39:07:41, 28.75s/it]
2%|β | 100/5000 [50:33<39:07:41, 28.75s/it]
2%|β | 101/5000 [51:03<39:37:29, 29.12s/it]
2%|β | 102/5000 [51:32<39:46:16, 29.23s/it]
2%|β | 103/5000 [52:03<40:16:49, 29.61s/it]
2%|β | 104/5000 [52:30<39:16:13, 28.88s/it]
2%|β | 105/5000 [53:05<41:54:06, 30.82s/it]
2%|β | 106/5000 [53:33<40:50:31, 30.04s/it]
2%|β | 107/5000 [54:02<40:24:41, 29.73s/it]
2%|β | 108/5000 [54:30<39:36:56, 29.15s/it]
2%|β | 109/5000 [55:00<39:44:16, 29.25s/it]
2%|β | 110/5000 [55:28<39:30:49, 29.09s/it]
2%|β | 111/5000 [55:58<39:43:10, 29.25s/it]
2%|β | 112/5000 [56:26<39:08:06, 28.82s/it]
2%|β | 113/5000 [56:59<40:59:40, 30.20s/it]
2%|β | 114/5000 [57:26<39:36:32, 29.18s/it]
2%|β | 115/5000 [57:57<40:11:03, 29.61s/it]
2%|β | 116/5000 [58:23<38:56:05, 28.70s/it]
2%|β | 117/5000 [58:50<38:19:58, 28.26s/it]
2%|β | 118/5000 [59:19<38:27:04, 28.35s/it]
2%|β | 119/5000 [59:49<39:01:08, 28.78s/it]
2%|β | 120/5000 [1:00:16<38:35:47, 28.47s/it]
2%|β | 121/5000 [1:00:46<39:11:40, 28.92s/it]
2%|β | 122/5000 [1:01:14<38:41:56, 28.56s/it]
2%|β | 123/5000 [1:01:42<38:21:39, 28.32s/it]
2%|β | 124/5000 [1:02:09<37:55:13, 28.00s/it]
2%|β | 125/5000 [1:02:40<39:01:31, 28.82s/it]
2%|β | 125/5000 [1:02:40<39:01:31, 28.82s/it]
3%|β | 126/5000 [1:03:08<38:39:31, 28.55s/it]
3%|β | 127/5000 [1:03:37<39:03:46, 28.86s/it]
3%|β | 128/5000 [1:04:06<39:06:47, 28.90s/it]
3%|β | 129/5000 [1:04:42<41:43:08, 30.83s/it]
3%|β | 130/5000 [1:05:10<40:51:44, 30.21s/it]
3%|β | 131/5000 [1:05:40<40:30:48, 29.95s/it]
3%|β | 132/5000 [1:06:08<39:35:09, 29.27s/it]
3%|β | 133/5000 [1:06:36<39:11:03, 28.98s/it]
3%|β | 134/5000 [1:07:04<38:48:37, 28.71s/it]
3%|β | 135/5000 [1:07:32<38:29:33, 28.48s/it]
3%|β | 136/5000 [1:08:00<38:24:21, 28.43s/it]
3%|β | 137/5000 [1:08:27<37:53:50, 28.05s/it]
3%|β | 138/5000 [1:08:57<38:21:46, 28.41s/it]
3%|β | 139/5000 [1:09:32<41:20:50, 30.62s/it]
3%|β | 140/5000 [1:10:00<40:14:41, 29.81s/it]
3%|β | 141/5000 [1:10:30<40:20:22, 29.89s/it]
3%|β | 142/5000 [1:10:57<39:11:04, 29.04s/it]
3%|β | 143/5000 [1:11:28<39:57:00, 29.61s/it]
3%|β | 144/5000 [1:11:56<39:00:59, 28.93s/it]
3%|β | 145/5000 [1:12:26<39:24:42, 29.22s/it]
3%|β | 146/5000 [1:12:54<39:06:52, 29.01s/it]
3%|β | 147/5000 [1:13:22<38:44:26, 28.74s/it]
3%|β | 148/5000 [1:13:49<37:52:18, 28.10s/it]
3%|β | 149/5000 [1:14:18<38:28:23, 28.55s/it]
3%|β | 150/5000 [1:14:46<38:01:15, 28.22s/it]
3%|β | 150/5000 [1:14:46<38:01:15, 28.22s/it]
3%|β | 151/5000 [1:15:13<37:42:14, 27.99s/it]
3%|β | 152/5000 [1:15:48<40:16:30, 29.91s/it]
3%|β | 153/5000 [1:16:16<39:25:12, 29.28s/it]
3%|β | 154/5000 [1:16:43<38:45:06, 28.79s/it]
3%|β | 155/5000 [1:17:10<38:00:43, 28.24s/it]
3%|β | 156/5000 [1:17:38<37:59:49, 28.24s/it]
3%|β | 157/5000 [1:18:07<38:18:46, 28.48s/it]
3%|β | 158/5000 [1:18:35<38:03:05, 28.29s/it]
3%|β | 159/5000 [1:19:03<37:52:46, 28.17s/it]
3%|β | 160/5000 [1:19:33<38:26:07, 28.59s/it]
3%|β | 161/5000 [1:20:00<38:05:37, 28.34s/it]
3%|β | 162/5000 [1:20:28<37:36:59, 27.99s/it]
3%|β | 163/5000 [1:20:55<37:31:20, 27.93s/it]
3%|β | 164/5000 [1:21:23<37:24:09, 27.84s/it]
3%|β | 165/5000 [1:21:50<37:04:35, 27.61s/it]
3%|β | 166/5000 [1:22:18<37:08:43, 27.66s/it]
3%|β | 167/5000 [1:22:45<36:59:38, 27.56s/it]
3%|β | 168/5000 [1:23:14<37:16:54, 27.78s/it]
3%|β | 169/5000 [1:23:41<37:03:58, 27.62s/it]
3%|β | 170/5000 [1:24:11<38:12:34, 28.48s/it]
3%|β | 171/5000 [1:24:40<38:22:43, 28.61s/it]
3%|β | 172/5000 [1:25:09<38:36:28, 28.79s/it]
3%|β | 173/5000 [1:25:37<38:12:00, 28.49s/it]
3%|β | 174/5000 [1:26:08<38:57:16, 29.06s/it]
4%|β | 175/5000 [1:26:36<38:31:03, 28.74s/it]
4%|β | 175/5000 [1:26:36<38:31:03, 28.74s/it]
4%|β | 176/5000 [1:27:03<37:59:57, 28.36s/it]
4%|β | 177/5000 [1:27:31<37:46:21, 28.19s/it]
4%|β | 178/5000 [1:28:00<38:18:46, 28.60s/it]
4%|β | 179/5000 [1:28:28<37:50:52, 28.26s/it]
4%|β | 180/5000 [1:28:58<38:23:11, 28.67s/it]
4%|β | 181/5000 [1:29:28<39:12:56, 29.30s/it]
4%|β | 182/5000 [1:29:56<38:26:11, 28.72s/it]
4%|β | 183/5000 [1:30:25<38:45:28, 28.97s/it]
4%|β | 184/5000 [1:30:56<39:39:35, 29.65s/it]
4%|β | 185/5000 [1:31:26<39:41:49, 29.68s/it]
4%|β | 186/5000 [1:31:55<39:22:25, 29.44s/it]
4%|β | 187/5000 [1:32:25<39:31:26, 29.56s/it]
4%|β | 188/5000 [1:32:53<39:01:26, 29.20s/it]
4%|β | 189/5000 [1:33:23<39:15:20, 29.37s/it]
4%|β | 190/5000 [1:33:54<39:43:38, 29.73s/it]
4%|β | 191/5000 [1:34:21<38:42:36, 28.98s/it]
4%|β | 192/5000 [1:34:53<40:05:43, 30.02s/it]
4%|β | 193/5000 [1:35:24<40:14:02, 30.13s/it]
4%|β | 194/5000 [1:35:53<39:46:50, 29.80s/it]
4%|β | 195/5000 [1:36:24<40:14:35, 30.15s/it]
4%|β | 196/5000 [1:36:51<39:05:26, 29.29s/it]
4%|β | 197/5000 [1:37:21<39:16:34, 29.44s/it]
4%|β | 198/5000 [1:37:53<40:17:59, 30.21s/it]
4%|β | 199/5000 [1:38:20<39:04:42, 29.30s/it]
4%|β | 200/5000 [1:38:53<40:24:10, 30.30s/it]
4%|β | 200/5000 [1:38:53<40:24:10, 30.30s/it]
4%|β | 201/5000 [1:39:20<39:11:20, 29.40s/it]
4%|β | 202/5000 [1:39:52<40:18:48, 30.25s/it]
4%|β | 203/5000 [1:40:19<39:01:40, 29.29s/it]
4%|β | 204/5000 [1:40:51<39:51:14, 29.92s/it]
4%|β | 205/5000 [1:41:24<41:21:24, 31.05s/it]
4%|β | 206/5000 [1:41:54<40:40:34, 30.55s/it]
4%|β | 207/5000 [1:42:23<40:06:08, 30.12s/it]
4%|β | 208/5000 [1:42:53<40:04:48, 30.11s/it]
4%|β | 209/5000 [1:43:23<40:03:05, 30.10s/it]
4%|β | 210/5000 [1:43:55<40:49:29, 30.68s/it]
4%|β | 211/5000 [1:44:22<39:29:45, 29.69s/it]
4%|β | 212/5000 [1:44:55<40:39:13, 30.57s/it]
4%|β | 213/5000 [1:45:22<39:17:59, 29.55s/it]
4%|β | 214/5000 [1:45:54<40:10:39, 30.22s/it]
4%|β | 215/5000 [1:46:20<38:41:43, 29.11s/it]
4%|β | 216/5000 [1:46:52<39:48:04, 29.95s/it]
4%|β | 217/5000 [1:47:19<38:38:33, 29.09s/it]
4%|β | 218/5000 [1:47:50<39:05:00, 29.42s/it]
4%|β | 219/5000 [1:48:22<40:14:58, 30.31s/it]
4%|β | 220/5000 [1:48:49<39:01:12, 29.39s/it]
4%|β | 221/5000 [1:49:28<42:39:21, 32.13s/it]
4%|β | 222/5000 [1:49:54<40:22:58, 30.43s/it]
4%|β | 223/5000 [1:50:25<40:35:11, 30.59s/it]
4%|β | 224/5000 [1:50:55<40:25:15, 30.47s/it]
4%|β | 225/5000 [1:51:23<39:09:10, 29.52s/it]
4%|β | 225/5000 [1:51:23<39:09:10, 29.52s/it]
5%|β | 226/5000 [1:51:55<40:10:10, 30.29s/it]
5%|β | 227/5000 [1:52:22<38:54:39, 29.35s/it]
5%|β | 228/5000 [1:52:54<39:55:45, 30.12s/it]
5%|β | 229/5000 [1:53:20<38:32:47, 29.09s/it]
5%|β | 230/5000 [1:53:52<39:21:26, 29.70s/it]
5%|β | 231/5000 [1:54:21<39:22:20, 29.72s/it]
5%|β | 232/5000 [1:54:48<38:19:35, 28.94s/it]
5%|β | 233/5000 [1:55:21<39:38:40, 29.94s/it]
5%|β | 234/5000 [1:55:48<38:32:29, 29.11s/it]
5%|β | 235/5000 [1:56:17<38:35:33, 29.16s/it]
5%|β | 236/5000 [1:56:47<38:48:54, 29.33s/it]
5%|β | 237/5000 [1:57:16<38:51:52, 29.37s/it]
5%|β | 238/5000 [1:57:47<39:14:27, 29.67s/it]
5%|β | 239/5000 [1:58:09<36:24:34, 27.53s/it]
5%|β | 240/5000 [1:58:20<29:42:14, 22.47s/it]
5%|β | 241/5000 [1:58:30<24:56:47, 18.87s/it]
5%|β | 242/5000 [1:58:41<21:36:39, 16.35s/it]{'loss': 1.0543, 'learning_rate': 4.800000000000001e-07, 'epoch': 0.01} |
|
{'loss': 0.8847, 'learning_rate': 9.800000000000001e-07, 'epoch': 0.01} |
|
{'loss': 0.54, 'learning_rate': 1.48e-06, 'epoch': 0.01} |
|
{'loss': 0.304, 'learning_rate': 1.98e-06, 'epoch': 0.02} |
|
{'loss': 0.2861, 'learning_rate': 2.4800000000000004e-06, 'epoch': 0.03} |
|
{'loss': 0.2395, 'learning_rate': 2.9800000000000003e-06, 'epoch': 0.03} |
|
{'loss': 0.2282, 'learning_rate': 3.48e-06, 'epoch': 0.04} |
|
{'loss': 0.2209, 'learning_rate': 3.980000000000001e-06, 'epoch': 0.04} |
|
{'loss': 0.2299, 'learning_rate': 4.48e-06, 'epoch': 0.04} |
|
|
|
Reading metadata...: 0it [00:00, ?it/s][A |
|
Reading metadata...: 1it [00:00, 1.28it/s][A |
|
Reading metadata...: 14726it [00:00, 22755.29it/s][A |
|
Reading metadata...: 23388it [00:03, 6340.58it/s] [A |
|
Reading metadata...: 37933it [00:03, 12680.17it/s][A |
|
Reading metadata...: 46618it [00:06, 6782.81it/s] [A |
|
Reading metadata...: 59039it [00:06, 9618.95it/s][A |
|
Reading metadata...: 72546it [00:06, 14763.51it/s][A |
|
Reading metadata...: 80136it [00:06, 16119.07it/s][A |
|
Reading metadata...: 93827it [00:07, 24044.83it/s][A |
|
Reading metadata...: 102171it [00:15, 3763.63it/s][A |
|
Reading metadata...: 114047it [00:15, 5268.91it/s][A |
|
Reading metadata...: 127993it [00:15, 8041.86it/s][A |
|
Reading metadata...: 135601it [00:15, 9277.73it/s][A |
|
Reading metadata...: 149664it [00:23, 3971.36it/s][A |
|
Reading metadata...: 164498it [00:23, 6070.15it/s][A |
|
Reading metadata...: 171902it [00:23, 6887.15it/s][A |
|
Reading metadata...: 184380it [00:24, 9253.56it/s][A |
|
Reading metadata...: 199395it [00:24, 13975.77it/s][A |
|
Reading metadata...: 207317it [00:24, 15201.63it/s][A |
|
Reading metadata...: 221296it [00:24, 22089.09it/s][A |
|
Reading metadata...: 229844it [00:24, 23238.20it/s][A
Reading metadata...: 230467it [00:24, 9256.50it/s] |
|
|
|
Reading metadata...: 0it [00:00, ?it/s][A |
|
Reading metadata...: 1it [00:00, 3.85it/s][A |
|
Reading metadata...: 15233it [00:00, 54060.21it/s][A
Reading metadata...: 15520it [00:00, 42867.71it/s] |
|
5%|β | 243/5000 [2:00:45<64:12:40, 48.59s/it]
5%|β | 244/5000 [2:01:12<55:50:19, 42.27s/it]
5%|β | 245/5000 [2:01:42<50:47:40, 38.46s/it]
5%|β | 246/5000 [2:02:09<46:29:44, 35.21s/it]
5%|β | 247/5000 [2:02:40<44:32:06, 33.73s/it]
5%|β | 248/5000 [2:03:07<41:58:11, 31.80s/it]
5%|β | 249/5000 [2:03:35<40:37:24, 30.78s/it]
5%|β | 250/5000 [2:04:05<40:04:27, 30.37s/it]
5%|β | 250/5000 [2:04:05<40:04:27, 30.37s/it]
5%|β | 251/5000 [2:04:33<39:02:30, 29.60s/it]
5%|β | 252/5000 [2:05:03<39:12:04, 29.72s/it]
5%|β | 253/5000 [2:05:30<38:16:42, 29.03s/it]
5%|β | 254/5000 [2:06:00<38:39:51, 29.33s/it]
5%|β | 255/5000 [2:06:28<37:54:41, 28.76s/it]
5%|β | 256/5000 [2:06:58<38:39:26, 29.34s/it]
5%|β | 257/5000 [2:07:25<37:47:14, 28.68s/it]
5%|β | 258/5000 [2:07:55<38:13:11, 29.02s/it]
5%|β | 259/5000 [2:08:23<37:37:37, 28.57s/it]
5%|β | 260/5000 [2:08:52<38:01:05, 28.87s/it]
5%|β | 261/5000 [2:09:20<37:29:01, 28.47s/it]
5%|β | 262/5000 [2:09:50<38:05:16, 28.94s/it]
5%|β | 263/5000 [2:10:18<37:47:33, 28.72s/it]
5%|β | 264/5000 [2:10:48<38:25:50, 29.21s/it]
5%|β | 265/5000 [2:11:15<37:35:39, 28.58s/it]
5%|β | 266/5000 [2:11:46<38:19:21, 29.14s/it]
5%|β | 267/5000 [2:12:14<37:51:22, 28.79s/it]
5%|β | 268/5000 [2:12:44<38:11:47, 29.06s/it]
5%|β | 269/5000 [2:13:11<37:28:11, 28.51s/it]
5%|β | 270/5000 [2:13:44<39:22:49, 29.97s/it]
5%|β | 271/5000 [2:14:12<38:21:12, 29.20s/it]
5%|β | 272/5000 [2:14:42<38:39:22, 29.43s/it]
5%|β | 273/5000 [2:15:08<37:26:08, 28.51s/it]
5%|β | 274/5000 [2:15:38<38:11:11, 29.09s/it]
6%|β | 275/5000 [2:16:06<37:28:33, 28.55s/it]
6%|β | 275/5000 [2:16:06<37:28:33, 28.55s/it]
6%|β | 276/5000 [2:16:37<38:33:53, 29.39s/it]
6%|β | 277/5000 [2:17:05<37:52:30, 28.87s/it]
6%|β | 278/5000 [2:17:32<37:09:55, 28.33s/it]
6%|β | 279/5000 [2:18:01<37:34:06, 28.65s/it]
6%|β | 280/5000 [2:18:28<36:57:51, 28.19s/it]
6%|β | 281/5000 [2:18:59<37:50:16, 28.87s/it]
6%|β | 282/5000 [2:19:27<37:36:41, 28.70s/it]
6%|β | 283/5000 [2:19:56<37:47:32, 28.84s/it]
6%|β | 284/5000 [2:20:25<37:38:39, 28.74s/it]
6%|β | 285/5000 [2:20:54<37:55:58, 28.96s/it]
6%|β | 286/5000 [2:21:22<37:26:45, 28.60s/it]
6%|β | 287/5000 [2:21:52<38:01:05, 29.04s/it]
6%|β | 288/5000 [2:22:19<37:20:12, 28.53s/it]
6%|β | 289/5000 [2:22:52<39:08:31, 29.91s/it]
6%|β | 290/5000 [2:23:20<38:05:33, 29.12s/it]
6%|β | 291/5000 [2:23:50<38:28:38, 29.42s/it]
6%|β | 292/5000 [2:24:19<38:16:47, 29.27s/it]
6%|β | 293/5000 [2:24:49<38:48:34, 29.68s/it]
6%|β | 294/5000 [2:25:17<38:06:49, 29.16s/it]
6%|β | 295/5000 [2:25:53<40:29:24, 30.98s/it]
6%|β | 296/5000 [2:26:20<38:56:35, 29.80s/it]
6%|β | 297/5000 [2:26:49<38:50:01, 29.73s/it]
6%|β | 298/5000 [2:27:16<37:51:24, 28.98s/it]
6%|β | 299/5000 [2:27:44<37:23:59, 28.64s/it]
6%|β | 300/5000 [2:28:14<37:50:23, 28.98s/it]
6%|β | 300/5000 [2:28:14<37:50:23, 28.98s/it]
6%|β | 301/5000 [2:28:41<37:04:25, 28.40s/it]
6%|β | 302/5000 [2:29:12<38:12:03, 29.27s/it]
6%|β | 303/5000 [2:29:40<37:22:03, 28.64s/it]
6%|β | 304/5000 [2:30:09<37:48:42, 28.99s/it]
6%|β | 305/5000 [2:30:38<37:32:34, 28.79s/it]
6%|β | 306/5000 [2:31:07<37:52:40, 29.05s/it]
6%|β | 307/5000 [2:31:36<37:50:21, 29.03s/it]
6%|β | 308/5000 [2:32:08<38:58:46, 29.91s/it]
6%|β | 309/5000 [2:32:36<38:03:51, 29.21s/it]
6%|β | 310/5000 [2:33:05<38:12:05, 29.32s/it]
6%|β | 311/5000 [2:33:33<37:29:23, 28.78s/it]
6%|β | 312/5000 [2:34:00<36:52:44, 28.32s/it]
6%|β | 313/5000 [2:34:30<37:22:31, 28.71s/it]
6%|β | 314/5000 [2:34:58<37:04:16, 28.48s/it]
6%|β | 315/5000 [2:35:27<37:25:14, 28.75s/it]
6%|β | 316/5000 [2:35:55<37:11:34, 28.59s/it]
6%|β | 317/5000 [2:36:25<37:29:38, 28.82s/it]
6%|β | 318/5000 [2:36:53<37:05:03, 28.51s/it]
6%|β | 319/5000 [2:37:23<37:53:43, 29.14s/it]
6%|β | 320/5000 [2:37:50<37:05:23, 28.53s/it]
6%|β | 321/5000 [2:38:21<37:48:04, 29.08s/it]
6%|β | 322/5000 [2:38:48<37:15:36, 28.67s/it]
6%|β | 323/5000 [2:39:18<37:32:16, 28.89s/it]
6%|β | 324/5000 [2:39:46<37:15:34, 28.69s/it]
6%|β | 325/5000 [2:40:15<37:32:05, 28.90s/it]
6%|β | 325/5000 [2:40:15<37:32:05, 28.90s/it]
7%|β | 326/5000 [2:40:43<37:04:04, 28.55s/it]
7%|β | 327/5000 [2:41:13<37:40:03, 29.02s/it]
7%|β | 328/5000 [2:41:41<37:01:40, 28.53s/it]
7%|β | 329/5000 [2:42:11<37:44:09, 29.08s/it]
7%|β | 330/5000 [2:42:41<37:56:50, 29.25s/it]
7%|β | 331/5000 [2:43:08<37:19:50, 28.78s/it]
7%|β | 332/5000 [2:43:38<37:49:43, 29.17s/it]
7%|β | 333/5000 [2:44:06<37:09:29, 28.66s/it]
7%|β | 334/5000 [2:44:36<37:44:37, 29.12s/it]
7%|β | 335/5000 [2:45:03<36:59:15, 28.54s/it]
7%|β | 336/5000 [2:45:33<37:36:12, 29.02s/it]
7%|β | 337/5000 [2:46:01<37:06:28, 28.65s/it]
7%|β | 338/5000 [2:46:30<37:16:44, 28.79s/it]
7%|β | 339/5000 [2:46:59<37:03:08, 28.62s/it]
7%|β | 340/5000 [2:47:28<37:20:06, 28.84s/it]
7%|β | 341/5000 [2:47:55<36:50:57, 28.47s/it]
7%|β | 342/5000 [2:48:25<37:23:17, 28.90s/it]
7%|β | 343/5000 [2:48:53<36:44:07, 28.40s/it]
7%|β | 344/5000 [2:49:23<37:28:45, 28.98s/it]
7%|β | 345/5000 [2:49:50<36:52:30, 28.52s/it]
7%|β | 346/5000 [2:50:20<37:15:57, 28.83s/it]
7%|β | 347/5000 [2:50:48<37:01:08, 28.64s/it]
7%|β | 348/5000 [2:51:17<37:15:09, 28.83s/it]
7%|β | 349/5000 [2:51:45<36:43:30, 28.43s/it]
7%|β | 350/5000 [2:52:14<37:01:18, 28.66s/it]
7%|β | 350/5000 [2:52:14<37:01:18, 28.66s/it]
7%|β | 351/5000 [2:52:41<36:30:28, 28.27s/it]
7%|β | 352/5000 [2:53:16<39:04:59, 30.27s/it]
7%|β | 353/5000 [2:53:44<37:52:09, 29.34s/it]
7%|β | 354/5000 [2:54:13<37:45:08, 29.25s/it]
7%|β | 355/5000 [2:54:40<37:00:02, 28.68s/it]
7%|β | 356/5000 [2:55:09<37:10:14, 28.81s/it]
7%|β | 357/5000 [2:55:37<36:55:50, 28.63s/it]
7%|β | 358/5000 [2:56:07<37:16:16, 28.90s/it]
7%|β | 359/5000 [2:56:34<36:45:21, 28.51s/it]
7%|β | 360/5000 [2:57:04<37:12:38, 28.87s/it]
7%|β | 361/5000 [2:57:32<36:56:33, 28.67s/it]
7%|β | 362/5000 [2:58:03<37:44:00, 29.29s/it]
7%|β | 363/5000 [2:58:30<36:58:10, 28.70s/it]
7%|β | 364/5000 [2:59:00<37:28:38, 29.10s/it]
7%|β | 365/5000 [2:59:27<36:37:42, 28.45s/it]
7%|β | 366/5000 [2:59:59<37:46:01, 29.34s/it]
7%|β | 367/5000 [3:00:26<37:02:07, 28.78s/it]
7%|β | 368/5000 [3:01:01<39:15:32, 30.51s/it]
7%|β | 369/5000 [3:01:29<38:22:12, 29.83s/it]
7%|β | 370/5000 [3:01:56<37:24:28, 29.09s/it]
7%|β | 371/5000 [3:02:27<37:59:24, 29.55s/it]
7%|β | 372/5000 [3:02:59<38:49:51, 30.21s/it]
7%|β | 373/5000 [3:03:28<38:25:14, 29.89s/it]
7%|β | 374/5000 [3:03:56<37:34:58, 29.25s/it]
8%|β | 375/5000 [3:04:26<37:59:47, 29.58s/it]
8%|β | 375/5000 [3:04:26<37:59:47, 29.58s/it]
8%|β | 376/5000 [3:04:53<37:06:54, 28.90s/it]
8%|β | 377/5000 [3:05:23<37:29:25, 29.19s/it]
8%|β | 378/5000 [3:05:50<36:43:38, 28.61s/it]
8%|β | 379/5000 [3:06:21<37:21:10, 29.10s/it]
8%|β | 380/5000 [3:06:49<36:54:46, 28.76s/it]
8%|β | 381/5000 [3:07:19<37:30:16, 29.23s/it]
8%|β | 382/5000 [3:07:47<37:11:13, 28.99s/it]
8%|β | 383/5000 [3:08:18<37:45:11, 29.44s/it]
8%|β | 384/5000 [3:08:45<36:55:47, 28.80s/it]
8%|β | 385/5000 [3:09:14<36:59:17, 28.85s/it]
8%|β | 386/5000 [3:09:42<36:42:26, 28.64s/it]
8%|β | 387/5000 [3:10:13<37:20:55, 29.15s/it]
8%|β | 388/5000 [3:10:40<36:34:43, 28.55s/it]
8%|β | 389/5000 [3:11:10<37:05:19, 28.96s/it]
8%|β | 390/5000 [3:11:38<36:40:19, 28.64s/it]
8%|β | 391/5000 [3:12:07<37:00:03, 28.90s/it]
8%|β | 392/5000 [3:12:35<36:25:33, 28.46s/it]
8%|β | 393/5000 [3:13:04<36:45:33, 28.72s/it]
8%|β | 394/5000 [3:13:31<36:16:04, 28.35s/it]
8%|β | 395/5000 [3:13:59<36:09:58, 28.27s/it]
8%|β | 396/5000 [3:14:29<36:31:57, 28.57s/it]
8%|β | 397/5000 [3:14:57<36:17:43, 28.39s/it]
8%|β | 398/5000 [3:15:26<36:43:22, 28.73s/it]
8%|β | 399/5000 [3:15:54<36:18:29, 28.41s/it]
8%|β | 400/5000 [3:16:23<36:38:33, 28.68s/it]
8%|β | 400/5000 [3:16:23<36:38:33, 28.68s/it]
8%|β | 401/5000 [3:16:50<36:02:40, 28.21s/it]
8%|β | 402/5000 [3:17:21<36:51:51, 28.86s/it]
8%|β | 403/5000 [3:17:48<36:10:02, 28.32s/it]
8%|β | 404/5000 [3:18:18<36:42:23, 28.75s/it]
8%|β | 405/5000 [3:18:45<36:23:31, 28.51s/it]
8%|β | 406/5000 [3:19:21<39:06:47, 30.65s/it]
8%|β | 407/5000 [3:19:50<38:20:36, 30.05s/it]
8%|β | 408/5000 [3:20:19<38:04:11, 29.85s/it]
8%|β | 409/5000 [3:20:47<37:11:10, 29.16s/it]
8%|β | 410/5000 [3:21:17<37:46:20, 29.63s/it]
8%|β | 411/5000 [3:21:44<36:44:43, 28.83s/it]
8%|β | 412/5000 [3:22:22<40:05:41, 31.46s/it]
8%|β | 413/5000 [3:22:49<38:31:11, 30.23s/it]
8%|β | 414/5000 [3:23:19<38:19:43, 30.09s/it]
8%|β | 415/5000 [3:23:48<37:46:29, 29.66s/it]
8%|β | 416/5000 [3:24:17<37:42:58, 29.62s/it]
8%|β | 417/5000 [3:24:47<37:39:37, 29.58s/it]
8%|β | 418/5000 [3:25:16<37:42:41, 29.63s/it]
8%|β | 419/5000 [3:25:44<36:55:37, 29.02s/it]
8%|β | 420/5000 [3:26:16<38:06:54, 29.96s/it]
8%|β | 421/5000 [3:26:43<37:03:38, 29.14s/it]
8%|β | 422/5000 [3:27:14<37:36:29, 29.57s/it]
8%|β | 423/5000 [3:27:41<36:43:25, 28.88s/it]
8%|β | 424/5000 [3:28:11<37:03:32, 29.15s/it]
8%|β | 425/5000 [3:28:40<37:07:44, 29.22s/it]
8%|β | 425/5000 [3:28:40<37:07:44, 29.22s/it]
9%|β | 426/5000 [3:29:10<37:22:15, 29.41s/it]
9%|β | 427/5000 [3:29:38<36:33:43, 28.78s/it]
9%|β | 428/5000 [3:30:08<37:12:25, 29.30s/it]
9%|β | 429/5000 [3:30:36<36:33:04, 28.79s/it]
9%|β | 430/5000 [3:31:06<37:07:37, 29.25s/it]
9%|β | 431/5000 [3:31:34<36:30:15, 28.76s/it]
9%|β | 432/5000 [3:32:03<36:46:37, 28.98s/it]
9%|β | 433/5000 [3:32:30<36:05:17, 28.45s/it]
9%|β | 434/5000 [3:33:01<36:54:51, 29.10s/it]
9%|β | 435/5000 [3:33:28<36:14:39, 28.58s/it]
9%|β | 436/5000 [3:33:59<36:56:36, 29.14s/it]
9%|β | 437/5000 [3:34:27<36:36:06, 28.88s/it]
9%|β | 438/5000 [3:34:58<37:28:08, 29.57s/it]
9%|β | 439/5000 [3:35:26<36:42:40, 28.98s/it]
9%|β | 440/5000 [3:35:55<36:51:24, 29.10s/it]
9%|β | 441/5000 [3:36:23<36:17:12, 28.65s/it]
9%|β | 442/5000 [3:36:53<36:43:27, 29.01s/it]
9%|β | 443/5000 [3:37:20<36:12:54, 28.61s/it]
9%|β | 444/5000 [3:37:51<37:01:40, 29.26s/it]
9%|β | 445/5000 [3:38:18<36:15:14, 28.65s/it]
9%|β | 446/5000 [3:38:49<37:00:36, 29.26s/it]
9%|β | 447/5000 [3:39:16<36:11:55, 28.62s/it]
9%|β | 448/5000 [3:39:44<35:46:20, 28.29s/it]
9%|β | 449/5000 [3:40:14<36:28:02, 28.85s/it]
9%|β | 450/5000 [3:40:41<35:47:50, 28.32s/it]
9%|β | 450/5000 [3:40:41<35:47:50, 28.32s/it]
9%|β | 451/5000 [3:41:11<36:31:53, 28.91s/it]
9%|β | 452/5000 [3:41:38<35:53:15, 28.41s/it]
9%|β | 453/5000 [3:42:09<36:38:58, 29.02s/it]
9%|β | 454/5000 [3:42:36<35:55:40, 28.45s/it]
9%|β | 455/5000 [3:43:06<36:24:08, 28.83s/it]
9%|β | 456/5000 [3:43:33<35:44:58, 28.32s/it]
9%|β | 457/5000 [3:44:03<36:26:45, 28.88s/it]
9%|β | 458/5000 [3:44:31<36:08:04, 28.64s/it]
9%|β | 459/5000 [3:45:00<36:16:37, 28.76s/it]
9%|β | 460/5000 [3:45:27<35:41:22, 28.30s/it]
9%|β | 461/5000 [3:45:56<35:50:58, 28.43s/it]
9%|β | 462/5000 [3:46:23<35:23:31, 28.08s/it]
9%|β | 463/5000 [3:46:53<35:59:55, 28.56s/it]
9%|β | 464/5000 [3:47:21<35:38:22, 28.29s/it]
9%|β | 465/5000 [3:47:52<36:41:53, 29.13s/it]
9%|β | 466/5000 [3:48:19<36:00:47, 28.59s/it]
9%|β | 467/5000 [3:48:47<35:46:49, 28.42s/it]
9%|β | 468/5000 [3:49:17<36:10:17, 28.73s/it]
9%|β | 469/5000 [3:49:45<35:56:21, 28.55s/it]
9%|β | 470/5000 [3:50:15<36:22:11, 28.90s/it]
9%|β | 471/5000 [3:50:42<35:39:15, 28.34s/it]
9%|β | 472/5000 [3:51:11<36:12:59, 28.79s/it]
9%|β | 473/5000 [3:51:39<35:38:23, 28.34s/it]
9%|β | 474/5000 [3:52:09<36:12:59, 28.81s/it]
10%|β | 475/5000 [3:52:36<35:37:33, 28.34s/it]
10%|β | 475/5000 [3:52:36<35:37:33, 28.34s/it]
10%|β | 476/5000 [3:53:06<36:10:04, 28.78s/it]
10%|β | 477/5000 [3:53:33<35:46:08, 28.47s/it]
10%|β | 478/5000 [3:54:03<36:12:51, 28.83s/it]
10%|β | 479/5000 [3:54:31<35:59:16, 28.66s/it]
10%|β | 480/5000 [3:55:01<36:30:42, 29.08s/it]
10%|β | 481/5000 [3:55:29<35:57:52, 28.65s/it]
10%|β | 482/5000 [3:55:41<29:43:37, 23.69s/it]
10%|β | 483/5000 [3:55:52<24:44:48, 19.72s/it]
10%|β | 484/5000 [3:56:02<21:16:03, 16.95s/it]
10%|β | 485/5000 [3:56:13<18:50:40, 15.03s/it]{'loss': 0.2867, 'learning_rate': 4.980000000000001e-06, 'epoch': 1.0} |
|
{'loss': 0.3646, 'learning_rate': 5.480000000000001e-06, 'epoch': 1.01} |
|
{'loss': 0.3278, 'learning_rate': 5.98e-06, 'epoch': 1.01} |
|
{'loss': 0.3748, 'learning_rate': 6.480000000000001e-06, 'epoch': 1.02} |
|
{'loss': 0.2737, 'learning_rate': 6.98e-06, 'epoch': 1.02} |
|
{'loss': 0.2653, 'learning_rate': 7.48e-06, 'epoch': 1.03} |
|
{'loss': 0.2596, 'learning_rate': 7.980000000000002e-06, 'epoch': 1.03} |
|
{'loss': 0.2665, 'learning_rate': 8.48e-06, 'epoch': 1.04} |
|
{'loss': 0.2598, 'learning_rate': 8.98e-06, 'epoch': 1.04} |
|
{'loss': 0.242, 'learning_rate': 9.48e-06, 'epoch': 1.05} |
|
|
|
Reading metadata...: 0it [00:00, ?it/s][A |
|
Reading metadata...: 1it [00:01, 1.04s/it][A |
|
Reading metadata...: 15751it [00:01, 19095.71it/s][A |
|
Reading metadata...: 25016it [00:01, 16623.79it/s][A |
|
Reading metadata...: 40082it [00:01, 30667.68it/s][A |
|
Reading metadata...: 49475it [00:02, 29075.84it/s][A |
|
Reading metadata...: 59039it [00:02, 27352.30it/s][A |
|
Reading metadata...: 73815it [00:02, 41033.71it/s][A |
|
Reading metadata...: 82286it [00:05, 11264.53it/s][A |
|
Reading metadata...: 96058it [00:13, 3486.35it/s] [A |
|
Reading metadata...: 110587it [00:13, 5350.06it/s][A |
|
Reading metadata...: 117905it [00:15, 5480.20it/s][A |
|
Reading metadata...: 132000it [00:16, 6898.25it/s][A |
|
Reading metadata...: 146014it [00:16, 10168.62it/s][A |
|
Reading metadata...: 153083it [00:16, 11213.31it/s][A |
|
Reading metadata...: 167903it [00:16, 17038.42it/s][A |
|
Reading metadata...: 176226it [00:17, 13544.55it/s][A |
|
Reading metadata...: 184380it [00:18, 14560.00it/s][A |
|
Reading metadata...: 199849it [00:18, 22725.78it/s][A |
|
Reading metadata...: 207861it [00:18, 22429.49it/s][A |
|
Reading metadata...: 221914it [00:19, 21225.82it/s][A
Reading metadata...: 230467it [00:19, 11852.09it/s] |
|
|
|
Reading metadata...: 0it [00:00, ?it/s][A |
|
Reading metadata...: 1it [00:00, 3.44it/s][A
Reading metadata...: 15520it [00:00, 39827.17it/s] |
|
10%|β | 486/5000 [3:58:14<59:01:25, 47.07s/it]
10%|β | 487/5000 [3:58:43<51:55:07, 41.42s/it]
10%|β | 488/5000 [3:59:12<47:23:12, 37.81s/it]
10%|β | 489/5000 [3:59:39<43:19:15, 34.57s/it]
10%|β | 490/5000 [4:00:09<41:29:33, 33.12s/it]
10%|β | 491/5000 [4:00:36<39:07:48, 31.24s/it]
10%|β | 492/5000 [4:01:09<39:49:47, 31.81s/it]
10%|β | 493/5000 [4:01:37<38:31:14, 30.77s/it]
10%|β | 494/5000 [4:02:05<37:24:19, 29.88s/it]
10%|β | 495/5000 [4:02:34<36:59:42, 29.56s/it]
10%|β | 496/5000 [4:03:01<36:14:27, 28.97s/it]
10%|β | 497/5000 [4:03:30<36:09:49, 28.91s/it]
10%|β | 498/5000 [4:03:59<36:02:20, 28.82s/it]
10%|β | 499/5000 [4:04:26<35:26:22, 28.35s/it]
10%|β | 500/5000 [4:04:55<35:30:12, 28.40s/it]
10%|β | 500/5000 [4:04:55<35:30:12, 28.40s/it]
10%|β | 501/5000 [4:05:22<34:59:50, 28.00s/it]
10%|β | 502/5000 [4:05:51<35:27:18, 28.38s/it]
10%|β | 503/5000 [4:06:20<35:32:48, 28.46s/it]
10%|β | 504/5000 [4:06:48<35:32:06, 28.45s/it]
10%|β | 505/5000 [4:07:16<35:17:02, 28.26s/it]
10%|β | 506/5000 [4:07:43<34:49:41, 27.90s/it]
10%|β | 507/5000 [4:08:11<35:05:38, 28.12s/it]
10%|β | 508/5000 [4:08:40<35:21:25, 28.34s/it]
10%|β | 509/5000 [4:09:08<34:59:35, 28.05s/it]
10%|β | 510/5000 [4:09:36<35:15:04, 28.26s/it]
10%|β | 511/5000 [4:10:05<35:10:17, 28.21s/it]
10%|β | 512/5000 [4:10:33<35:09:53, 28.21s/it]
10%|β | 513/5000 [4:11:03<35:46:05, 28.70s/it]
10%|β | 514/5000 [4:11:30<35:12:59, 28.26s/it]
10%|β | 515/5000 [4:11:59<35:32:52, 28.53s/it]
10%|β | 516/5000 [4:12:26<35:04:10, 28.16s/it]
10%|β | 517/5000 [4:12:55<35:15:20, 28.31s/it]
10%|β | 518/5000 [4:13:22<34:51:43, 28.00s/it]
10%|β | 519/5000 [4:13:50<34:38:57, 27.84s/it]
10%|β | 520/5000 [4:14:18<34:53:07, 28.03s/it]
10%|β | 521/5000 [4:14:46<34:42:13, 27.89s/it]
10%|β | 522/5000 [4:15:14<35:01:13, 28.15s/it]
10%|β | 523/5000 [4:15:44<35:32:24, 28.58s/it]
10%|β | 524/5000 [4:16:11<35:02:06, 28.18s/it]
10%|β | 525/5000 [4:16:40<35:03:17, 28.20s/it]
10%|β | 525/5000 [4:16:40<35:03:17, 28.20s/it]
11%|β | 526/5000 [4:17:09<35:20:03, 28.43s/it]
11%|β | 527/5000 [4:17:36<35:00:31, 28.18s/it]
11%|β | 528/5000 [4:18:04<34:43:33, 27.95s/it]
11%|β | 529/5000 [4:18:31<34:32:01, 27.81s/it]
11%|β | 530/5000 [4:18:59<34:43:25, 27.97s/it]
11%|β | 531/5000 [4:19:28<34:54:15, 28.12s/it]
11%|β | 532/5000 [4:19:55<34:32:45, 27.83s/it]
11%|β | 533/5000 [4:20:24<34:48:42, 28.06s/it]
11%|β | 534/5000 [4:20:51<34:35:04, 27.88s/it]
11%|β | 535/5000 [4:21:19<34:26:54, 27.77s/it]
11%|β | 536/5000 [4:21:49<35:21:15, 28.51s/it]
11%|β | 537/5000 [4:22:16<34:56:58, 28.19s/it]
11%|β | 538/5000 [4:22:45<35:06:09, 28.32s/it]
11%|β | 539/5000 [4:23:12<34:39:28, 27.97s/it]
11%|β | 540/5000 [4:23:39<34:28:06, 27.82s/it]
11%|β | 541/5000 [4:24:07<34:14:53, 27.65s/it]
11%|β | 542/5000 [4:24:39<36:01:05, 29.09s/it]
11%|β | 543/5000 [4:25:08<35:57:27, 29.04s/it]
11%|β | 544/5000 [4:25:36<35:31:38, 28.70s/it]
11%|β | 545/5000 [4:26:05<35:27:12, 28.65s/it]
11%|β | 546/5000 [4:26:34<35:47:54, 28.93s/it]
11%|β | 547/5000 [4:27:02<35:14:58, 28.50s/it]
11%|β | 548/5000 [4:27:31<35:37:04, 28.80s/it]
11%|β | 549/5000 [4:27:59<35:17:14, 28.54s/it]
11%|β | 550/5000 [4:28:28<35:16:21, 28.54s/it]
11%|β | 550/5000 [4:28:28<35:16:21, 28.54s/it]
11%|β | 551/5000 [4:28:57<35:39:03, 28.85s/it]
11%|β | 552/5000 [4:29:24<35:00:21, 28.33s/it]
11%|β | 553/5000 [4:29:54<35:34:21, 28.80s/it]
11%|β | 554/5000 [4:30:21<34:58:10, 28.32s/it]
11%|β | 555/5000 [4:30:52<35:44:58, 28.95s/it]
11%|β | 556/5000 [4:31:22<36:19:34, 29.43s/it]
11%|β | 557/5000 [4:31:52<36:32:51, 29.61s/it]
11%|β | 558/5000 [4:32:29<39:17:44, 31.85s/it]
11%|β | 559/5000 [4:32:57<37:33:30, 30.45s/it]
11%|β | 560/5000 [4:33:26<37:02:03, 30.03s/it]
11%|β | 561/5000 [4:33:54<36:30:08, 29.60s/it]
11%|β | 562/5000 [4:34:22<35:54:09, 29.12s/it]
11%|ββ | 563/5000 [4:34:58<38:23:23, 31.15s/it]
11%|ββ | 564/5000 [4:35:27<37:30:14, 30.44s/it]
11%|ββ | 565/5000 [4:35:57<37:16:34, 30.26s/it]
11%|ββ | 566/5000 [4:36:23<35:47:56, 29.07s/it]
11%|ββ | 567/5000 [4:36:53<36:01:02, 29.25s/it]
11%|ββ | 568/5000 [4:37:22<36:05:44, 29.32s/it]
11%|ββ | 569/5000 [4:37:51<35:45:23, 29.05s/it]
11%|ββ | 570/5000 [4:38:19<35:27:56, 28.82s/it]
11%|ββ | 571/5000 [4:38:47<35:07:11, 28.55s/it]
11%|ββ | 572/5000 [4:39:16<35:11:44, 28.61s/it]{'loss': 0.245, 'learning_rate': 9.980000000000001e-06, 'epoch': 2.0} |
|
{'loss': 0.2256, 'learning_rate': 9.946666666666667e-06, 'epoch': 2.01} |
|
{'loss': 0.209, 'learning_rate': 9.891111111111113e-06, 'epoch': 2.01} |
|
05/10/2023 14:21:14 - WARNING - datasets.download.streaming_download_manager - Got disconnected from remote data host. Retrying in 5sec [1/20] |
|
11%|ββ | 573/5000 [4:39:59<40:47:46, 33.18s/it]
11%|ββ | 574/5000 [4:40:27<38:39:16, 31.44s/it]
12%|ββ | 575/5000 [4:40:57<38:15:47, 31.13s/it]
12%|ββ | 575/5000 [4:40:57<38:15:47, 31.13s/it]
12%|ββ | 576/5000 [4:41:24<36:46:29, 29.93s/it]
12%|ββ | 577/5000 [4:41:54<36:32:21, 29.74s/it]
12%|ββ | 578/5000 [4:42:22<36:04:32, 29.37s/it]
12%|ββ | 579/5000 [4:42:51<35:51:34, 29.20s/it]
12%|ββ | 580/5000 [4:43:20<35:40:57, 29.06s/it]
12%|ββ | 581/5000 [4:43:48<35:22:58, 28.83s/it]
12%|ββ | 582/5000 [4:44:16<35:12:04, 28.68s/it]
12%|ββ | 583/5000 [4:44:45<35:17:58, 28.77s/it]
12%|ββ | 584/5000 [4:45:13<34:43:35, 28.31s/it]
12%|ββ | 585/5000 [4:45:43<35:21:39, 28.83s/it]
12%|ββ | 586/5000 [4:46:10<34:48:26, 28.39s/it]
12%|ββ | 587/5000 [4:46:38<34:45:39, 28.36s/it]
12%|ββ | 588/5000 [4:47:07<35:01:55, 28.58s/it]
12%|ββ | 589/5000 [4:47:35<34:49:22, 28.42s/it]
12%|ββ | 590/5000 [4:48:03<34:40:11, 28.30s/it]
12%|ββ | 591/5000 [4:48:33<35:00:21, 28.58s/it]
12%|ββ | 592/5000 [4:49:00<34:33:03, 28.22s/it]
12%|ββ | 593/5000 [4:49:28<34:37:13, 28.28s/it]
12%|ββ | 594/5000 [4:49:56<34:11:05, 27.93s/it]
12%|ββ | 595/5000 [4:50:26<34:56:50, 28.56s/it]
12%|ββ | 596/5000 [4:50:53<34:25:41, 28.14s/it]
12%|ββ | 597/5000 [4:51:22<34:51:51, 28.51s/it]
12%|ββ | 598/5000 [4:51:52<35:20:34, 28.90s/it]
12%|ββ | 599/5000 [4:52:20<35:01:04, 28.64s/it]
12%|ββ | 600/5000 [4:52:54<36:49:52, 30.13s/it]
12%|ββ | 600/5000 [4:52:54<36:49:52, 30.13s/it]
12%|ββ | 601/5000 [4:53:23<36:36:18, 29.96s/it]
12%|ββ | 602/5000 [4:53:50<35:34:04, 29.11s/it]
12%|ββ | 603/5000 [4:54:20<35:43:25, 29.25s/it]
12%|ββ | 604/5000 [4:54:47<34:54:12, 28.58s/it]
12%|ββ | 605/5000 [4:55:17<35:19:47, 28.94s/it]
12%|ββ | 606/5000 [4:55:44<34:38:36, 28.38s/it]
12%|ββ | 607/5000 [4:56:15<35:31:39, 29.11s/it]
12%|ββ | 608/5000 [4:56:45<36:09:47, 29.64s/it]
12%|ββ | 609/5000 [4:57:14<35:43:59, 29.30s/it]
12%|ββ | 610/5000 [4:57:43<35:28:15, 29.09s/it]
12%|ββ | 611/5000 [4:58:12<35:30:54, 29.13s/it]
12%|ββ | 612/5000 [4:58:39<34:48:34, 28.56s/it]
12%|ββ | 613/5000 [4:59:08<34:52:40, 28.62s/it]
12%|ββ | 614/5000 [4:59:35<34:16:16, 28.13s/it]
12%|ββ | 615/5000 [5:00:04<34:31:51, 28.35s/it]
12%|ββ | 616/5000 [5:00:32<34:38:16, 28.44s/it]
12%|ββ | 617/5000 [5:01:01<34:50:59, 28.62s/it]
12%|ββ | 618/5000 [5:01:34<36:29:50, 29.98s/it]
12%|ββ | 619/5000 [5:02:02<35:38:08, 29.28s/it]
12%|ββ | 620/5000 [5:02:30<35:18:17, 29.02s/it]
12%|ββ | 621/5000 [5:02:59<35:00:43, 28.78s/it]
12%|ββ | 622/5000 [5:03:26<34:27:13, 28.33s/it]
12%|ββ | 623/5000 [5:03:54<34:18:05, 28.21s/it]
12%|ββ | 624/5000 [5:04:22<34:18:00, 28.22s/it]
12%|ββ | 625/5000 [5:04:50<34:18:11, 28.23s/it]
12%|ββ | 625/5000 [5:04:50<34:18:11, 28.23s/it]
13%|ββ | 626/5000 [5:05:20<34:42:05, 28.56s/it]
13%|ββ | 627/5000 [5:05:47<34:08:52, 28.11s/it]
13%|ββ | 628/5000 [5:06:16<34:22:13, 28.30s/it]
13%|ββ | 629/5000 [5:06:43<33:53:45, 27.92s/it]
13%|ββ | 630/5000 [5:07:14<35:17:30, 29.07s/it]
13%|ββ | 631/5000 [5:07:43<35:06:09, 28.92s/it]
13%|ββ | 632/5000 [5:08:11<34:39:29, 28.56s/it]
13%|ββ | 633/5000 [5:08:39<34:30:44, 28.45s/it]
13%|ββ | 634/5000 [5:09:07<34:24:13, 28.37s/it]
13%|ββ | 635/5000 [5:09:35<34:19:30, 28.31s/it]
13%|ββ | 636/5000 [5:10:04<34:27:17, 28.42s/it]
13%|ββ | 637/5000 [5:10:31<33:59:18, 28.04s/it]
13%|ββ | 638/5000 [5:10:59<34:03:54, 28.11s/it]
13%|ββ | 639/5000 [5:11:28<34:19:41, 28.34s/it]
13%|ββ | 640/5000 [5:11:56<34:05:23, 28.15s/it]
13%|ββ | 641/5000 [5:12:25<34:22:44, 28.39s/it]
13%|ββ | 642/5000 [5:12:52<34:01:02, 28.10s/it]
13%|ββ | 643/5000 [5:13:20<34:03:20, 28.14s/it]
13%|ββ | 644/5000 [5:13:50<34:27:01, 28.47s/it]
13%|ββ | 645/5000 [5:14:17<33:58:15, 28.08s/it]
13%|ββ | 646/5000 [5:14:46<34:17:46, 28.36s/it]
13%|ββ | 647/5000 [5:15:13<33:49:45, 27.98s/it]
13%|ββ | 648/5000 [5:15:42<34:21:44, 28.42s/it]
13%|ββ | 649/5000 [5:16:11<34:16:45, 28.36s/it]
13%|ββ | 650/5000 [5:16:38<33:43:03, 27.90s/it]
13%|ββ | 650/5000 [5:16:38<33:43:03, 27.90s/it]
13%|ββ | 651/5000 [5:17:07<34:22:48, 28.46s/it]
13%|ββ | 652/5000 [5:17:34<33:52:35, 28.05s/it]
13%|ββ | 653/5000 [5:18:03<34:07:10, 28.26s/it]
13%|ββ | 654/5000 [5:18:30<33:47:04, 27.99s/it]
13%|ββ | 655/5000 [5:18:59<34:00:10, 28.17s/it]
13%|ββ | 656/5000 [5:19:28<34:19:44, 28.45s/it]
13%|ββ | 657/5000 [5:19:55<33:48:20, 28.02s/it]
13%|ββ | 658/5000 [5:20:24<34:01:03, 28.20s/it]
13%|ββ | 659/5000 [5:20:52<33:59:33, 28.19s/it]
13%|ββ | 660/5000 [5:21:20<34:05:00, 28.27s/it]
13%|ββ | 661/5000 [5:21:49<34:20:18, 28.49s/it]
13%|ββ | 662/5000 [5:22:17<33:49:22, 28.07s/it]
13%|ββ | 663/5000 [5:22:47<34:38:45, 28.76s/it]
13%|ββ | 664/5000 [5:23:14<33:59:12, 28.22s/it]
13%|ββ | 665/5000 [5:23:43<34:21:41, 28.54s/it]
13%|ββ | 666/5000 [5:24:13<34:59:56, 29.07s/it]
13%|ββ | 667/5000 [5:24:41<34:26:02, 28.61s/it]
13%|ββ | 668/5000 [5:25:10<34:23:58, 28.59s/it]
13%|ββ | 669/5000 [5:25:37<34:09:20, 28.39s/it]
13%|ββ | 670/5000 [5:26:05<33:49:30, 28.12s/it]
13%|ββ | 671/5000 [5:26:34<34:06:17, 28.36s/it]
13%|ββ | 672/5000 [5:27:01<33:38:34, 27.98s/it]
13%|ββ | 673/5000 [5:27:29<33:48:14, 28.12s/it]
13%|ββ | 674/5000 [5:27:57<33:27:28, 27.84s/it]
14%|ββ | 675/5000 [5:28:26<33:50:01, 28.16s/it]
14%|ββ | 675/5000 [5:28:26<33:50:01, 28.16s/it]
14%|ββ | 676/5000 [5:28:53<33:29:13, 27.88s/it]
14%|ββ | 677/5000 [5:29:21<33:40:11, 28.04s/it]
14%|ββ | 678/5000 [5:29:49<33:39:39, 28.04s/it]
14%|ββ | 679/5000 [5:30:17<33:37:37, 28.02s/it]
14%|ββ | 680/5000 [5:30:50<35:25:47, 29.52s/it]
14%|ββ | 681/5000 [5:31:18<34:39:24, 28.89s/it]
14%|ββ | 682/5000 [5:31:46<34:23:33, 28.67s/it]
14%|ββ | 683/5000 [5:32:16<34:54:39, 29.11s/it]
14%|ββ | 684/5000 [5:32:43<34:12:00, 28.53s/it]
14%|ββ | 685/5000 [5:33:12<34:15:56, 28.59s/it]
14%|ββ | 686/5000 [5:33:39<33:46:32, 28.19s/it]
14%|ββ | 687/5000 [5:34:07<33:39:02, 28.09s/it]
14%|ββ | 688/5000 [5:34:34<33:16:41, 27.78s/it]
14%|ββ | 689/5000 [5:35:02<33:16:57, 27.79s/it]
14%|ββ | 690/5000 [5:35:30<33:18:56, 27.83s/it]
14%|ββ | 691/5000 [5:35:57<33:16:16, 27.80s/it]
14%|ββ | 692/5000 [5:36:25<33:17:30, 27.82s/it]
14%|ββ | 693/5000 [5:36:53<33:20:06, 27.86s/it]
14%|ββ | 694/5000 [5:37:21<33:12:18, 27.76s/it]
14%|ββ | 695/5000 [5:37:49<33:26:51, 27.97s/it]
14%|ββ | 696/5000 [5:38:16<33:09:09, 27.73s/it]
14%|ββ | 697/5000 [5:38:45<33:27:38, 27.99s/it]
14%|ββ | 698/5000 [5:39:12<33:15:51, 27.84s/it]
14%|ββ | 699/5000 [5:39:42<33:47:13, 28.28s/it]
14%|ββ | 700/5000 [5:40:09<33:25:24, 27.98s/it]
14%|ββ | 700/5000 [5:40:09<33:25:24, 27.98s/it]
14%|ββ | 701/5000 [5:40:38<33:49:12, 28.32s/it]
14%|ββ | 702/5000 [5:41:06<33:48:01, 28.31s/it]
14%|ββ | 703/5000 [5:41:33<33:18:52, 27.91s/it]
14%|ββ | 704/5000 [5:42:02<33:31:58, 28.10s/it]
14%|ββ | 705/5000 [5:42:29<33:12:10, 27.83s/it]
14%|ββ | 706/5000 [5:42:58<33:31:47, 28.11s/it]
14%|ββ | 707/5000 [5:43:25<33:10:31, 27.82s/it]
14%|ββ | 708/5000 [5:43:54<33:28:57, 28.08s/it]
14%|ββ | 709/5000 [5:44:22<33:26:10, 28.05s/it]
14%|ββ | 710/5000 [5:44:49<33:15:27, 27.91s/it]
14%|ββ | 711/5000 [5:45:18<33:30:15, 28.12s/it]
14%|ββ | 712/5000 [5:45:46<33:20:00, 27.99s/it]
14%|ββ | 713/5000 [5:46:14<33:28:33, 28.11s/it]
14%|ββ | 714/5000 [5:46:42<33:24:13, 28.06s/it]
14%|ββ | 715/5000 [5:47:10<33:25:10, 28.08s/it]
14%|ββ | 716/5000 [5:47:39<33:45:55, 28.37s/it]
14%|ββ | 717/5000 [5:48:06<33:08:19, 27.85s/it]
14%|ββ | 718/5000 [5:48:34<33:19:45, 28.02s/it]
14%|ββ | 719/5000 [5:49:01<32:57:32, 27.72s/it]
14%|ββ | 720/5000 [5:49:30<33:10:49, 27.91s/it]
14%|ββ | 721/5000 [5:49:58<33:11:27, 27.92s/it]
14%|ββ | 722/5000 [5:50:26<33:14:19, 27.97s/it]
14%|ββ | 723/5000 [5:50:55<33:50:46, 28.49s/it]
14%|ββ | 724/5000 [5:51:16<31:01:47, 26.12s/it]
14%|ββ | 725/5000 [5:51:27<25:30:12, 21.48s/it]
14%|ββ | 725/5000 [5:51:27<25:30:12, 21.48s/it]
15%|ββ | 726/5000 [5:51:37<21:34:55, 18.18s/it]
15%|ββ | 727/5000 [5:51:48<18:50:29, 15.87s/it]{'loss': 0.2339, 'learning_rate': 9.835555555555556e-06, 'epoch': 2.02} |
|
{'loss': 0.1714, 'learning_rate': 9.780000000000001e-06, 'epoch': 2.02} |
|
{'loss': 0.1657, 'learning_rate': 9.724444444444445e-06, 'epoch': 2.03} |
|
{'loss': 0.1688, 'learning_rate': 9.66888888888889e-06, 'epoch': 2.03} |
|
{'loss': 0.1616, 'learning_rate': 9.613333333333335e-06, 'epoch': 2.04} |
|
{'loss': 0.1541, 'learning_rate': 9.557777777777777e-06, 'epoch': 2.04} |
|
{'loss': 0.1565, 'learning_rate': 9.502222222222223e-06, 'epoch': 2.05} |
|
|
|
Reading metadata...: 0it [00:00, ?it/s][A |
|
Reading metadata...: 1it [00:00, 1.22it/s][A |
|
Reading metadata...: 15305it [00:00, 22720.16it/s][A |
|
Reading metadata...: 24307it [00:01, 16290.58it/s][A |
|
Reading metadata...: 38794it [00:01, 29865.72it/s][A |
|
Reading metadata...: 47811it [00:02, 24797.60it/s][A |
|
Reading metadata...: 59039it [00:03, 18295.04it/s][A |
|
Reading metadata...: 72636it [00:03, 27336.81it/s][A |
|
Reading metadata...: 80216it [00:04, 16069.94it/s][A |
|
Reading metadata...: 93450it [00:04, 23744.46it/s][A |
|
Reading metadata...: 101289it [00:04, 23211.42it/s][A |
|
Reading metadata...: 114047it [00:07, 10528.73it/s][A |
|
Reading metadata...: 128225it [00:07, 15810.54it/s][A |
|
Reading metadata...: 135590it [00:11, 5740.60it/s] [A |
|
Reading metadata...: 149664it [00:12, 8135.61it/s][A |
|
Reading metadata...: 164652it [00:12, 12291.18it/s][A |
|
Reading metadata...: 172319it [00:14, 7862.68it/s] [A |
|
Reading metadata...: 184380it [00:16, 6816.06it/s][A |
|
Reading metadata...: 199539it [00:16, 10435.79it/s][A |
|
Reading metadata...: 206876it [00:17, 11503.04it/s][A |
|
Reading metadata...: 220924it [00:17, 17051.73it/s][A |
|
Reading metadata...: 229142it [00:21, 5822.80it/s] [A
Reading metadata...: 230467it [00:21, 10480.94it/s] |
|
|
|
Reading metadata...: 0it [00:00, ?it/s][A |
|
Reading metadata...: 1it [00:00, 3.29it/s][A |
|
Reading metadata...: 14712it [00:00, 47049.80it/s][A
Reading metadata...: 15520it [00:00, 37871.82it/s] |
|
15%|ββ | 728/5000 [5:53:41<53:39:39, 45.22s/it]
15%|ββ | 729/5000 [5:54:11<48:04:05, 40.52s/it]
15%|ββ | 730/5000 [5:54:39<43:36:48, 36.77s/it]
15%|ββ | 731/5000 [5:55:08<40:59:17, 34.56s/it]
15%|ββ | 732/5000 [5:55:42<40:45:17, 34.38s/it]
15%|ββ | 733/5000 [5:56:09<38:11:56, 32.23s/it]
15%|ββ | 734/5000 [5:56:40<37:27:14, 31.61s/it]
15%|ββ | 735/5000 [5:57:07<36:04:03, 30.44s/it]
15%|ββ | 736/5000 [5:57:42<37:32:24, 31.69s/it]
15%|ββ | 737/5000 [5:58:10<36:21:08, 30.70s/it]
15%|ββ | 738/5000 [5:58:40<36:02:02, 30.44s/it]
15%|ββ | 739/5000 [5:59:10<35:56:49, 30.37s/it]
15%|ββ | 740/5000 [5:59:38<34:51:02, 29.45s/it]
15%|ββ | 741/5000 [6:00:07<34:45:35, 29.38s/it]
15%|ββ | 742/5000 [6:00:34<33:56:30, 28.70s/it]
15%|ββ | 743/5000 [6:01:05<34:37:05, 29.28s/it]
15%|ββ | 744/5000 [6:01:34<34:34:46, 29.25s/it]
15%|ββ | 745/5000 [6:02:06<35:41:29, 30.20s/it]
15%|ββ | 746/5000 [6:02:36<35:33:58, 30.10s/it]
15%|ββ | 747/5000 [6:03:03<34:34:25, 29.27s/it]
15%|ββ | 748/5000 [6:03:31<34:08:49, 28.91s/it]
15%|ββ | 749/5000 [6:04:03<34:54:34, 29.56s/it]
15%|ββ | 750/5000 [6:04:30<34:05:50, 28.88s/it]
15%|ββ | 750/5000 [6:04:30<34:05:50, 28.88s/it]
15%|ββ | 751/5000 [6:05:07<37:05:15, 31.42s/it]
15%|ββ | 752/5000 [6:05:37<36:39:13, 31.06s/it]
15%|ββ | 753/5000 [6:06:05<35:28:54, 30.08s/it]
15%|ββ | 754/5000 [6:06:39<36:51:03, 31.24s/it]
15%|ββ | 755/5000 [6:07:07<35:38:37, 30.23s/it]
15%|ββ | 756/5000 [6:07:37<35:26:10, 30.06s/it]
15%|ββ | 757/5000 [6:08:07<35:29:49, 30.12s/it]
15%|ββ | 758/5000 [6:08:34<34:15:32, 29.07s/it]
15%|ββ | 759/5000 [6:09:03<34:21:23, 29.16s/it]
15%|ββ | 760/5000 [6:09:30<33:45:17, 28.66s/it]
15%|ββ | 761/5000 [6:09:59<33:51:21, 28.75s/it]
15%|ββ | 762/5000 [6:10:27<33:25:13, 28.39s/it]
15%|ββ | 763/5000 [6:10:54<33:01:41, 28.06s/it]
15%|ββ | 764/5000 [6:11:31<36:15:42, 30.82s/it]
15%|ββ | 765/5000 [6:11:59<34:56:02, 29.70s/it]
15%|ββ | 766/5000 [6:12:28<35:00:23, 29.76s/it]
15%|ββ | 767/5000 [6:12:57<34:23:52, 29.25s/it]
15%|ββ | 768/5000 [6:13:24<33:54:39, 28.85s/it]
15%|ββ | 769/5000 [6:13:55<34:23:01, 29.26s/it]
15%|ββ | 770/5000 [6:14:23<34:04:35, 29.00s/it]
15%|ββ | 771/5000 [6:14:51<33:51:13, 28.82s/it]
15%|ββ | 772/5000 [6:15:26<35:51:57, 30.54s/it]
15%|ββ | 773/5000 [6:15:53<34:33:53, 29.44s/it]
15%|ββ | 774/5000 [6:16:23<34:46:09, 29.62s/it]
16%|ββ | 775/5000 [6:16:50<33:53:36, 28.88s/it]
16%|ββ | 775/5000 [6:16:50<33:53:36, 28.88s/it]
16%|ββ | 776/5000 [6:17:19<33:49:31, 28.83s/it]
16%|ββ | 777/5000 [6:17:47<33:27:29, 28.52s/it]
16%|ββ | 778/5000 [6:18:15<33:18:03, 28.40s/it]
16%|ββ | 779/5000 [6:18:44<33:35:37, 28.65s/it]
16%|ββ | 780/5000 [6:19:14<34:01:02, 29.02s/it]
16%|ββ | 781/5000 [6:19:41<33:22:30, 28.48s/it]
16%|ββ | 782/5000 [6:20:11<33:48:20, 28.85s/it]
16%|ββ | 783/5000 [6:20:38<33:12:53, 28.36s/it]
16%|ββ | 784/5000 [6:21:06<33:04:51, 28.25s/it]
16%|ββ | 785/5000 [6:21:36<33:49:27, 28.89s/it]
16%|ββ | 786/5000 [6:22:04<33:16:00, 28.42s/it]
16%|ββ | 787/5000 [6:22:37<35:04:51, 29.98s/it]
16%|ββ | 788/5000 [6:23:05<34:07:36, 29.17s/it]
16%|ββ | 789/5000 [6:23:35<34:37:41, 29.60s/it]
16%|ββ | 790/5000 [6:24:05<34:39:41, 29.64s/it]
16%|ββ | 791/5000 [6:24:32<33:54:30, 29.00s/it]
16%|ββ | 792/5000 [6:25:02<34:02:49, 29.13s/it]
16%|ββ | 793/5000 [6:25:30<33:32:34, 28.70s/it]
16%|ββ | 794/5000 [6:25:59<33:55:58, 29.04s/it]
16%|ββ | 795/5000 [6:26:27<33:22:46, 28.58s/it]
16%|ββ | 796/5000 [6:26:57<33:46:37, 28.92s/it]
16%|ββ | 797/5000 [6:27:26<34:00:05, 29.12s/it]
16%|ββ | 798/5000 [6:27:54<33:36:14, 28.79s/it]
16%|ββ | 799/5000 [6:28:22<33:09:42, 28.42s/it]
16%|ββ | 800/5000 [6:28:50<33:05:30, 28.36s/it]
16%|ββ | 800/5000 [6:28:50<33:05:30, 28.36s/it]
16%|ββ | 801/5000 [6:29:20<33:49:58, 29.01s/it]
16%|ββ | 802/5000 [6:29:48<33:18:34, 28.56s/it]
16%|ββ | 803/5000 [6:30:16<33:00:33, 28.31s/it]
16%|ββ | 804/5000 [6:30:45<33:21:32, 28.62s/it]
16%|ββ | 805/5000 [6:31:12<32:53:59, 28.23s/it]
16%|ββ | 806/5000 [6:31:43<33:43:33, 28.95s/it]
16%|ββ | 807/5000 [6:32:12<33:51:49, 29.07s/it]
16%|ββ | 808/5000 [6:32:40<33:21:02, 28.64s/it]
16%|ββ | 809/5000 [6:33:09<33:38:23, 28.90s/it]
16%|ββ | 810/5000 [6:33:38<33:21:03, 28.65s/it]
16%|ββ | 811/5000 [6:34:07<33:43:25, 28.98s/it]
16%|ββ | 812/5000 [6:34:38<34:18:16, 29.49s/it]
16%|ββ | 813/5000 [6:35:05<33:21:11, 28.68s/it]
16%|ββ | 814/5000 [6:35:40<35:44:30, 30.74s/it]
16%|ββ | 815/5000 [6:36:08<34:48:28, 29.94s/it]
16%|ββ | 816/5000 [6:36:36<33:51:27, 29.13s/it]
16%|ββ | 817/5000 [6:37:05<34:01:11, 29.28s/it]
16%|ββ | 818/5000 [6:37:32<33:15:36, 28.63s/it]
16%|ββ | 819/5000 [6:38:03<33:49:33, 29.13s/it]
16%|ββ | 820/5000 [6:38:30<33:13:06, 28.61s/it]
16%|ββ | 821/5000 [6:39:00<33:48:53, 29.13s/it]
16%|ββ | 822/5000 [6:39:30<33:48:45, 29.13s/it]
16%|ββ | 823/5000 [6:39:58<33:22:56, 28.77s/it]
16%|ββ | 824/5000 [6:40:27<33:47:23, 29.13s/it]
16%|ββ | 825/5000 [6:40:55<33:15:27, 28.68s/it]
16%|ββ | 825/5000 [6:40:55<33:15:27, 28.68s/it]
17%|ββ | 826/5000 [6:41:24<33:29:37, 28.89s/it]
17%|ββ | 827/5000 [6:41:54<33:46:18, 29.13s/it]
17%|ββ | 828/5000 [6:42:21<33:04:41, 28.54s/it]
17%|ββ | 829/5000 [6:42:50<33:13:19, 28.67s/it]
17%|ββ | 830/5000 [6:43:18<32:42:05, 28.23s/it]
17%|ββ | 831/5000 [6:43:49<33:39:21, 29.06s/it]
17%|ββ | 832/5000 [6:44:19<33:58:04, 29.34s/it]
17%|ββ | 833/5000 [6:44:47<33:44:01, 29.14s/it]
17%|ββ | 834/5000 [6:45:17<33:50:18, 29.24s/it]
17%|ββ | 835/5000 [6:45:44<33:19:22, 28.80s/it]
17%|ββ | 836/5000 [6:46:14<33:30:41, 28.97s/it]
17%|ββ | 837/5000 [6:46:45<34:14:45, 29.61s/it]
17%|ββ | 838/5000 [6:47:12<33:30:04, 28.98s/it]
17%|ββ | 839/5000 [6:47:42<33:47:51, 29.24s/it]
17%|ββ | 840/5000 [6:48:09<33:02:44, 28.60s/it]
17%|ββ | 841/5000 [6:48:39<33:31:31, 29.02s/it]
17%|ββ | 842/5000 [6:49:09<33:35:38, 29.09s/it]
17%|ββ | 843/5000 [6:49:37<33:12:01, 28.75s/it]
17%|ββ | 844/5000 [6:50:06<33:23:16, 28.92s/it]
17%|ββ | 845/5000 [6:50:34<33:01:02, 28.61s/it]
17%|ββ | 846/5000 [6:51:03<33:12:25, 28.78s/it]
17%|ββ | 847/5000 [6:51:33<33:48:14, 29.30s/it]
17%|ββ | 848/5000 [6:52:01<33:01:41, 28.64s/it]
17%|ββ | 849/5000 [6:52:37<35:46:30, 31.03s/it]
17%|ββ | 850/5000 [6:53:05<34:29:28, 29.92s/it]
17%|ββ | 850/5000 [6:53:05<34:29:28, 29.92s/it]
17%|ββ | 851/5000 [6:53:35<34:48:42, 30.21s/it]
17%|ββ | 852/5000 [6:54:05<34:42:58, 30.13s/it]
17%|ββ | 853/5000 [6:54:33<33:51:44, 29.40s/it]
17%|ββ | 854/5000 [6:55:03<34:05:02, 29.60s/it]
17%|ββ | 855/5000 [6:55:30<33:11:42, 28.83s/it]
17%|ββ | 856/5000 [6:55:58<32:59:04, 28.65s/it]
17%|ββ | 857/5000 [6:56:27<33:07:10, 28.78s/it]
17%|ββ | 858/5000 [6:56:55<32:40:02, 28.39s/it]
17%|ββ | 859/5000 [6:57:23<32:24:55, 28.18s/it]
17%|ββ | 860/5000 [6:57:58<34:50:59, 30.30s/it]
17%|ββ | 861/5000 [6:58:25<33:51:25, 29.45s/it]
17%|ββ | 862/5000 [6:58:58<34:55:20, 30.38s/it]
17%|ββ | 863/5000 [6:59:25<33:42:58, 29.34s/it]
17%|ββ | 864/5000 [6:59:54<33:43:34, 29.36s/it]
17%|ββ | 865/5000 [7:00:22<33:14:45, 28.94s/it]
17%|ββ | 866/5000 [7:00:50<32:47:23, 28.55s/it]
17%|ββ | 867/5000 [7:01:20<33:13:25, 28.94s/it]
17%|ββ | 868/5000 [7:01:48<32:52:28, 28.64s/it]
17%|ββ | 869/5000 [7:02:17<33:06:23, 28.85s/it]
17%|ββ | 870/5000 [7:02:45<32:51:15, 28.64s/it]
17%|ββ | 871/5000 [7:03:20<35:01:17, 30.53s/it]
17%|ββ | 872/5000 [7:03:51<35:08:10, 30.64s/it]
17%|ββ | 873/5000 [7:04:18<33:48:05, 29.49s/it]
17%|ββ | 874/5000 [7:04:48<34:08:34, 29.79s/it]
18%|ββ | 875/5000 [7:05:18<34:01:22, 29.69s/it]
18%|ββ | 875/5000 [7:05:18<34:01:22, 29.69s/it]
18%|ββ | 876/5000 [7:05:45<33:13:20, 29.00s/it]
18%|ββ | 877/5000 [7:06:14<33:17:05, 29.06s/it]
18%|ββ | 878/5000 [7:06:42<32:56:44, 28.77s/it]
18%|ββ | 879/5000 [7:07:11<32:55:37, 28.76s/it]
18%|ββ | 880/5000 [7:07:40<33:04:05, 28.89s/it]
18%|ββ | 881/5000 [7:08:07<32:21:35, 28.28s/it]
18%|ββ | 882/5000 [7:08:37<32:51:55, 28.73s/it]
18%|ββ | 883/5000 [7:09:04<32:23:05, 28.32s/it]
18%|ββ | 884/5000 [7:09:34<32:45:27, 28.65s/it]
18%|ββ | 885/5000 [7:10:05<33:36:33, 29.40s/it]
18%|ββ | 886/5000 [7:10:32<32:53:07, 28.78s/it]
18%|ββ | 887/5000 [7:11:03<33:34:37, 29.39s/it]
18%|ββ | 888/5000 [7:11:30<32:49:42, 28.74s/it]
18%|ββ | 889/5000 [7:12:01<33:27:55, 29.31s/it]
18%|ββ | 890/5000 [7:12:30<33:29:28, 29.34s/it]
18%|ββ | 891/5000 [7:12:58<32:54:51, 28.84s/it]
18%|ββ | 892/5000 [7:13:27<33:08:42, 29.05s/it]
18%|ββ | 893/5000 [7:13:57<33:18:21, 29.19s/it]
18%|ββ | 894/5000 [7:14:26<33:22:13, 29.26s/it]
18%|ββ | 895/5000 [7:14:59<34:38:36, 30.38s/it]
18%|ββ | 896/5000 [7:15:27<33:39:13, 29.52s/it]
18%|ββ | 897/5000 [7:15:59<34:24:06, 30.18s/it]
18%|ββ | 898/5000 [7:16:26<33:21:32, 29.28s/it]
18%|ββ | 899/5000 [7:16:58<34:20:00, 30.14s/it]
18%|ββ | 900/5000 [7:17:28<34:12:41, 30.04s/it]
18%|ββ | 900/5000 [7:17:28<34:12:41, 30.04s/it]
18%|ββ | 901/5000 [7:17:57<33:54:36, 29.78s/it]
18%|ββ | 902/5000 [7:18:27<34:02:04, 29.90s/it]
18%|ββ | 903/5000 [7:18:58<34:21:46, 30.19s/it]
18%|ββ | 904/5000 [7:19:35<36:36:11, 32.17s/it]
18%|ββ | 905/5000 [7:20:06<36:13:23, 31.84s/it]
18%|ββ | 906/5000 [7:20:32<34:11:47, 30.07s/it]
18%|ββ | 907/5000 [7:21:01<33:47:49, 29.73s/it]
18%|ββ | 908/5000 [7:21:28<32:48:30, 28.86s/it]
18%|ββ | 909/5000 [7:21:58<33:15:32, 29.27s/it]
18%|ββ | 910/5000 [7:22:28<33:36:40, 29.58s/it]
18%|ββ | 911/5000 [7:22:57<33:11:37, 29.22s/it]
18%|ββ | 912/5000 [7:23:25<32:46:31, 28.86s/it]
18%|ββ | 913/5000 [7:23:52<32:25:41, 28.56s/it]
18%|ββ | 914/5000 [7:24:23<33:02:25, 29.11s/it]
18%|ββ | 915/5000 [7:24:51<32:43:25, 28.84s/it]
18%|ββ | 916/5000 [7:25:21<33:07:57, 29.21s/it]
18%|ββ | 917/5000 [7:25:51<33:30:42, 29.55s/it]
18%|ββ | 918/5000 [7:26:19<32:42:58, 28.85s/it]
18%|ββ | 919/5000 [7:26:47<32:31:37, 28.69s/it]
18%|ββ | 920/5000 [7:27:14<32:06:16, 28.33s/it]
18%|ββ | 921/5000 [7:27:44<32:26:34, 28.63s/it]
18%|ββ | 922/5000 [7:28:12<32:09:55, 28.40s/it]
18%|ββ | 923/5000 [7:28:40<32:08:38, 28.38s/it]
18%|ββ | 924/5000 [7:29:08<31:54:16, 28.18s/it]
18%|ββ | 925/5000 [7:29:35<31:44:30, 28.04s/it]
18%|ββ | 925/5000 [7:29:35<31:44:30, 28.04s/it]
19%|ββ | 926/5000 [7:30:04<31:47:25, 28.09s/it]
19%|ββ | 927/5000 [7:30:32<32:00:48, 28.30s/it]
19%|ββ | 928/5000 [7:31:00<31:50:53, 28.16s/it]
19%|ββ | 929/5000 [7:31:29<31:57:40, 28.26s/it]
19%|ββ | 930/5000 [7:31:55<31:16:47, 27.67s/it]
19%|ββ | 931/5000 [7:32:23<31:32:21, 27.90s/it]
19%|ββ | 932/5000 [7:32:51<31:14:55, 27.65s/it]
19%|ββ | 933/5000 [7:33:18<31:16:09, 27.68s/it]
19%|ββ | 934/5000 [7:33:47<31:28:34, 27.87s/it]
19%|ββ | 935/5000 [7:34:14<31:16:16, 27.69s/it]
19%|ββ | 936/5000 [7:34:42<31:21:30, 27.78s/it]
19%|ββ | 937/5000 [7:35:09<31:08:43, 27.60s/it]
19%|ββ | 938/5000 [7:35:37<31:22:31, 27.81s/it]
19%|ββ | 939/5000 [7:36:04<31:06:48, 27.58s/it]
19%|ββ | 940/5000 [7:36:33<31:35:04, 28.01s/it]
19%|ββ | 941/5000 [7:37:00<31:17:01, 27.75s/it]
19%|ββ | 942/5000 [7:37:29<31:25:26, 27.88s/it]
19%|ββ | 943/5000 [7:37:56<31:23:47, 27.86s/it]
19%|ββ | 944/5000 [7:38:24<31:15:03, 27.74s/it]
19%|ββ | 945/5000 [7:38:52<31:25:30, 27.90s/it]
19%|ββ | 946/5000 [7:39:20<31:15:43, 27.76s/it]
19%|ββ | 947/5000 [7:39:47<31:08:51, 27.67s/it]
19%|ββ | 948/5000 [7:40:15<31:05:34, 27.62s/it]
19%|ββ | 949/5000 [7:40:42<31:08:09, 27.67s/it]
19%|ββ | 950/5000 [7:41:10<31:10:19, 27.71s/it]
19%|ββ | 950/5000 [7:41:10<31:10:19, 27.71s/it]
19%|ββ | 951/5000 [7:41:38<31:02:17, 27.60s/it]
19%|ββ | 952/5000 [7:42:06<31:17:56, 27.84s/it]
19%|ββ | 953/5000 [7:42:33<31:10:47, 27.74s/it]
19%|ββ | 954/5000 [7:43:01<31:10:38, 27.74s/it]
19%|ββ | 955/5000 [7:43:29<31:16:40, 27.84s/it]
19%|ββ | 956/5000 [7:43:57<31:04:24, 27.66s/it]
19%|ββ | 957/5000 [7:44:26<31:34:34, 28.12s/it]
19%|ββ | 958/5000 [7:44:53<31:20:08, 27.91s/it]
19%|ββ | 959/5000 [7:45:26<32:58:33, 29.38s/it]
19%|ββ | 960/5000 [7:45:53<32:19:12, 28.80s/it]
19%|ββ | 961/5000 [7:46:25<33:25:18, 29.79s/it]
19%|ββ | 962/5000 [7:46:56<33:35:23, 29.95s/it]
19%|ββ | 963/5000 [7:47:26<33:36:12, 29.97s/it]
19%|ββ | 964/5000 [7:47:56<33:48:16, 30.15s/it]
19%|ββ | 965/5000 [7:48:28<34:15:00, 30.56s/it]
19%|ββ | 966/5000 [7:48:57<33:51:06, 30.21s/it]
19%|ββ | 967/5000 [7:49:09<27:42:45, 24.74s/it]
19%|ββ | 968/5000 [7:49:20<22:55:00, 20.46s/it]
19%|ββ | 969/5000 [7:49:30<19:35:46, 17.50s/it]
19%|ββ | 970/5000 [7:49:41<17:13:56, 15.39s/it]{'loss': 0.1355, 'learning_rate': 9.446666666666667e-06, 'epoch': 3.0} |
|
{'loss': 0.1341, 'learning_rate': 9.391111111111111e-06, 'epoch': 3.01} |
|
{'loss': 0.1286, 'learning_rate': 9.335555555555557e-06, 'epoch': 3.01} |
|
{'loss': 0.1343, 'learning_rate': 9.280000000000001e-06, 'epoch': 3.02} |
|
{'loss': 0.0982, 'learning_rate': 9.224444444444445e-06, 'epoch': 3.02} |
|
{'loss': 0.0957, 'learning_rate': 9.168888888888889e-06, 'epoch': 3.03} |
|
{'loss': 0.1034, 'learning_rate': 9.113333333333335e-06, 'epoch': 3.03} |
|
{'loss': 0.099, 'learning_rate': 9.057777777777779e-06, 'epoch': 3.04} |
|
{'loss': 0.0863, 'learning_rate': 9.002222222222223e-06, 'epoch': 3.04} |
|
|
|
Reading metadata...: 0it [00:00, ?it/s][A |
|
Reading metadata...: 1it [00:02, 2.60s/it][A |
|
Reading metadata...: 15617it [00:02, 8140.52it/s][A |
|
Reading metadata...: 24803it [00:06, 4292.71it/s][A |
|
Reading metadata...: 39955it [00:06, 8647.84it/s][A |
|
Reading metadata...: 49034it [00:08, 6612.74it/s][A |
|
Reading metadata...: 59039it [00:10, 5732.00it/s][A |
|
Reading metadata...: 73373it [00:10, 9308.71it/s][A |
|
Reading metadata...: 80810it [00:12, 6627.27it/s][A |
|
Reading metadata...: 94811it [00:12, 10402.09it/s][A |
|
Reading metadata...: 102746it [00:15, 7114.62it/s][A |
|
Reading metadata...: 114047it [00:17, 6416.84it/s][A |
|
Reading metadata...: 127999it [00:17, 9826.73it/s][A |
|
Reading metadata...: 135058it [00:18, 9788.92it/s][A |
|
Reading metadata...: 149274it [00:18, 15031.61it/s][A |
|
Reading metadata...: 157206it [00:20, 8902.85it/s] [A |
|
Reading metadata...: 168342it [00:27, 3691.69it/s][A |
|
Reading metadata...: 180935it [00:27, 5491.80it/s][A |
|
Reading metadata...: 187520it [00:29, 4752.96it/s][A |
|
Reading metadata...: 202142it [00:29, 7631.55it/s][A |
|
Reading metadata...: 209822it [00:31, 6112.93it/s][A |
|
Reading metadata...: 221914it [00:33, 6083.09it/s][A
Reading metadata...: 230467it [00:33, 6849.17it/s] |
|
|
|
Reading metadata...: 0it [00:00, ?it/s][A |
|
Reading metadata...: 1it [00:02, 2.09s/it][A |
|
Reading metadata...: 15257it [00:02, 9746.46it/s][A
Reading metadata...: 15520it [00:02, 7068.64it/s] |
|
19%|ββ | 971/5000 [7:52:06<60:48:31, 54.33s/it]
19%|ββ | 972/5000 [7:52:38<53:08:38, 47.50s/it]
19%|ββ | 973/5000 [7:53:07<46:54:55, 41.94s/it]
19%|ββ | 974/5000 [7:53:34<42:06:19, 37.65s/it]
20%|ββ | 975/5000 [7:54:04<39:23:15, 35.23s/it]
20%|ββ | 975/5000 [7:54:04<39:23:15, 35.23s/it]
20%|ββ | 976/5000 [7:54:31<36:49:15, 32.94s/it]
20%|ββ | 977/5000 [7:54:59<34:59:49, 31.32s/it]
20%|ββ | 978/5000 [7:55:29<34:29:03, 30.87s/it]
20%|ββ | 979/5000 [7:55:56<33:18:25, 29.82s/it]
20%|ββ | 980/5000 [7:56:26<33:26:54, 29.95s/it]
20%|ββ | 981/5000 [7:56:54<32:35:09, 29.19s/it]
20%|ββ | 982/5000 [7:57:23<32:45:47, 29.35s/it]
20%|ββ | 983/5000 [7:57:52<32:28:20, 29.10s/it]
20%|ββ | 984/5000 [7:58:19<31:49:56, 28.53s/it]
20%|ββ | 985/5000 [7:58:49<32:10:44, 28.85s/it]
20%|ββ | 986/5000 [7:59:16<31:32:44, 28.29s/it]
20%|ββ | 987/5000 [7:59:44<31:29:57, 28.26s/it]
20%|ββ | 988/5000 [8:00:13<31:47:08, 28.52s/it]
20%|ββ | 989/5000 [8:00:40<31:23:04, 28.17s/it]
20%|ββ | 990/5000 [8:01:11<32:01:09, 28.75s/it]
20%|ββ | 991/5000 [8:01:38<31:34:53, 28.36s/it]
20%|ββ | 992/5000 [8:02:07<31:41:55, 28.47s/it]
20%|ββ | 993/5000 [8:02:35<31:27:53, 28.27s/it]
20%|ββ | 994/5000 [8:03:02<31:15:46, 28.09s/it]
20%|ββ | 995/5000 [8:03:29<30:57:37, 27.83s/it]
20%|ββ | 996/5000 [8:03:59<31:24:05, 28.23s/it]
20%|ββ | 997/5000 [8:04:27<31:29:14, 28.32s/it]
20%|ββ | 998/5000 [8:04:55<31:25:54, 28.27s/it]
20%|ββ | 999/5000 [8:05:24<31:33:52, 28.40s/it]
20%|ββ | 1000/5000 [8:05:54<32:01:14, 28.82s/it]
20%|ββ | 1000/5000 [8:05:54<32:01:14, 28.82s/it][INFO|trainer.py:3138] 2023-05-10 17:47:34,809 >> ***** Running Evaluation ***** |
|
[INFO|trainer.py:3142] 2023-05-10 17:47:34,809 >> Num examples: Unknown |
|
[INFO|trainer.py:3143] 2023-05-10 17:47:34,809 >> Batch size = 64 |
|
{'loss': 0.1075, 'learning_rate': 8.946666666666669e-06, 'epoch': 4.0} |
|
{'loss': 0.1386, 'learning_rate': 8.891111111111111e-06, 'epoch': 4.01} |
|
|
|
Reading metadata...: 0it [00:00, ?it/s][A |
|
Reading metadata...: 1it [00:02, 2.10s/it][A
Reading metadata...: 15520it [00:02, 7062.10it/s] |
|
[INFO|trainer_utils.py:693] 2023-05-10 17:47:50,596 >> The following columns in the evaluation set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`, you can safely ignore this message. |
|
20%|ββ | 1000/5000 [9:02:19<32:01:14, 28.82s/it][INFO|trainer.py:2877] 2023-05-10 18:43:59,730 >> Saving model checkpoint to ./checkpoint-1000 |
|
[INFO|configuration_utils.py:458] 2023-05-10 18:43:59,735 >> Configuration saved in ./checkpoint-1000/config.json |
|
[INFO|configuration_utils.py:364] 2023-05-10 18:43:59,739 >> Configuration saved in ./checkpoint-1000/generation_config.json |
|
[INFO|modeling_utils.py:1855] 2023-05-10 18:44:03,168 >> Model weights saved in ./checkpoint-1000/pytorch_model.bin |
|
[INFO|feature_extraction_utils.py:369] 2023-05-10 18:44:03,173 >> Feature extractor saved in ./checkpoint-1000/preprocessor_config.json |
|
[INFO|feature_extraction_utils.py:369] 2023-05-10 18:44:11,165 >> Feature extractor saved in ./preprocessor_config.json |
|
Adding files tracked by Git LFS: ['wandb/run-20230509_115211-hq92t8sj/run-hq92t8sj.wandb', 'wandb/run-20230510_094132-lvsln7ks/run-lvsln7ks.wandb']. This may take a bit of time if the files are large. |
|
{'eval_loss': 0.24644243717193604, 'eval_wer': 9.800036380645496, 'eval_runtime': 3384.9122, 'eval_samples_per_second': 4.585, 'eval_steps_per_second': 0.072, 'epoch': 4.01} |
|
05/10/2023 18:44:21 - WARNING - huggingface_hub.repository - Adding files tracked by Git LFS: ['wandb/run-20230509_115211-hq92t8sj/run-hq92t8sj.wandb', 'wandb/run-20230510_094132-lvsln7ks/run-lvsln7ks.wandb']. This may take a bit of time if the files are large. |
|
|