|
2023-10-24 18:08:00,796 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 18:08:00,797 Model: "SequenceTagger( |
|
(embeddings): TransformerWordEmbeddings( |
|
(model): BertModel( |
|
(embeddings): BertEmbeddings( |
|
(word_embeddings): Embedding(64001, 768) |
|
(position_embeddings): Embedding(512, 768) |
|
(token_type_embeddings): Embedding(2, 768) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(encoder): BertEncoder( |
|
(layer): ModuleList( |
|
(0): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(1): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(2): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(3): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(4): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(5): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(6): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(7): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(8): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(9): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(10): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(11): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
) |
|
) |
|
(pooler): BertPooler( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(activation): Tanh() |
|
) |
|
) |
|
) |
|
(locked_dropout): LockedDropout(p=0.5) |
|
(linear): Linear(in_features=768, out_features=13, bias=True) |
|
(loss_function): CrossEntropyLoss() |
|
)" |
|
2023-10-24 18:08:00,797 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 18:08:00,797 MultiCorpus: 7936 train + 992 dev + 992 test sentences |
|
- NER_ICDAR_EUROPEANA Corpus: 7936 train + 992 dev + 992 test sentences - /home/ubuntu/.flair/datasets/ner_icdar_europeana/fr |
|
2023-10-24 18:08:00,797 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 18:08:00,797 Train: 7936 sentences |
|
2023-10-24 18:08:00,797 (train_with_dev=False, train_with_test=False) |
|
2023-10-24 18:08:00,797 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 18:08:00,797 Training Params: |
|
2023-10-24 18:08:00,797 - learning_rate: "3e-05" |
|
2023-10-24 18:08:00,797 - mini_batch_size: "4" |
|
2023-10-24 18:08:00,797 - max_epochs: "10" |
|
2023-10-24 18:08:00,797 - shuffle: "True" |
|
2023-10-24 18:08:00,797 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 18:08:00,797 Plugins: |
|
2023-10-24 18:08:00,797 - TensorboardLogger |
|
2023-10-24 18:08:00,797 - LinearScheduler | warmup_fraction: '0.1' |
|
2023-10-24 18:08:00,797 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 18:08:00,797 Final evaluation on model from best epoch (best-model.pt) |
|
2023-10-24 18:08:00,797 - metric: "('micro avg', 'f1-score')" |
|
2023-10-24 18:08:00,797 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 18:08:00,797 Computation: |
|
2023-10-24 18:08:00,797 - compute on device: cuda:0 |
|
2023-10-24 18:08:00,798 - embedding storage: none |
|
2023-10-24 18:08:00,798 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 18:08:00,798 Model training base path: "hmbench-icdar/fr-dbmdz/bert-base-historic-multilingual-64k-td-cased-bs4-wsFalse-e10-lr3e-05-poolingfirst-layers-1-crfFalse-4" |
|
2023-10-24 18:08:00,798 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 18:08:00,798 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 18:08:00,798 Logging anything other than scalars to TensorBoard is currently not supported. |
|
2023-10-24 18:08:12,645 epoch 1 - iter 198/1984 - loss 1.65883842 - time (sec): 11.85 - samples/sec: 1399.30 - lr: 0.000003 - momentum: 0.000000 |
|
2023-10-24 18:08:24,650 epoch 1 - iter 396/1984 - loss 0.99109184 - time (sec): 23.85 - samples/sec: 1395.13 - lr: 0.000006 - momentum: 0.000000 |
|
2023-10-24 18:08:36,790 epoch 1 - iter 594/1984 - loss 0.74189793 - time (sec): 35.99 - samples/sec: 1363.42 - lr: 0.000009 - momentum: 0.000000 |
|
2023-10-24 18:08:48,710 epoch 1 - iter 792/1984 - loss 0.60171309 - time (sec): 47.91 - samples/sec: 1355.98 - lr: 0.000012 - momentum: 0.000000 |
|
2023-10-24 18:09:00,807 epoch 1 - iter 990/1984 - loss 0.51096698 - time (sec): 60.01 - samples/sec: 1357.21 - lr: 0.000015 - momentum: 0.000000 |
|
2023-10-24 18:09:12,838 epoch 1 - iter 1188/1984 - loss 0.45403133 - time (sec): 72.04 - samples/sec: 1348.75 - lr: 0.000018 - momentum: 0.000000 |
|
2023-10-24 18:09:25,213 epoch 1 - iter 1386/1984 - loss 0.41042045 - time (sec): 84.41 - samples/sec: 1348.01 - lr: 0.000021 - momentum: 0.000000 |
|
2023-10-24 18:09:37,257 epoch 1 - iter 1584/1984 - loss 0.37518830 - time (sec): 96.46 - samples/sec: 1351.04 - lr: 0.000024 - momentum: 0.000000 |
|
2023-10-24 18:09:49,215 epoch 1 - iter 1782/1984 - loss 0.34872736 - time (sec): 108.42 - samples/sec: 1350.48 - lr: 0.000027 - momentum: 0.000000 |
|
2023-10-24 18:10:01,661 epoch 1 - iter 1980/1984 - loss 0.32450599 - time (sec): 120.86 - samples/sec: 1354.91 - lr: 0.000030 - momentum: 0.000000 |
|
2023-10-24 18:10:01,887 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 18:10:01,887 EPOCH 1 done: loss 0.3243 - lr: 0.000030 |
|
2023-10-24 18:10:04,980 DEV : loss 0.08822248876094818 - f1-score (micro avg) 0.7194 |
|
2023-10-24 18:10:04,995 saving best model |
|
2023-10-24 18:10:05,463 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 18:10:17,722 epoch 2 - iter 198/1984 - loss 0.11538531 - time (sec): 12.26 - samples/sec: 1406.65 - lr: 0.000030 - momentum: 0.000000 |
|
2023-10-24 18:10:29,871 epoch 2 - iter 396/1984 - loss 0.11078638 - time (sec): 24.41 - samples/sec: 1382.17 - lr: 0.000029 - momentum: 0.000000 |
|
2023-10-24 18:10:41,861 epoch 2 - iter 594/1984 - loss 0.10902064 - time (sec): 36.40 - samples/sec: 1369.84 - lr: 0.000029 - momentum: 0.000000 |
|
2023-10-24 18:10:53,970 epoch 2 - iter 792/1984 - loss 0.11119279 - time (sec): 48.51 - samples/sec: 1366.96 - lr: 0.000029 - momentum: 0.000000 |
|
2023-10-24 18:11:06,235 epoch 2 - iter 990/1984 - loss 0.10982936 - time (sec): 60.77 - samples/sec: 1372.84 - lr: 0.000028 - momentum: 0.000000 |
|
2023-10-24 18:11:18,421 epoch 2 - iter 1188/1984 - loss 0.11198539 - time (sec): 72.96 - samples/sec: 1366.86 - lr: 0.000028 - momentum: 0.000000 |
|
2023-10-24 18:11:30,288 epoch 2 - iter 1386/1984 - loss 0.11253700 - time (sec): 84.82 - samples/sec: 1355.61 - lr: 0.000028 - momentum: 0.000000 |
|
2023-10-24 18:11:42,371 epoch 2 - iter 1584/1984 - loss 0.11167056 - time (sec): 96.91 - samples/sec: 1350.00 - lr: 0.000027 - momentum: 0.000000 |
|
2023-10-24 18:11:54,534 epoch 2 - iter 1782/1984 - loss 0.10945571 - time (sec): 109.07 - samples/sec: 1353.54 - lr: 0.000027 - momentum: 0.000000 |
|
2023-10-24 18:12:06,503 epoch 2 - iter 1980/1984 - loss 0.10890702 - time (sec): 121.04 - samples/sec: 1348.29 - lr: 0.000027 - momentum: 0.000000 |
|
2023-10-24 18:12:06,922 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 18:12:06,922 EPOCH 2 done: loss 0.1089 - lr: 0.000027 |
|
2023-10-24 18:12:10,353 DEV : loss 0.10544043034315109 - f1-score (micro avg) 0.7355 |
|
2023-10-24 18:12:10,368 saving best model |
|
2023-10-24 18:12:10,938 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 18:12:22,889 epoch 3 - iter 198/1984 - loss 0.07127408 - time (sec): 11.95 - samples/sec: 1348.12 - lr: 0.000026 - momentum: 0.000000 |
|
2023-10-24 18:12:35,106 epoch 3 - iter 396/1984 - loss 0.06552621 - time (sec): 24.17 - samples/sec: 1369.18 - lr: 0.000026 - momentum: 0.000000 |
|
2023-10-24 18:12:47,225 epoch 3 - iter 594/1984 - loss 0.07374619 - time (sec): 36.29 - samples/sec: 1341.65 - lr: 0.000026 - momentum: 0.000000 |
|
2023-10-24 18:12:59,160 epoch 3 - iter 792/1984 - loss 0.07723250 - time (sec): 48.22 - samples/sec: 1325.49 - lr: 0.000025 - momentum: 0.000000 |
|
2023-10-24 18:13:11,313 epoch 3 - iter 990/1984 - loss 0.07632000 - time (sec): 60.37 - samples/sec: 1334.15 - lr: 0.000025 - momentum: 0.000000 |
|
2023-10-24 18:13:23,489 epoch 3 - iter 1188/1984 - loss 0.07708131 - time (sec): 72.55 - samples/sec: 1340.39 - lr: 0.000025 - momentum: 0.000000 |
|
2023-10-24 18:13:35,706 epoch 3 - iter 1386/1984 - loss 0.07772303 - time (sec): 84.77 - samples/sec: 1351.33 - lr: 0.000024 - momentum: 0.000000 |
|
2023-10-24 18:13:47,929 epoch 3 - iter 1584/1984 - loss 0.07931530 - time (sec): 96.99 - samples/sec: 1350.57 - lr: 0.000024 - momentum: 0.000000 |
|
2023-10-24 18:14:00,114 epoch 3 - iter 1782/1984 - loss 0.08200348 - time (sec): 109.17 - samples/sec: 1353.32 - lr: 0.000024 - momentum: 0.000000 |
|
2023-10-24 18:14:12,130 epoch 3 - iter 1980/1984 - loss 0.08111184 - time (sec): 121.19 - samples/sec: 1349.68 - lr: 0.000023 - momentum: 0.000000 |
|
2023-10-24 18:14:12,387 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 18:14:12,387 EPOCH 3 done: loss 0.0810 - lr: 0.000023 |
|
2023-10-24 18:14:15,515 DEV : loss 0.13007180392742157 - f1-score (micro avg) 0.7582 |
|
2023-10-24 18:14:15,530 saving best model |
|
2023-10-24 18:14:16,148 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 18:14:28,115 epoch 4 - iter 198/1984 - loss 0.04311242 - time (sec): 11.97 - samples/sec: 1296.25 - lr: 0.000023 - momentum: 0.000000 |
|
2023-10-24 18:14:40,318 epoch 4 - iter 396/1984 - loss 0.05582813 - time (sec): 24.17 - samples/sec: 1338.58 - lr: 0.000023 - momentum: 0.000000 |
|
2023-10-24 18:14:52,490 epoch 4 - iter 594/1984 - loss 0.05639418 - time (sec): 36.34 - samples/sec: 1361.89 - lr: 0.000022 - momentum: 0.000000 |
|
2023-10-24 18:15:04,460 epoch 4 - iter 792/1984 - loss 0.05593021 - time (sec): 48.31 - samples/sec: 1341.54 - lr: 0.000022 - momentum: 0.000000 |
|
2023-10-24 18:15:16,489 epoch 4 - iter 990/1984 - loss 0.05809953 - time (sec): 60.34 - samples/sec: 1333.17 - lr: 0.000022 - momentum: 0.000000 |
|
2023-10-24 18:15:28,801 epoch 4 - iter 1188/1984 - loss 0.05764319 - time (sec): 72.65 - samples/sec: 1344.01 - lr: 0.000021 - momentum: 0.000000 |
|
2023-10-24 18:15:40,962 epoch 4 - iter 1386/1984 - loss 0.05706164 - time (sec): 84.81 - samples/sec: 1341.85 - lr: 0.000021 - momentum: 0.000000 |
|
2023-10-24 18:15:53,083 epoch 4 - iter 1584/1984 - loss 0.05865109 - time (sec): 96.93 - samples/sec: 1341.23 - lr: 0.000021 - momentum: 0.000000 |
|
2023-10-24 18:16:05,301 epoch 4 - iter 1782/1984 - loss 0.05768982 - time (sec): 109.15 - samples/sec: 1349.70 - lr: 0.000020 - momentum: 0.000000 |
|
2023-10-24 18:16:17,487 epoch 4 - iter 1980/1984 - loss 0.05892127 - time (sec): 121.34 - samples/sec: 1348.47 - lr: 0.000020 - momentum: 0.000000 |
|
2023-10-24 18:16:17,741 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 18:16:17,741 EPOCH 4 done: loss 0.0588 - lr: 0.000020 |
|
2023-10-24 18:16:21,164 DEV : loss 0.14774079620838165 - f1-score (micro avg) 0.7464 |
|
2023-10-24 18:16:21,179 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 18:16:33,451 epoch 5 - iter 198/1984 - loss 0.05564537 - time (sec): 12.27 - samples/sec: 1372.89 - lr: 0.000020 - momentum: 0.000000 |
|
2023-10-24 18:16:45,465 epoch 5 - iter 396/1984 - loss 0.04733119 - time (sec): 24.29 - samples/sec: 1359.12 - lr: 0.000019 - momentum: 0.000000 |
|
2023-10-24 18:16:57,576 epoch 5 - iter 594/1984 - loss 0.04636178 - time (sec): 36.40 - samples/sec: 1366.23 - lr: 0.000019 - momentum: 0.000000 |
|
2023-10-24 18:17:09,967 epoch 5 - iter 792/1984 - loss 0.04486463 - time (sec): 48.79 - samples/sec: 1379.59 - lr: 0.000019 - momentum: 0.000000 |
|
2023-10-24 18:17:22,045 epoch 5 - iter 990/1984 - loss 0.04414787 - time (sec): 60.86 - samples/sec: 1367.00 - lr: 0.000018 - momentum: 0.000000 |
|
2023-10-24 18:17:33,989 epoch 5 - iter 1188/1984 - loss 0.04391772 - time (sec): 72.81 - samples/sec: 1357.52 - lr: 0.000018 - momentum: 0.000000 |
|
2023-10-24 18:17:46,001 epoch 5 - iter 1386/1984 - loss 0.04395576 - time (sec): 84.82 - samples/sec: 1352.18 - lr: 0.000018 - momentum: 0.000000 |
|
2023-10-24 18:17:58,166 epoch 5 - iter 1584/1984 - loss 0.04447887 - time (sec): 96.99 - samples/sec: 1354.57 - lr: 0.000017 - momentum: 0.000000 |
|
2023-10-24 18:18:10,146 epoch 5 - iter 1782/1984 - loss 0.04456227 - time (sec): 108.97 - samples/sec: 1347.63 - lr: 0.000017 - momentum: 0.000000 |
|
2023-10-24 18:18:22,438 epoch 5 - iter 1980/1984 - loss 0.04356891 - time (sec): 121.26 - samples/sec: 1349.39 - lr: 0.000017 - momentum: 0.000000 |
|
2023-10-24 18:18:22,692 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 18:18:22,693 EPOCH 5 done: loss 0.0435 - lr: 0.000017 |
|
2023-10-24 18:18:25,801 DEV : loss 0.1785285472869873 - f1-score (micro avg) 0.7691 |
|
2023-10-24 18:18:25,816 saving best model |
|
2023-10-24 18:18:26,434 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 18:18:38,509 epoch 6 - iter 198/1984 - loss 0.03770600 - time (sec): 12.07 - samples/sec: 1341.65 - lr: 0.000016 - momentum: 0.000000 |
|
2023-10-24 18:18:50,512 epoch 6 - iter 396/1984 - loss 0.03533865 - time (sec): 24.08 - samples/sec: 1337.14 - lr: 0.000016 - momentum: 0.000000 |
|
2023-10-24 18:19:02,726 epoch 6 - iter 594/1984 - loss 0.03400472 - time (sec): 36.29 - samples/sec: 1352.00 - lr: 0.000016 - momentum: 0.000000 |
|
2023-10-24 18:19:14,776 epoch 6 - iter 792/1984 - loss 0.03310510 - time (sec): 48.34 - samples/sec: 1338.78 - lr: 0.000015 - momentum: 0.000000 |
|
2023-10-24 18:19:27,111 epoch 6 - iter 990/1984 - loss 0.03249866 - time (sec): 60.68 - samples/sec: 1350.34 - lr: 0.000015 - momentum: 0.000000 |
|
2023-10-24 18:19:39,316 epoch 6 - iter 1188/1984 - loss 0.03090049 - time (sec): 72.88 - samples/sec: 1340.35 - lr: 0.000015 - momentum: 0.000000 |
|
2023-10-24 18:19:51,893 epoch 6 - iter 1386/1984 - loss 0.03241745 - time (sec): 85.46 - samples/sec: 1340.48 - lr: 0.000014 - momentum: 0.000000 |
|
2023-10-24 18:20:03,917 epoch 6 - iter 1584/1984 - loss 0.03297873 - time (sec): 97.48 - samples/sec: 1338.72 - lr: 0.000014 - momentum: 0.000000 |
|
2023-10-24 18:20:16,009 epoch 6 - iter 1782/1984 - loss 0.03333018 - time (sec): 109.57 - samples/sec: 1338.46 - lr: 0.000014 - momentum: 0.000000 |
|
2023-10-24 18:20:28,225 epoch 6 - iter 1980/1984 - loss 0.03294409 - time (sec): 121.79 - samples/sec: 1344.26 - lr: 0.000013 - momentum: 0.000000 |
|
2023-10-24 18:20:28,466 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 18:20:28,467 EPOCH 6 done: loss 0.0329 - lr: 0.000013 |
|
2023-10-24 18:20:31,597 DEV : loss 0.1928633749485016 - f1-score (micro avg) 0.7655 |
|
2023-10-24 18:20:31,612 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 18:20:44,023 epoch 7 - iter 198/1984 - loss 0.02473915 - time (sec): 12.41 - samples/sec: 1414.46 - lr: 0.000013 - momentum: 0.000000 |
|
2023-10-24 18:20:56,221 epoch 7 - iter 396/1984 - loss 0.02715590 - time (sec): 24.61 - samples/sec: 1392.39 - lr: 0.000013 - momentum: 0.000000 |
|
2023-10-24 18:21:08,255 epoch 7 - iter 594/1984 - loss 0.02685085 - time (sec): 36.64 - samples/sec: 1381.08 - lr: 0.000012 - momentum: 0.000000 |
|
2023-10-24 18:21:20,492 epoch 7 - iter 792/1984 - loss 0.02566708 - time (sec): 48.88 - samples/sec: 1369.54 - lr: 0.000012 - momentum: 0.000000 |
|
2023-10-24 18:21:32,601 epoch 7 - iter 990/1984 - loss 0.02548493 - time (sec): 60.99 - samples/sec: 1361.78 - lr: 0.000012 - momentum: 0.000000 |
|
2023-10-24 18:21:44,601 epoch 7 - iter 1188/1984 - loss 0.02537405 - time (sec): 72.99 - samples/sec: 1354.50 - lr: 0.000011 - momentum: 0.000000 |
|
2023-10-24 18:21:56,645 epoch 7 - iter 1386/1984 - loss 0.02423187 - time (sec): 85.03 - samples/sec: 1348.01 - lr: 0.000011 - momentum: 0.000000 |
|
2023-10-24 18:22:08,738 epoch 7 - iter 1584/1984 - loss 0.02555620 - time (sec): 97.12 - samples/sec: 1347.41 - lr: 0.000011 - momentum: 0.000000 |
|
2023-10-24 18:22:20,847 epoch 7 - iter 1782/1984 - loss 0.02502907 - time (sec): 109.23 - samples/sec: 1349.96 - lr: 0.000010 - momentum: 0.000000 |
|
2023-10-24 18:22:32,913 epoch 7 - iter 1980/1984 - loss 0.02462085 - time (sec): 121.30 - samples/sec: 1348.75 - lr: 0.000010 - momentum: 0.000000 |
|
2023-10-24 18:22:33,203 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 18:22:33,203 EPOCH 7 done: loss 0.0246 - lr: 0.000010 |
|
2023-10-24 18:22:36,323 DEV : loss 0.20923519134521484 - f1-score (micro avg) 0.7735 |
|
2023-10-24 18:22:36,338 saving best model |
|
2023-10-24 18:22:36,954 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 18:22:49,099 epoch 8 - iter 198/1984 - loss 0.01338123 - time (sec): 12.14 - samples/sec: 1349.93 - lr: 0.000010 - momentum: 0.000000 |
|
2023-10-24 18:23:01,404 epoch 8 - iter 396/1984 - loss 0.01458755 - time (sec): 24.45 - samples/sec: 1372.58 - lr: 0.000009 - momentum: 0.000000 |
|
2023-10-24 18:23:13,503 epoch 8 - iter 594/1984 - loss 0.01472531 - time (sec): 36.55 - samples/sec: 1363.60 - lr: 0.000009 - momentum: 0.000000 |
|
2023-10-24 18:23:25,875 epoch 8 - iter 792/1984 - loss 0.01510012 - time (sec): 48.92 - samples/sec: 1352.70 - lr: 0.000009 - momentum: 0.000000 |
|
2023-10-24 18:23:38,187 epoch 8 - iter 990/1984 - loss 0.01631973 - time (sec): 61.23 - samples/sec: 1362.06 - lr: 0.000008 - momentum: 0.000000 |
|
2023-10-24 18:23:50,209 epoch 8 - iter 1188/1984 - loss 0.01696770 - time (sec): 73.25 - samples/sec: 1358.52 - lr: 0.000008 - momentum: 0.000000 |
|
2023-10-24 18:24:02,466 epoch 8 - iter 1386/1984 - loss 0.01742849 - time (sec): 85.51 - samples/sec: 1352.90 - lr: 0.000008 - momentum: 0.000000 |
|
2023-10-24 18:24:14,519 epoch 8 - iter 1584/1984 - loss 0.01641470 - time (sec): 97.56 - samples/sec: 1345.67 - lr: 0.000007 - momentum: 0.000000 |
|
2023-10-24 18:24:26,773 epoch 8 - iter 1782/1984 - loss 0.01644926 - time (sec): 109.82 - samples/sec: 1349.03 - lr: 0.000007 - momentum: 0.000000 |
|
2023-10-24 18:24:38,678 epoch 8 - iter 1980/1984 - loss 0.01625596 - time (sec): 121.72 - samples/sec: 1344.35 - lr: 0.000007 - momentum: 0.000000 |
|
2023-10-24 18:24:38,916 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 18:24:38,916 EPOCH 8 done: loss 0.0162 - lr: 0.000007 |
|
2023-10-24 18:24:42,041 DEV : loss 0.22515711188316345 - f1-score (micro avg) 0.7676 |
|
2023-10-24 18:24:42,056 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 18:24:54,265 epoch 9 - iter 198/1984 - loss 0.00852019 - time (sec): 12.21 - samples/sec: 1354.85 - lr: 0.000006 - momentum: 0.000000 |
|
2023-10-24 18:25:06,179 epoch 9 - iter 396/1984 - loss 0.01328350 - time (sec): 24.12 - samples/sec: 1336.12 - lr: 0.000006 - momentum: 0.000000 |
|
2023-10-24 18:25:18,591 epoch 9 - iter 594/1984 - loss 0.01339687 - time (sec): 36.53 - samples/sec: 1348.93 - lr: 0.000006 - momentum: 0.000000 |
|
2023-10-24 18:25:30,817 epoch 9 - iter 792/1984 - loss 0.01234555 - time (sec): 48.76 - samples/sec: 1347.68 - lr: 0.000005 - momentum: 0.000000 |
|
2023-10-24 18:25:42,890 epoch 9 - iter 990/1984 - loss 0.01167028 - time (sec): 60.83 - samples/sec: 1352.82 - lr: 0.000005 - momentum: 0.000000 |
|
2023-10-24 18:25:54,838 epoch 9 - iter 1188/1984 - loss 0.01071810 - time (sec): 72.78 - samples/sec: 1344.58 - lr: 0.000005 - momentum: 0.000000 |
|
2023-10-24 18:26:07,055 epoch 9 - iter 1386/1984 - loss 0.01111198 - time (sec): 85.00 - samples/sec: 1348.21 - lr: 0.000004 - momentum: 0.000000 |
|
2023-10-24 18:26:19,193 epoch 9 - iter 1584/1984 - loss 0.01162792 - time (sec): 97.14 - samples/sec: 1349.38 - lr: 0.000004 - momentum: 0.000000 |
|
2023-10-24 18:26:31,487 epoch 9 - iter 1782/1984 - loss 0.01233370 - time (sec): 109.43 - samples/sec: 1347.80 - lr: 0.000004 - momentum: 0.000000 |
|
2023-10-24 18:26:43,546 epoch 9 - iter 1980/1984 - loss 0.01214635 - time (sec): 121.49 - samples/sec: 1347.15 - lr: 0.000003 - momentum: 0.000000 |
|
2023-10-24 18:26:43,791 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 18:26:43,791 EPOCH 9 done: loss 0.0122 - lr: 0.000003 |
|
2023-10-24 18:26:46,921 DEV : loss 0.2285650223493576 - f1-score (micro avg) 0.7685 |
|
2023-10-24 18:26:46,936 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 18:26:59,335 epoch 10 - iter 198/1984 - loss 0.00472620 - time (sec): 12.40 - samples/sec: 1283.74 - lr: 0.000003 - momentum: 0.000000 |
|
2023-10-24 18:27:11,443 epoch 10 - iter 396/1984 - loss 0.00589847 - time (sec): 24.51 - samples/sec: 1336.66 - lr: 0.000003 - momentum: 0.000000 |
|
2023-10-24 18:27:23,523 epoch 10 - iter 594/1984 - loss 0.00695023 - time (sec): 36.59 - samples/sec: 1327.88 - lr: 0.000002 - momentum: 0.000000 |
|
2023-10-24 18:27:35,805 epoch 10 - iter 792/1984 - loss 0.00775105 - time (sec): 48.87 - samples/sec: 1333.36 - lr: 0.000002 - momentum: 0.000000 |
|
2023-10-24 18:27:48,037 epoch 10 - iter 990/1984 - loss 0.00815130 - time (sec): 61.10 - samples/sec: 1337.31 - lr: 0.000002 - momentum: 0.000000 |
|
2023-10-24 18:28:00,332 epoch 10 - iter 1188/1984 - loss 0.00801025 - time (sec): 73.40 - samples/sec: 1342.12 - lr: 0.000001 - momentum: 0.000000 |
|
2023-10-24 18:28:12,341 epoch 10 - iter 1386/1984 - loss 0.00793725 - time (sec): 85.40 - samples/sec: 1342.25 - lr: 0.000001 - momentum: 0.000000 |
|
2023-10-24 18:28:24,359 epoch 10 - iter 1584/1984 - loss 0.00790952 - time (sec): 97.42 - samples/sec: 1340.60 - lr: 0.000001 - momentum: 0.000000 |
|
2023-10-24 18:28:36,593 epoch 10 - iter 1782/1984 - loss 0.00760356 - time (sec): 109.66 - samples/sec: 1347.44 - lr: 0.000000 - momentum: 0.000000 |
|
2023-10-24 18:28:48,671 epoch 10 - iter 1980/1984 - loss 0.00764524 - time (sec): 121.73 - samples/sec: 1344.84 - lr: 0.000000 - momentum: 0.000000 |
|
2023-10-24 18:28:48,912 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 18:28:48,912 EPOCH 10 done: loss 0.0076 - lr: 0.000000 |
|
2023-10-24 18:28:52,025 DEV : loss 0.23562917113304138 - f1-score (micro avg) 0.7713 |
|
2023-10-24 18:28:52,525 ---------------------------------------------------------------------------------------------------- |
|
2023-10-24 18:28:52,526 Loading model from best epoch ... |
|
2023-10-24 18:28:53,997 SequenceTagger predicts: Dictionary with 13 tags: O, S-PER, B-PER, E-PER, I-PER, S-LOC, B-LOC, E-LOC, I-LOC, S-ORG, B-ORG, E-ORG, I-ORG |
|
2023-10-24 18:28:57,085 |
|
Results: |
|
- F-score (micro) 0.7853 |
|
- F-score (macro) 0.6936 |
|
- Accuracy 0.6639 |
|
|
|
By class: |
|
precision recall f1-score support |
|
|
|
LOC 0.8353 0.8595 0.8473 655 |
|
PER 0.7250 0.7803 0.7516 223 |
|
ORG 0.5699 0.4173 0.4818 127 |
|
|
|
micro avg 0.7845 0.7861 0.7853 1005 |
|
macro avg 0.7101 0.6857 0.6936 1005 |
|
weighted avg 0.7773 0.7861 0.7799 1005 |
|
|
|
2023-10-24 18:28:57,085 ---------------------------------------------------------------------------------------------------- |
|
|