2023-10-24 20:02:47,488 ----------------------------------------------------------------------------------------------------
2023-10-24 20:02:47,489 Model: "SequenceTagger(
  (embeddings): TransformerWordEmbeddings(
    (model): BertModel(
      (embeddings): BertEmbeddings(
        (word_embeddings): Embedding(64001, 768)
        (position_embeddings): Embedding(512, 768)
        (token_type_embeddings): Embedding(2, 768)
        (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (encoder): BertEncoder(
        (layer): ModuleList(
          (0-11): 12 x BertLayer(
            (attention): BertAttention(
              (self): BertSelfAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features=768, bias=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (output): BertSelfOutput(
                (dense): Linear(in_features=768, out_features=768, bias=True)
                (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
            )
            (intermediate): BertIntermediate(
              (dense): Linear(in_features=768, out_features=3072, bias=True)
              (intermediate_act_fn): GELUActivation()
            )
            (output): BertOutput(
              (dense): Linear(in_features=3072, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
        )
      )
      (pooler): BertPooler(
        (dense): Linear(in_features=768, out_features=768, bias=True)
        (activation): Tanh()
      )
    )
  )
  (locked_dropout): LockedDropout(p=0.5)
  (linear): Linear(in_features=768, out_features=13, bias=True)
  (loss_function): CrossEntropyLoss()
)"
2023-10-24 20:02:47,489 ----------------------------------------------------------------------------------------------------
2023-10-24 20:02:47,489 MultiCorpus: 7936 train + 992 dev + 992 test sentences
 - NER_ICDAR_EUROPEANA Corpus: 7936 train + 992 dev + 992 test sentences - /home/ubuntu/.flair/datasets/ner_icdar_europeana/fr
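Flair ships a loader for this corpus; a minimal sketch, assuming the NER_ICDAR_EUROPEANA dataset class available in recent Flair releases (the cache path above matches its default download location):

```python
from flair.datasets import NER_ICDAR_EUROPEANA

# Downloads to ~/.flair/datasets/ner_icdar_europeana/fr on first use.
corpus = NER_ICDAR_EUROPEANA(language="fr")
print(corpus)  # 7936 train + 992 dev + 992 test sentences
```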
2023-10-24 20:02:47,489 ----------------------------------------------------------------------------------------------------
2023-10-24 20:02:47,489 Train: 7936 sentences
2023-10-24 20:02:47,489 (train_with_dev=False, train_with_test=False)
2023-10-24 20:02:47,489 ----------------------------------------------------------------------------------------------------
2023-10-24 20:02:47,489 Training Params:
2023-10-24 20:02:47,489 - learning_rate: "3e-05"
2023-10-24 20:02:47,489 - mini_batch_size: "8"
2023-10-24 20:02:47,489 - max_epochs: "10"
2023-10-24 20:02:47,489 - shuffle: "True"
2023-10-24 20:02:47,489 ----------------------------------------------------------------------------------------------------
2023-10-24 20:02:47,489 Plugins:
2023-10-24 20:02:47,489 - TensorboardLogger
2023-10-24 20:02:47,489 - LinearScheduler | warmup_fraction: '0.1'
2023-10-24 20:02:47,489 ----------------------------------------------------------------------------------------------------
2023-10-24 20:02:47,489 Final evaluation on model from best epoch (best-model.pt)
2023-10-24 20:02:47,489 - metric: "('micro avg', 'f1-score')"
2023-10-24 20:02:47,489 ----------------------------------------------------------------------------------------------------
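These parameters map directly onto Flair's fine-tuning API. A sketch under the assumption that `embeddings` and `corpus` are the objects from the snippets above; `ModelTrainer.fine_tune` applies a linear learning-rate schedule with warmup by default, which is the LinearScheduler (warmup_fraction 0.1) logged here:

```python
from flair.models import SequenceTagger
from flair.trainers import ModelTrainer

# 13-tag BIOES label dictionary (listed near the end of this log).
tag_dictionary = corpus.make_label_dictionary(label_type="ner")

tagger = SequenceTagger(
    embeddings=embeddings,
    tag_dictionary=tag_dictionary,
    tag_type="ner",
    use_crf=False,  # "crfFalse" in the run name: plain softmax over the linear layer
    use_rnn=False,  # linear projection 768 -> 13, matching the model dump above
)

trainer = ModelTrainer(tagger, corpus)
trainer.fine_tune(
    "hmbench-icdar/fr-dbmdz/bert-base-historic-multilingual-64k-td-cased"
    "-bs8-wsFalse-e10-lr3e-05-poolingfirst-layers-1-crfFalse-5",
    learning_rate=3e-5,
    mini_batch_size=8,
    max_epochs=10,
)
```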
2023-10-24 20:02:47,489 Computation:
2023-10-24 20:02:47,489 - compute on device: cuda:0
2023-10-24 20:02:47,490 - embedding storage: none
2023-10-24 20:02:47,490 ----------------------------------------------------------------------------------------------------
2023-10-24 20:02:47,490 Model training base path: "hmbench-icdar/fr-dbmdz/bert-base-historic-multilingual-64k-td-cased-bs8-wsFalse-e10-lr3e-05-poolingfirst-layers-1-crfFalse-5"
2023-10-24 20:02:47,490 ----------------------------------------------------------------------------------------------------
2023-10-24 20:02:47,490 ----------------------------------------------------------------------------------------------------
2023-10-24 20:02:47,490 Logging anything other than scalars to TensorBoard is currently not supported.
2023-10-24 20:02:55,750 epoch 1 - iter 99/992 - loss 1.59118482 - time (sec): 8.26 - samples/sec: 1999.95 - lr: 0.000003 - momentum: 0.000000
2023-10-24 20:03:03,880 epoch 1 - iter 198/992 - loss 0.97946191 - time (sec): 16.39 - samples/sec: 1972.66 - lr: 0.000006 - momentum: 0.000000
2023-10-24 20:03:12,289 epoch 1 - iter 297/992 - loss 0.73168403 - time (sec): 24.80 - samples/sec: 1979.81 - lr: 0.000009 - momentum: 0.000000
2023-10-24 20:03:20,578 epoch 1 - iter 396/992 - loss 0.60297547 - time (sec): 33.09 - samples/sec: 1965.17 - lr: 0.000012 - momentum: 0.000000
2023-10-24 20:03:28,872 epoch 1 - iter 495/992 - loss 0.51620364 - time (sec): 41.38 - samples/sec: 1969.89 - lr: 0.000015 - momentum: 0.000000
2023-10-24 20:03:37,072 epoch 1 - iter 594/992 - loss 0.45718439 - time (sec): 49.58 - samples/sec: 1974.42 - lr: 0.000018 - momentum: 0.000000
2023-10-24 20:03:45,223 epoch 1 - iter 693/992 - loss 0.41193573 - time (sec): 57.73 - samples/sec: 1983.59 - lr: 0.000021 - momentum: 0.000000
2023-10-24 20:03:53,478 epoch 1 - iter 792/992 - loss 0.37602030 - time (sec): 65.99 - samples/sec: 1976.35 - lr: 0.000024 - momentum: 0.000000
2023-10-24 20:04:01,767 epoch 1 - iter 891/992 - loss 0.34693143 - time (sec): 74.28 - samples/sec: 1979.67 - lr: 0.000027 - momentum: 0.000000
2023-10-24 20:04:10,248 epoch 1 - iter 990/992 - loss 0.32443529 - time (sec): 82.76 - samples/sec: 1978.41 - lr: 0.000030 - momentum: 0.000000
2023-10-24 20:04:10,388 ----------------------------------------------------------------------------------------------------
2023-10-24 20:04:10,389 EPOCH 1 done: loss 0.3240 - lr: 0.000030
2023-10-24 20:04:13,450 DEV : loss 0.0944613367319107 - f1-score (micro avg) 0.7083
2023-10-24 20:04:13,465 saving best model
2023-10-24 20:04:13,933 ----------------------------------------------------------------------------------------------------
2023-10-24 20:04:22,342 epoch 2 - iter 99/992 - loss 0.10275485 - time (sec): 8.41 - samples/sec: 1958.67 - lr: 0.000030 - momentum: 0.000000
2023-10-24 20:04:30,673 epoch 2 - iter 198/992 - loss 0.10068022 - time (sec): 16.74 - samples/sec: 1970.48 - lr: 0.000029 - momentum: 0.000000
2023-10-24 20:04:38,931 epoch 2 - iter 297/992 - loss 0.10125339 - time (sec): 25.00 - samples/sec: 1992.84 - lr: 0.000029 - momentum: 0.000000
2023-10-24 20:04:47,132 epoch 2 - iter 396/992 - loss 0.10339627 - time (sec): 33.20 - samples/sec: 1975.86 - lr: 0.000029 - momentum: 0.000000
2023-10-24 20:04:55,179 epoch 2 - iter 495/992 - loss 0.10317839 - time (sec): 41.25 - samples/sec: 1966.63 - lr: 0.000028 - momentum: 0.000000
2023-10-24 20:05:03,440 epoch 2 - iter 594/992 - loss 0.10299473 - time (sec): 49.51 - samples/sec: 1963.90 - lr: 0.000028 - momentum: 0.000000
2023-10-24 20:05:11,498 epoch 2 - iter 693/992 - loss 0.10106078 - time (sec): 57.56 - samples/sec: 1970.87 - lr: 0.000028 - momentum: 0.000000
2023-10-24 20:05:20,387 epoch 2 - iter 792/992 - loss 0.10005304 - time (sec): 66.45 - samples/sec: 1966.32 - lr: 0.000027 - momentum: 0.000000
2023-10-24 20:05:28,936 epoch 2 - iter 891/992 - loss 0.09826429 - time (sec): 75.00 - samples/sec: 1958.15 - lr: 0.000027 - momentum: 0.000000
2023-10-24 20:05:37,201 epoch 2 - iter 990/992 - loss 0.09701001 - time (sec): 83.27 - samples/sec: 1967.50 - lr: 0.000027 - momentum: 0.000000
2023-10-24 20:05:37,342 ----------------------------------------------------------------------------------------------------
2023-10-24 20:05:37,342 EPOCH 2 done: loss 0.0973 - lr: 0.000027
2023-10-24 20:05:40,755 DEV : loss 0.08149731904268265 - f1-score (micro avg) 0.7535
2023-10-24 20:05:40,770 saving best model
2023-10-24 20:05:41,365 ----------------------------------------------------------------------------------------------------
2023-10-24 20:05:49,755 epoch 3 - iter 99/992 - loss 0.06483087 - time (sec): 8.39 - samples/sec: 2061.97 - lr: 0.000026 - momentum: 0.000000
2023-10-24 20:05:58,130 epoch 3 - iter 198/992 - loss 0.06390309 - time (sec): 16.76 - samples/sec: 2039.46 - lr: 0.000026 - momentum: 0.000000
2023-10-24 20:06:07,135 epoch 3 - iter 297/992 - loss 0.06825244 - time (sec): 25.77 - samples/sec: 1982.26 - lr: 0.000026 - momentum: 0.000000
2023-10-24 20:06:15,363 epoch 3 - iter 396/992 - loss 0.06583388 - time (sec): 34.00 - samples/sec: 1954.94 - lr: 0.000025 - momentum: 0.000000
2023-10-24 20:06:23,441 epoch 3 - iter 495/992 - loss 0.06782042 - time (sec): 42.08 - samples/sec: 1959.00 - lr: 0.000025 - momentum: 0.000000
2023-10-24 20:06:31,799 epoch 3 - iter 594/992 - loss 0.06816392 - time (sec): 50.43 - samples/sec: 1942.39 - lr: 0.000025 - momentum: 0.000000
2023-10-24 20:06:39,927 epoch 3 - iter 693/992 - loss 0.06951408 - time (sec): 58.56 - samples/sec: 1949.22 - lr: 0.000024 - momentum: 0.000000
2023-10-24 20:06:48,238 epoch 3 - iter 792/992 - loss 0.06950701 - time (sec): 66.87 - samples/sec: 1953.15 - lr: 0.000024 - momentum: 0.000000
2023-10-24 20:06:56,390 epoch 3 - iter 891/992 - loss 0.06889888 - time (sec): 75.02 - samples/sec: 1962.27 - lr: 0.000024 - momentum: 0.000000
2023-10-24 20:07:04,708 epoch 3 - iter 990/992 - loss 0.06860988 - time (sec): 83.34 - samples/sec: 1963.77 - lr: 0.000023 - momentum: 0.000000
2023-10-24 20:07:04,867 ----------------------------------------------------------------------------------------------------
2023-10-24 20:07:04,867 EPOCH 3 done: loss 0.0688 - lr: 0.000023
2023-10-24 20:07:07,975 DEV : loss 0.11014377325773239 - f1-score (micro avg) 0.7629
2023-10-24 20:07:07,990 saving best model
2023-10-24 20:07:08,684 ----------------------------------------------------------------------------------------------------
2023-10-24 20:07:17,018 epoch 4 - iter 99/992 - loss 0.04359741 - time (sec): 8.33 - samples/sec: 1895.18 - lr: 0.000023 - momentum: 0.000000
2023-10-24 20:07:25,176 epoch 4 - iter 198/992 - loss 0.05400608 - time (sec): 16.49 - samples/sec: 1937.48 - lr: 0.000023 - momentum: 0.000000
2023-10-24 20:07:33,462 epoch 4 - iter 297/992 - loss 0.05255787 - time (sec): 24.78 - samples/sec: 1944.78 - lr: 0.000022 - momentum: 0.000000
2023-10-24 20:07:41,717 epoch 4 - iter 396/992 - loss 0.05058843 - time (sec): 33.03 - samples/sec: 1950.47 - lr: 0.000022 - momentum: 0.000000
2023-10-24 20:07:50,115 epoch 4 - iter 495/992 - loss 0.05117591 - time (sec): 41.43 - samples/sec: 1958.07 - lr: 0.000022 - momentum: 0.000000
2023-10-24 20:07:58,308 epoch 4 - iter 594/992 - loss 0.04962431 - time (sec): 49.62 - samples/sec: 1961.95 - lr: 0.000021 - momentum: 0.000000
2023-10-24 20:08:06,737 epoch 4 - iter 693/992 - loss 0.04861731 - time (sec): 58.05 - samples/sec: 1967.11 - lr: 0.000021 - momentum: 0.000000
2023-10-24 20:08:15,545 epoch 4 - iter 792/992 - loss 0.04912095 - time (sec): 66.86 - samples/sec: 1971.14 - lr: 0.000021 - momentum: 0.000000
2023-10-24 20:08:23,875 epoch 4 - iter 891/992 - loss 0.04953506 - time (sec): 75.19 - samples/sec: 1970.26 - lr: 0.000020 - momentum: 0.000000
2023-10-24 20:08:32,116 epoch 4 - iter 990/992 - loss 0.04977895 - time (sec): 83.43 - samples/sec: 1963.13 - lr: 0.000020 - momentum: 0.000000
2023-10-24 20:08:32,251 ----------------------------------------------------------------------------------------------------
2023-10-24 20:08:32,251 EPOCH 4 done: loss 0.0497 - lr: 0.000020
2023-10-24 20:08:35,692 DEV : loss 0.1265212893486023 - f1-score (micro avg) 0.7535
2023-10-24 20:08:35,707 ----------------------------------------------------------------------------------------------------
2023-10-24 20:08:44,169 epoch 5 - iter 99/992 - loss 0.03672420 - time (sec): 8.46 - samples/sec: 1964.14 - lr: 0.000020 - momentum: 0.000000
2023-10-24 20:08:52,494 epoch 5 - iter 198/992 - loss 0.03685557 - time (sec): 16.79 - samples/sec: 1965.83 - lr: 0.000019 - momentum: 0.000000
2023-10-24 20:09:00,693 epoch 5 - iter 297/992 - loss 0.03598710 - time (sec): 24.99 - samples/sec: 1975.35 - lr: 0.000019 - momentum: 0.000000
2023-10-24 20:09:09,148 epoch 5 - iter 396/992 - loss 0.03705036 - time (sec): 33.44 - samples/sec: 1973.37 - lr: 0.000019 - momentum: 0.000000
2023-10-24 20:09:17,698 epoch 5 - iter 495/992 - loss 0.03504569 - time (sec): 41.99 - samples/sec: 1992.63 - lr: 0.000018 - momentum: 0.000000
2023-10-24 20:09:25,948 epoch 5 - iter 594/992 - loss 0.03559228 - time (sec): 50.24 - samples/sec: 1989.90 - lr: 0.000018 - momentum: 0.000000
2023-10-24 20:09:34,628 epoch 5 - iter 693/992 - loss 0.03470096 - time (sec): 58.92 - samples/sec: 1987.81 - lr: 0.000018 - momentum: 0.000000
2023-10-24 20:09:42,779 epoch 5 - iter 792/992 - loss 0.03509798 - time (sec): 67.07 - samples/sec: 1980.51 - lr: 0.000017 - momentum: 0.000000
2023-10-24 20:09:50,997 epoch 5 - iter 891/992 - loss 0.03552997 - time (sec): 75.29 - samples/sec: 1964.66 - lr: 0.000017 - momentum: 0.000000
2023-10-24 20:09:59,196 epoch 5 - iter 990/992 - loss 0.03618223 - time (sec): 83.49 - samples/sec: 1961.18 - lr: 0.000017 - momentum: 0.000000
2023-10-24 20:09:59,345 ----------------------------------------------------------------------------------------------------
2023-10-24 20:09:59,346 EPOCH 5 done: loss 0.0362 - lr: 0.000017
2023-10-24 20:10:02,459 DEV : loss 0.1455710083246231 - f1-score (micro avg) 0.7551
2023-10-24 20:10:02,475 ----------------------------------------------------------------------------------------------------
2023-10-24 20:10:10,749 epoch 6 - iter 99/992 - loss 0.02373328 - time (sec): 8.27 - samples/sec: 1994.80 - lr: 0.000016 - momentum: 0.000000
2023-10-24 20:10:18,809 epoch 6 - iter 198/992 - loss 0.02477475 - time (sec): 16.33 - samples/sec: 1964.86 - lr: 0.000016 - momentum: 0.000000
2023-10-24 20:10:27,003 epoch 6 - iter 297/992 - loss 0.02420627 - time (sec): 24.53 - samples/sec: 1991.25 - lr: 0.000016 - momentum: 0.000000
2023-10-24 20:10:35,859 epoch 6 - iter 396/992 - loss 0.02380439 - time (sec): 33.38 - samples/sec: 2001.97 - lr: 0.000015 - momentum: 0.000000
2023-10-24 20:10:44,194 epoch 6 - iter 495/992 - loss 0.02631127 - time (sec): 41.72 - samples/sec: 1979.48 - lr: 0.000015 - momentum: 0.000000
2023-10-24 20:10:52,307 epoch 6 - iter 594/992 - loss 0.02537463 - time (sec): 49.83 - samples/sec: 1970.50 - lr: 0.000015 - momentum: 0.000000
2023-10-24 20:11:00,566 epoch 6 - iter 693/992 - loss 0.02508577 - time (sec): 58.09 - samples/sec: 1964.72 - lr: 0.000014 - momentum: 0.000000
2023-10-24 20:11:09,022 epoch 6 - iter 792/992 - loss 0.02609448 - time (sec): 66.55 - samples/sec: 1964.53 - lr: 0.000014 - momentum: 0.000000
2023-10-24 20:11:17,516 epoch 6 - iter 891/992 - loss 0.02706129 - time (sec): 75.04 - samples/sec: 1966.78 - lr: 0.000014 - momentum: 0.000000
2023-10-24 20:11:26,264 epoch 6 - iter 990/992 - loss 0.02746838 - time (sec): 83.79 - samples/sec: 1953.73 - lr: 0.000013 - momentum: 0.000000
2023-10-24 20:11:26,410 ----------------------------------------------------------------------------------------------------
2023-10-24 20:11:26,410 EPOCH 6 done: loss 0.0274 - lr: 0.000013
2023-10-24 20:11:29,524 DEV : loss 0.17901930212974548 - f1-score (micro avg) 0.7463
2023-10-24 20:11:29,540 ----------------------------------------------------------------------------------------------------
2023-10-24 20:11:37,805 epoch 7 - iter 99/992 - loss 0.01294926 - time (sec): 8.26 - samples/sec: 1972.39 - lr: 0.000013 - momentum: 0.000000
2023-10-24 20:11:45,828 epoch 7 - iter 198/992 - loss 0.01678572 - time (sec): 16.29 - samples/sec: 1961.33 - lr: 0.000013 - momentum: 0.000000
2023-10-24 20:11:53,910 epoch 7 - iter 297/992 - loss 0.01669408 - time (sec): 24.37 - samples/sec: 1969.44 - lr: 0.000012 - momentum: 0.000000
2023-10-24 20:12:02,668 epoch 7 - iter 396/992 - loss 0.01988450 - time (sec): 33.13 - samples/sec: 1975.67 - lr: 0.000012 - momentum: 0.000000
2023-10-24 20:12:10,697 epoch 7 - iter 495/992 - loss 0.01913683 - time (sec): 41.16 - samples/sec: 1974.35 - lr: 0.000012 - momentum: 0.000000
2023-10-24 20:12:18,939 epoch 7 - iter 594/992 - loss 0.01868056 - time (sec): 49.40 - samples/sec: 1974.09 - lr: 0.000011 - momentum: 0.000000
2023-10-24 20:12:27,347 epoch 7 - iter 693/992 - loss 0.02037703 - time (sec): 57.81 - samples/sec: 1972.34 - lr: 0.000011 - momentum: 0.000000
2023-10-24 20:12:35,926 epoch 7 - iter 792/992 - loss 0.02060074 - time (sec): 66.39 - samples/sec: 1964.36 - lr: 0.000011 - momentum: 0.000000
2023-10-24 20:12:44,228 epoch 7 - iter 891/992 - loss 0.02004654 - time (sec): 74.69 - samples/sec: 1965.19 - lr: 0.000010 - momentum: 0.000000
2023-10-24 20:12:52,908 epoch 7 - iter 990/992 - loss 0.02013671 - time (sec): 83.37 - samples/sec: 1964.24 - lr: 0.000010 - momentum: 0.000000
2023-10-24 20:12:53,039 ----------------------------------------------------------------------------------------------------
2023-10-24 20:12:53,040 EPOCH 7 done: loss 0.0201 - lr: 0.000010
2023-10-24 20:12:56,159 DEV : loss 0.20294290781021118 - f1-score (micro avg) 0.7576
2023-10-24 20:12:56,174 ----------------------------------------------------------------------------------------------------
2023-10-24 20:13:04,435 epoch 8 - iter 99/992 - loss 0.01470518 - time (sec): 8.26 - samples/sec: 1974.52 - lr: 0.000010 - momentum: 0.000000
2023-10-24 20:13:12,428 epoch 8 - iter 198/992 - loss 0.01340265 - time (sec): 16.25 - samples/sec: 1958.84 - lr: 0.000009 - momentum: 0.000000
2023-10-24 20:13:21,278 epoch 8 - iter 297/992 - loss 0.01422147 - time (sec): 25.10 - samples/sec: 1970.59 - lr: 0.000009 - momentum: 0.000000
2023-10-24 20:13:29,355 epoch 8 - iter 396/992 - loss 0.01468520 - time (sec): 33.18 - samples/sec: 1961.33 - lr: 0.000009 - momentum: 0.000000
2023-10-24 20:13:37,775 epoch 8 - iter 495/992 - loss 0.01371271 - time (sec): 41.60 - samples/sec: 1964.85 - lr: 0.000008 - momentum: 0.000000
2023-10-24 20:13:46,273 epoch 8 - iter 594/992 - loss 0.01449551 - time (sec): 50.10 - samples/sec: 1958.63 - lr: 0.000008 - momentum: 0.000000
2023-10-24 20:13:54,651 epoch 8 - iter 693/992 - loss 0.01434886 - time (sec): 58.48 - samples/sec: 1959.82 - lr: 0.000008 - momentum: 0.000000
2023-10-24 20:14:03,169 epoch 8 - iter 792/992 - loss 0.01494488 - time (sec): 66.99 - samples/sec: 1949.74 - lr: 0.000007 - momentum: 0.000000
2023-10-24 20:14:11,455 epoch 8 - iter 891/992 - loss 0.01477385 - time (sec): 75.28 - samples/sec: 1953.26 - lr: 0.000007 - momentum: 0.000000
2023-10-24 20:14:19,763 epoch 8 - iter 990/992 - loss 0.01485260 - time (sec): 83.59 - samples/sec: 1957.53 - lr: 0.000007 - momentum: 0.000000
2023-10-24 20:14:19,920 ----------------------------------------------------------------------------------------------------
2023-10-24 20:14:19,920 EPOCH 8 done: loss 0.0148 - lr: 0.000007
2023-10-24 20:14:23,042 DEV : loss 0.21496112644672394 - f1-score (micro avg) 0.7605
2023-10-24 20:14:23,057 ----------------------------------------------------------------------------------------------------
2023-10-24 20:14:31,085 epoch 9 - iter 99/992 - loss 0.01053370 - time (sec): 8.03 - samples/sec: 1948.94 - lr: 0.000006 - momentum: 0.000000
2023-10-24 20:14:39,217 epoch 9 - iter 198/992 - loss 0.01152026 - time (sec): 16.16 - samples/sec: 1972.14 - lr: 0.000006 - momentum: 0.000000
2023-10-24 20:14:47,435 epoch 9 - iter 297/992 - loss 0.01216313 - time (sec): 24.38 - samples/sec: 1927.95 - lr: 0.000006 - momentum: 0.000000
2023-10-24 20:14:55,627 epoch 9 - iter 396/992 - loss 0.01108699 - time (sec): 32.57 - samples/sec: 1959.28 - lr: 0.000005 - momentum: 0.000000
2023-10-24 20:15:03,755 epoch 9 - iter 495/992 - loss 0.01068865 - time (sec): 40.70 - samples/sec: 1972.11 - lr: 0.000005 - momentum: 0.000000
2023-10-24 20:15:12,520 epoch 9 - iter 594/992 - loss 0.01066247 - time (sec): 49.46 - samples/sec: 1971.76 - lr: 0.000005 - momentum: 0.000000
2023-10-24 20:15:20,461 epoch 9 - iter 693/992 - loss 0.01027592 - time (sec): 57.40 - samples/sec: 1970.11 - lr: 0.000004 - momentum: 0.000000
2023-10-24 20:15:29,585 epoch 9 - iter 792/992 - loss 0.01051584 - time (sec): 66.53 - samples/sec: 1976.83 - lr: 0.000004 - momentum: 0.000000
2023-10-24 20:15:37,997 epoch 9 - iter 891/992 - loss 0.01045564 - time (sec): 74.94 - samples/sec: 1970.39 - lr: 0.000004 - momentum: 0.000000
2023-10-24 20:15:46,100 epoch 9 - iter 990/992 - loss 0.01035266 - time (sec): 83.04 - samples/sec: 1969.80 - lr: 0.000003 - momentum: 0.000000
2023-10-24 20:15:46,330 ----------------------------------------------------------------------------------------------------
2023-10-24 20:15:46,330 EPOCH 9 done: loss 0.0103 - lr: 0.000003
2023-10-24 20:15:49,449 DEV : loss 0.22752279043197632 - f1-score (micro avg) 0.7657
2023-10-24 20:15:49,465 saving best model
2023-10-24 20:15:50,053 ----------------------------------------------------------------------------------------------------
2023-10-24 20:15:58,528 epoch 10 - iter 99/992 - loss 0.00841629 - time (sec): 8.47 - samples/sec: 2028.35 - lr: 0.000003 - momentum: 0.000000
2023-10-24 20:16:07,090 epoch 10 - iter 198/992 - loss 0.00843041 - time (sec): 17.04 - samples/sec: 1979.28 - lr: 0.000003 - momentum: 0.000000
2023-10-24 20:16:15,587 epoch 10 - iter 297/992 - loss 0.00740231 - time (sec): 25.53 - samples/sec: 1999.78 - lr: 0.000002 - momentum: 0.000000
2023-10-24 20:16:24,442 epoch 10 - iter 396/992 - loss 0.00795326 - time (sec): 34.39 - samples/sec: 1985.21 - lr: 0.000002 - momentum: 0.000000
2023-10-24 20:16:32,342 epoch 10 - iter 495/992 - loss 0.00754616 - time (sec): 42.29 - samples/sec: 1973.92 - lr: 0.000002 - momentum: 0.000000
2023-10-24 20:16:40,916 epoch 10 - iter 594/992 - loss 0.00881793 - time (sec): 50.86 - samples/sec: 1961.40 - lr: 0.000001 - momentum: 0.000000
2023-10-24 20:16:48,907 epoch 10 - iter 693/992 - loss 0.00853980 - time (sec): 58.85 - samples/sec: 1950.08 - lr: 0.000001 - momentum: 0.000000
2023-10-24 20:16:57,171 epoch 10 - iter 792/992 - loss 0.00846067 - time (sec): 67.12 - samples/sec: 1952.36 - lr: 0.000001 - momentum: 0.000000
2023-10-24 20:17:05,549 epoch 10 - iter 891/992 - loss 0.00829367 - time (sec): 75.49 - samples/sec: 1954.03 - lr: 0.000000 - momentum: 0.000000
2023-10-24 20:17:13,768 epoch 10 - iter 990/992 - loss 0.00802691 - time (sec): 83.71 - samples/sec: 1954.22 - lr: 0.000000 - momentum: 0.000000
2023-10-24 20:17:13,944 ----------------------------------------------------------------------------------------------------
2023-10-24 20:17:13,944 EPOCH 10 done: loss 0.0080 - lr: 0.000000
2023-10-24 20:17:17,067 DEV : loss 0.23069411516189575 - f1-score (micro avg) 0.7631
2023-10-24 20:17:17,551 ----------------------------------------------------------------------------------------------------
2023-10-24 20:17:17,552 Loading model from best epoch ...
2023-10-24 20:17:19,017 SequenceTagger predicts: Dictionary with 13 tags: O, S-PER, B-PER, E-PER, I-PER, S-LOC, B-LOC, E-LOC, I-LOC, S-ORG, B-ORG, E-ORG, I-ORG
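The 13 tags follow the BIOES scheme over PER, LOC and ORG. Loading the saved checkpoint for inference works as follows; the French sentence is only an illustrative example, not taken from the corpus:

```python
from flair.data import Sentence
from flair.models import SequenceTagger

# Path = training base path from the header above + "/best-model.pt".
tagger = SequenceTagger.load(
    "hmbench-icdar/fr-dbmdz/bert-base-historic-multilingual-64k-td-cased"
    "-bs8-wsFalse-e10-lr3e-05-poolingfirst-layers-1-crfFalse-5/best-model.pt"
)

sentence = Sentence("Le général Boulanger quitta Paris pour Bruxelles .")
tagger.predict(sentence)
for span in sentence.get_spans("ner"):
    print(span)  # PER/LOC/ORG spans decoded from the BIOES tags above
```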
2023-10-24 20:17:22,092
Results:
- F-score (micro) 0.7717
- F-score (macro) 0.6869
- Accuracy 0.6485

By class:
              precision    recall  f1-score   support

         LOC     0.8156    0.8443    0.8297       655
         PER     0.7040    0.7892    0.7442       223
         ORG     0.5556    0.4331    0.4867       127

   micro avg     0.7634    0.7801    0.7717      1005
   macro avg     0.6917    0.6889    0.6869      1005
weighted avg     0.7580    0.7801    0.7674      1005

2023-10-24 20:17:22,093 ----------------------------------------------------------------------------------------------------