2023-10-24 19:05:23,213 ----------------------------------------------------------------------------------------------------
2023-10-24 19:05:23,214 Model: "SequenceTagger(
  (embeddings): TransformerWordEmbeddings(
    (model): BertModel(
      (embeddings): BertEmbeddings(
        (word_embeddings): Embedding(64001, 768)
        (position_embeddings): Embedding(512, 768)
        (token_type_embeddings): Embedding(2, 768)
        (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (encoder): BertEncoder(
        (layer): ModuleList(
          (0-11): 12 x BertLayer(
            (attention): BertAttention(
              (self): BertSelfAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features=768, bias=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (output): BertSelfOutput(
                (dense): Linear(in_features=768, out_features=768, bias=True)
                (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
            )
            (intermediate): BertIntermediate(
              (dense): Linear(in_features=768, out_features=3072, bias=True)
              (intermediate_act_fn): GELUActivation()
            )
            (output): BertOutput(
              (dense): Linear(in_features=3072, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
        )
      )
      (pooler): BertPooler(
        (dense): Linear(in_features=768, out_features=768, bias=True)
        (activation): Tanh()
      )
    )
  )
  (locked_dropout): LockedDropout(p=0.5)
  (linear): Linear(in_features=768, out_features=13, bias=True)
  (loss_function): CrossEntropyLoss()
)"
2023-10-24 19:05:23,214 ----------------------------------------------------------------------------------------------------
2023-10-24 19:05:23,214 MultiCorpus: 7936 train + 992 dev + 992 test sentences
 - NER_ICDAR_EUROPEANA Corpus: 7936 train + 992 dev + 992 test sentences - /home/ubuntu/.flair/datasets/ner_icdar_europeana/fr
2023-10-24 19:05:23,214 ----------------------------------------------------------------------------------------------------
2023-10-24 19:05:23,214 Train: 7936 sentences
2023-10-24 19:05:23,214 (train_with_dev=False, train_with_test=False)
2023-10-24 19:05:23,214 ----------------------------------------------------------------------------------------------------
2023-10-24 19:05:23,214 Training Params:
2023-10-24 19:05:23,214 - learning_rate: "5e-05"
2023-10-24 19:05:23,214 - mini_batch_size: "8"
2023-10-24 19:05:23,215 - max_epochs: "10"
2023-10-24 19:05:23,215 - shuffle: "True"
2023-10-24 19:05:23,215 ----------------------------------------------------------------------------------------------------
2023-10-24 19:05:23,215 Plugins:
2023-10-24 19:05:23,215 - TensorboardLogger
2023-10-24 19:05:23,215 - LinearScheduler | warmup_fraction: '0.1'
2023-10-24 19:05:23,215 ----------------------------------------------------------------------------------------------------
2023-10-24 19:05:23,215 Final evaluation on model from best epoch (best-model.pt)
2023-10-24 19:05:23,215 - metric: "('micro avg', 'f1-score')"
2023-10-24 19:05:23,215 ----------------------------------------------------------------------------------------------------
2023-10-24 19:05:23,215 Computation:
2023-10-24 19:05:23,215 - compute on device: cuda:0
2023-10-24 19:05:23,215 - embedding storage: none
2023-10-24 19:05:23,215 ----------------------------------------------------------------------------------------------------
2023-10-24 19:05:23,215 Model training base path: "hmbench-icdar/fr-dbmdz/bert-base-historic-multilingual-64k-td-cased-bs8-wsFalse-e10-lr5e-05-poolingfirst-layers-1-crfFalse-4"
2023-10-24 19:05:23,215 ----------------------------------------------------------------------------------------------------
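The parameter and plugin blocks above map directly onto Flair's fine-tuning API. Below is a minimal sketch of how a run like this could be reproduced; the class and method names are Flair's, but the actual hmbench training script is not shown in this log, so treat signatures and defaults (e.g. AdamW and the 0.1 warmup fraction used by fine_tune) as assumptions, and note that the TensorboardLogger wiring is omitted:

```python
from flair.datasets import NER_ICDAR_EUROPEANA
from flair.embeddings import TransformerWordEmbeddings
from flair.models import SequenceTagger
from flair.trainers import ModelTrainer

# 7936 train / 992 dev / 992 test French sentences, as logged above.
corpus = NER_ICDAR_EUROPEANA(language="fr")
label_dict = corpus.make_label_dictionary(label_type="ner")

# "poolingfirst-layers-1" in the base path: last layer, first-subtoken pooling.
embeddings = TransformerWordEmbeddings(
    model="dbmdz/bert-base-historic-multilingual-64k-td-cased",
    layers="-1",
    subtoken_pooling="first",
    fine_tune=True,
)

# "crfFalse": plain linear head with CrossEntropyLoss, matching the model dump.
tagger = SequenceTagger(
    hidden_size=256,
    embeddings=embeddings,
    tag_dictionary=label_dict,
    tag_type="ner",
    use_crf=False,
    use_rnn=False,
)

# fine_tune() applies a linear LR schedule with warmup, as in the Plugins block.
trainer = ModelTrainer(tagger, corpus)
trainer.fine_tune(
    "hmbench-icdar/fr-dbmdz/bert-base-historic-multilingual-64k-td-cased-bs8-wsFalse-e10-lr5e-05-poolingfirst-layers-1-crfFalse-4",
    learning_rate=5e-5,
    mini_batch_size=8,
    max_epochs=10,
)
```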
2023-10-24 19:05:23,215 ----------------------------------------------------------------------------------------------------
2023-10-24 19:05:23,215 Logging anything other than scalars to TensorBoard is currently not supported.
2023-10-24 19:05:31,175 epoch 1 - iter 99/992 - loss 1.70950296 - time (sec): 7.96 - samples/sec: 2082.83 - lr: 0.000005 - momentum: 0.000000
2023-10-24 19:05:39,330 epoch 1 - iter 198/992 - loss 1.01384479 - time (sec): 16.11 - samples/sec: 2064.96 - lr: 0.000010 - momentum: 0.000000
2023-10-24 19:05:47,790 epoch 1 - iter 297/992 - loss 0.75778922 - time (sec): 24.57 - samples/sec: 1996.91 - lr: 0.000015 - momentum: 0.000000
2023-10-24 19:05:55,836 epoch 1 - iter 396/992 - loss 0.61189230 - time (sec): 32.62 - samples/sec: 1991.62 - lr: 0.000020 - momentum: 0.000000
2023-10-24 19:06:04,134 epoch 1 - iter 495/992 - loss 0.51884386 - time (sec): 40.92 - samples/sec: 1990.45 - lr: 0.000025 - momentum: 0.000000
2023-10-24 19:06:12,350 epoch 1 - iter 594/992 - loss 0.45856004 - time (sec): 49.13 - samples/sec: 1977.54 - lr: 0.000030 - momentum: 0.000000
2023-10-24 19:06:21,080 epoch 1 - iter 693/992 - loss 0.41271354 - time (sec): 57.86 - samples/sec: 1966.53 - lr: 0.000035 - momentum: 0.000000
2023-10-24 19:06:29,237 epoch 1 - iter 792/992 - loss 0.37655176 - time (sec): 66.02 - samples/sec: 1973.90 - lr: 0.000040 - momentum: 0.000000
2023-10-24 19:06:37,225 epoch 1 - iter 891/992 - loss 0.34959778 - time (sec): 74.01 - samples/sec: 1978.32 - lr: 0.000045 - momentum: 0.000000
2023-10-24 19:06:46,108 epoch 1 - iter 990/992 - loss 0.32534380 - time (sec): 82.89 - samples/sec: 1975.55 - lr: 0.000050 - momentum: 0.000000
2023-10-24 19:06:46,247 ----------------------------------------------------------------------------------------------------
2023-10-24 19:06:46,248 EPOCH 1 done: loss 0.3253 - lr: 0.000050
2023-10-24 19:06:49,320 DEV : loss 0.10130015760660172 - f1-score (micro avg) 0.6799
2023-10-24 19:06:49,335 saving best model
2023-10-24 19:06:49,808 ----------------------------------------------------------------------------------------------------
2023-10-24 19:06:58,310 epoch 2 - iter 99/992 - loss 0.10678352 - time (sec): 8.50 - samples/sec: 2028.19 - lr: 0.000049 - momentum: 0.000000
2023-10-24 19:07:06,731 epoch 2 - iter 198/992 - loss 0.10123782 - time (sec): 16.92 - samples/sec: 1993.53 - lr: 0.000049 - momentum: 0.000000
2023-10-24 19:07:14,788 epoch 2 - iter 297/992 - loss 0.09886519 - time (sec): 24.98 - samples/sec: 1996.00 - lr: 0.000048 - momentum: 0.000000
2023-10-24 19:07:23,057 epoch 2 - iter 396/992 - loss 0.10164530 - time (sec): 33.25 - samples/sec: 1994.28 - lr: 0.000048 - momentum: 0.000000
2023-10-24 19:07:31,670 epoch 2 - iter 495/992 - loss 0.10087299 - time (sec): 41.86 - samples/sec: 1992.96 - lr: 0.000047 - momentum: 0.000000
2023-10-24 19:07:40,135 epoch 2 - iter 594/992 - loss 0.10136669 - time (sec): 50.33 - samples/sec: 1981.49 - lr: 0.000047 - momentum: 0.000000
2023-10-24 19:07:48,052 epoch 2 - iter 693/992 - loss 0.10102277 - time (sec): 58.24 - samples/sec: 1974.27 - lr: 0.000046 - momentum: 0.000000
2023-10-24 19:07:56,239 epoch 2 - iter 792/992 - loss 0.10046863 - time (sec): 66.43 - samples/sec: 1969.35 - lr: 0.000046 - momentum: 0.000000
2023-10-24 19:08:04,561 epoch 2 - iter 891/992 - loss 0.09965890 - time (sec): 74.75 - samples/sec: 1974.95 - lr: 0.000045 - momentum: 0.000000
2023-10-24 19:08:12,651 epoch 2 - iter 990/992 - loss 0.09937356 - time (sec): 82.84 - samples/sec: 1969.96 - lr: 0.000044 - momentum: 0.000000
2023-10-24 19:08:13,107 ----------------------------------------------------------------------------------------------------
2023-10-24 19:08:13,107 EPOCH 2 done: loss 0.0995 - lr: 0.000044
2023-10-24 19:08:16,521 DEV : loss 0.0830492153763771 - f1-score (micro avg) 0.7227
2023-10-24 19:08:16,537 saving best model
2023-10-24 19:08:17,173 ----------------------------------------------------------------------------------------------------
2023-10-24 19:08:25,132 epoch 3 - iter 99/992 - loss 0.06256824 - time (sec): 7.96 - samples/sec: 2024.19 - lr: 0.000044 - momentum: 0.000000
2023-10-24 19:08:33,694 epoch 3 - iter 198/992 - loss 0.06110784 - time (sec): 16.52 - samples/sec: 2002.92 - lr: 0.000043 - momentum: 0.000000
2023-10-24 19:08:42,023 epoch 3 - iter 297/992 - loss 0.06752868 - time (sec): 24.85 - samples/sec: 1959.14 - lr: 0.000043 - momentum: 0.000000
2023-10-24 19:08:50,107 epoch 3 - iter 396/992 - loss 0.06822405 - time (sec): 32.93 - samples/sec: 1940.81 - lr: 0.000042 - momentum: 0.000000
2023-10-24 19:08:58,559 epoch 3 - iter 495/992 - loss 0.06716269 - time (sec): 41.39 - samples/sec: 1946.31 - lr: 0.000042 - momentum: 0.000000
2023-10-24 19:09:06,889 epoch 3 - iter 594/992 - loss 0.06703667 - time (sec): 49.72 - samples/sec: 1956.04 - lr: 0.000041 - momentum: 0.000000
2023-10-24 19:09:15,261 epoch 3 - iter 693/992 - loss 0.06724873 - time (sec): 58.09 - samples/sec: 1972.02 - lr: 0.000041 - momentum: 0.000000
2023-10-24 19:09:23,760 epoch 3 - iter 792/992 - loss 0.06798579 - time (sec): 66.59 - samples/sec: 1967.26 - lr: 0.000040 - momentum: 0.000000
2023-10-24 19:09:32,288 epoch 3 - iter 891/992 - loss 0.06874500 - time (sec): 75.11 - samples/sec: 1967.00 - lr: 0.000039 - momentum: 0.000000
2023-10-24 19:09:40,478 epoch 3 - iter 990/992 - loss 0.06831140 - time (sec): 83.30 - samples/sec: 1963.52 - lr: 0.000039 - momentum: 0.000000
2023-10-24 19:09:40,679 ----------------------------------------------------------------------------------------------------
2023-10-24 19:09:40,679 EPOCH 3 done: loss 0.0682 - lr: 0.000039
2023-10-24 19:09:43,796 DEV : loss 0.11285286396741867 - f1-score (micro avg) 0.7414
2023-10-24 19:09:43,812 saving best model
2023-10-24 19:09:44,464 ----------------------------------------------------------------------------------------------------
2023-10-24 19:09:52,525 epoch 4 - iter 99/992 - loss 0.03625260 - time (sec): 8.06 - samples/sec: 1924.31 - lr: 0.000038 - momentum: 0.000000
2023-10-24 19:10:01,030 epoch 4 - iter 198/992 - loss 0.04677937 - time (sec): 16.57 - samples/sec: 1953.03 - lr: 0.000038 - momentum: 0.000000
2023-10-24 19:10:09,422 epoch 4 - iter 297/992 - loss 0.04578826 - time (sec): 24.96 - samples/sec: 1983.09 - lr: 0.000037 - momentum: 0.000000
2023-10-24 19:10:17,505 epoch 4 - iter 396/992 - loss 0.04479099 - time (sec): 33.04 - samples/sec: 1961.58 - lr: 0.000037 - momentum: 0.000000
2023-10-24 19:10:25,630 epoch 4 - iter 495/992 - loss 0.04686146 - time (sec): 41.17 - samples/sec: 1954.14 - lr: 0.000036 - momentum: 0.000000
2023-10-24 19:10:34,259 epoch 4 - iter 594/992 - loss 0.04762399 - time (sec): 49.79 - samples/sec: 1960.97 - lr: 0.000036 - momentum: 0.000000
2023-10-24 19:10:42,614 epoch 4 - iter 693/992 - loss 0.04722873 - time (sec): 58.15 - samples/sec: 1957.15 - lr: 0.000035 - momentum: 0.000000
2023-10-24 19:10:51,075 epoch 4 - iter 792/992 - loss 0.04821478 - time (sec): 66.61 - samples/sec: 1951.82 - lr: 0.000034 - momentum: 0.000000
2023-10-24 19:10:59,484 epoch 4 - iter 891/992 - loss 0.04777276 - time (sec): 75.02 - samples/sec: 1963.79 - lr: 0.000034 - momentum: 0.000000
2023-10-24 19:11:08,254 epoch 4 - iter 990/992 - loss 0.04940618 - time (sec): 83.79 - samples/sec: 1952.77 - lr: 0.000033 - momentum: 0.000000
2023-10-24 19:11:08,452 ----------------------------------------------------------------------------------------------------
2023-10-24 19:11:08,452 EPOCH 4 done: loss 0.0493 - lr: 0.000033
2023-10-24 19:11:11,571 DEV : loss 0.11610622704029083 - f1-score (micro avg) 0.7619
2023-10-24 19:11:11,586 saving best model
2023-10-24 19:11:12,225 ----------------------------------------------------------------------------------------------------
2023-10-24 19:11:20,827 epoch 5 - iter 99/992 - loss 0.03961834 - time (sec): 8.60 - samples/sec: 1958.57 - lr: 0.000033 - momentum: 0.000000
2023-10-24 19:11:28,982 epoch 5 - iter 198/992 - loss 0.03534589 - time (sec): 16.76 - samples/sec: 1969.79 - lr: 0.000032 - momentum: 0.000000
2023-10-24 19:11:37,315 epoch 5 - iter 297/992 - loss 0.03749801 - time (sec): 25.09 - samples/sec: 1981.88 - lr: 0.000032 - momentum: 0.000000
2023-10-24 19:11:46,151 epoch 5 - iter 396/992 - loss 0.03528151 - time (sec): 33.93 - samples/sec: 1983.96 - lr: 0.000031 - momentum: 0.000000
2023-10-24 19:11:54,365 epoch 5 - iter 495/992 - loss 0.03519979 - time (sec): 42.14 - samples/sec: 1974.45 - lr: 0.000031 - momentum: 0.000000
2023-10-24 19:12:02,429 epoch 5 - iter 594/992 - loss 0.03550820 - time (sec): 50.20 - samples/sec: 1968.78 - lr: 0.000030 - momentum: 0.000000
2023-10-24 19:12:10,603 epoch 5 - iter 693/992 - loss 0.03597362 - time (sec): 58.38 - samples/sec: 1964.72 - lr: 0.000029 - momentum: 0.000000
2023-10-24 19:12:18,946 epoch 5 - iter 792/992 - loss 0.03569118 - time (sec): 66.72 - samples/sec: 1969.04 - lr: 0.000029 - momentum: 0.000000
2023-10-24 19:12:27,035 epoch 5 - iter 891/992 - loss 0.03582242 - time (sec): 74.81 - samples/sec: 1962.92 - lr: 0.000028 - momentum: 0.000000
2023-10-24 19:12:35,694 epoch 5 - iter 990/992 - loss 0.03556779 - time (sec): 83.47 - samples/sec: 1960.31 - lr: 0.000028 - momentum: 0.000000
2023-10-24 19:12:35,875 ----------------------------------------------------------------------------------------------------
2023-10-24 19:12:35,875 EPOCH 5 done: loss 0.0356 - lr: 0.000028
2023-10-24 19:12:38,999 DEV : loss 0.1716771423816681 - f1-score (micro avg) 0.7426
2023-10-24 19:12:39,014 ----------------------------------------------------------------------------------------------------
2023-10-24 19:12:47,355 epoch 6 - iter 99/992 - loss 0.03224386 - time (sec): 8.34 - samples/sec: 1942.30 - lr: 0.000027 - momentum: 0.000000
2023-10-24 19:12:55,447 epoch 6 - iter 198/992 - loss 0.03197562 - time (sec): 16.43 - samples/sec: 1959.18 - lr: 0.000027 - momentum: 0.000000
2023-10-24 19:13:03,799 epoch 6 - iter 297/992 - loss 0.02965185 - time (sec): 24.78 - samples/sec: 1979.69 - lr: 0.000026 - momentum: 0.000000
2023-10-24 19:13:11,914 epoch 6 - iter 396/992 - loss 0.03043814 - time (sec): 32.90 - samples/sec: 1967.10 - lr: 0.000026 - momentum: 0.000000
2023-10-24 19:13:20,509 epoch 6 - iter 495/992 - loss 0.02992723 - time (sec): 41.49 - samples/sec: 1974.57 - lr: 0.000025 - momentum: 0.000000
2023-10-24 19:13:28,982 epoch 6 - iter 594/992 - loss 0.02978050 - time (sec): 49.97 - samples/sec: 1954.97 - lr: 0.000024 - momentum: 0.000000
2023-10-24 19:13:37,482 epoch 6 - iter 693/992 - loss 0.02928196 - time (sec): 58.47 - samples/sec: 1959.29 - lr: 0.000024 - momentum: 0.000000
2023-10-24 19:13:45,576 epoch 6 - iter 792/992 - loss 0.02952635 - time (sec): 66.56 - samples/sec: 1960.59 - lr: 0.000023 - momentum: 0.000000
2023-10-24 19:13:54,224 epoch 6 - iter 891/992 - loss 0.02912377 - time (sec): 75.21 - samples/sec: 1950.02 - lr: 0.000023 - momentum: 0.000000
2023-10-24 19:14:02,690 epoch 6 - iter 990/992 - loss 0.02939295 - time (sec): 83.68 - samples/sec: 1956.57 - lr: 0.000022 - momentum: 0.000000
2023-10-24 19:14:02,850 ----------------------------------------------------------------------------------------------------
2023-10-24 19:14:02,850 EPOCH 6 done: loss 0.0294 - lr: 0.000022
2023-10-24 19:14:05,972 DEV : loss 0.192485511302948 - f1-score (micro avg) 0.7462
2023-10-24 19:14:05,987 ----------------------------------------------------------------------------------------------------
2023-10-24 19:14:14,813 epoch 7 - iter 99/992 - loss 0.02045973 - time (sec): 8.83 - samples/sec: 1989.11 - lr: 0.000022 - momentum: 0.000000
2023-10-24 19:14:23,135 epoch 7 - iter 198/992 - loss 0.01981631 - time (sec): 17.15 - samples/sec: 1998.22 - lr: 0.000021 - momentum: 0.000000
2023-10-24 19:14:31,271 epoch 7 - iter 297/992 - loss 0.01923823 - time (sec): 25.28 - samples/sec: 2001.59 - lr: 0.000021 - momentum: 0.000000
2023-10-24 19:14:39,735 epoch 7 - iter 396/992 - loss 0.02020709 - time (sec): 33.75 - samples/sec: 1983.61 - lr: 0.000020 - momentum: 0.000000
2023-10-24 19:14:47,975 epoch 7 - iter 495/992 - loss 0.02153825 - time (sec): 41.99 - samples/sec: 1978.07 - lr: 0.000019 - momentum: 0.000000
2023-10-24 19:14:56,108 epoch 7 - iter 594/992 - loss 0.02042658 - time (sec): 50.12 - samples/sec: 1972.49 - lr: 0.000019 - momentum: 0.000000
2023-10-24 19:15:04,294 epoch 7 - iter 693/992 - loss 0.02067326 - time (sec): 58.31 - samples/sec: 1965.88 - lr: 0.000018 - momentum: 0.000000
2023-10-24 19:15:12,571 epoch 7 - iter 792/992 - loss 0.02092512 - time (sec): 66.58 - samples/sec: 1965.47 - lr: 0.000018 - momentum: 0.000000
2023-10-24 19:15:20,891 epoch 7 - iter 891/992 - loss 0.02067809 - time (sec): 74.90 - samples/sec: 1968.70 - lr: 0.000017 - momentum: 0.000000
2023-10-24 19:15:29,114 epoch 7 - iter 990/992 - loss 0.02076980 - time (sec): 83.13 - samples/sec: 1968.12 - lr: 0.000017 - momentum: 0.000000
2023-10-24 19:15:29,378 ----------------------------------------------------------------------------------------------------
2023-10-24 19:15:29,378 EPOCH 7 done: loss 0.0208 - lr: 0.000017
2023-10-24 19:15:32,489 DEV : loss 0.21390925347805023 - f1-score (micro avg) 0.7568
2023-10-24 19:15:32,504 ----------------------------------------------------------------------------------------------------
2023-10-24 19:15:40,895 epoch 8 - iter 99/992 - loss 0.01235834 - time (sec): 8.39 - samples/sec: 1954.00 - lr: 0.000016 - momentum: 0.000000
2023-10-24 19:15:49,494 epoch 8 - iter 198/992 - loss 0.01284679 - time (sec): 16.99 - samples/sec: 1975.32 - lr: 0.000016 - momentum: 0.000000
2023-10-24 19:15:57,663 epoch 8 - iter 297/992 - loss 0.01202123 - time (sec): 25.16 - samples/sec: 1980.96 - lr: 0.000015 - momentum: 0.000000
2023-10-24 19:16:05,831 epoch 8 - iter 396/992 - loss 0.01338066 - time (sec): 33.33 - samples/sec: 1985.64 - lr: 0.000014 - momentum: 0.000000
2023-10-24 19:16:14,383 epoch 8 - iter 495/992 - loss 0.01427779 - time (sec): 41.88 - samples/sec: 1991.53 - lr: 0.000014 - momentum: 0.000000
2023-10-24 19:16:22,512 epoch 8 - iter 594/992 - loss 0.01426704 - time (sec): 50.01 - samples/sec: 1990.06 - lr: 0.000013 - momentum: 0.000000
2023-10-24 19:16:31,092 epoch 8 - iter 693/992 - loss 0.01503984 - time (sec): 58.59 - samples/sec: 1974.64 - lr: 0.000013 - momentum: 0.000000
2023-10-24 19:16:39,618 epoch 8 - iter 792/992 - loss 0.01476754 - time (sec): 67.11 - samples/sec: 1956.26 - lr: 0.000012 - momentum: 0.000000
2023-10-24 19:16:48,166 epoch 8 - iter 891/992 - loss 0.01433965 - time (sec): 75.66 - samples/sec: 1958.05 - lr: 0.000012 - momentum: 0.000000
2023-10-24 19:16:56,116 epoch 8 - iter 990/992 - loss 0.01439873 - time (sec): 83.61 - samples/sec: 1957.13 - lr: 0.000011 - momentum: 0.000000
2023-10-24 19:16:56,266 ----------------------------------------------------------------------------------------------------
2023-10-24 19:16:56,266 EPOCH 8 done: loss 0.0144 - lr: 0.000011
2023-10-24 19:16:59,394 DEV : loss 0.2170470654964447 - f1-score (micro avg) 0.7653
2023-10-24 19:16:59,409 saving best model
2023-10-24 19:16:59,998 ----------------------------------------------------------------------------------------------------
2023-10-24 19:17:08,452 epoch 9 - iter 99/992 - loss 0.01523691 - time (sec): 8.45 - samples/sec: 1956.81 - lr: 0.000011 - momentum: 0.000000
2023-10-24 19:17:16,335 epoch 9 - iter 198/992 - loss 0.01371175 - time (sec): 16.34 - samples/sec: 1973.01 - lr: 0.000010 - momentum: 0.000000
2023-10-24 19:17:25,129 epoch 9 - iter 297/992 - loss 0.01112734 - time (sec): 25.13 - samples/sec: 1961.09 - lr: 0.000009 - momentum: 0.000000
2023-10-24 19:17:33,617 epoch 9 - iter 396/992 - loss 0.01101364 - time (sec): 33.62 - samples/sec: 1954.71 - lr: 0.000009 - momentum: 0.000000
2023-10-24 19:17:41,740 epoch 9 - iter 495/992 - loss 0.01020473 - time (sec): 41.74 - samples/sec: 1971.57 - lr: 0.000008 - momentum: 0.000000
2023-10-24 19:17:49,783 epoch 9 - iter 594/992 - loss 0.00941962 - time (sec): 49.78 - samples/sec: 1965.69 - lr: 0.000008 - momentum: 0.000000
2023-10-24 19:17:58,224 epoch 9 - iter 693/992 - loss 0.00898993 - time (sec): 58.23 - samples/sec: 1968.14 - lr: 0.000007 - momentum: 0.000000
2023-10-24 19:18:06,525 epoch 9 - iter 792/992 - loss 0.00910924 - time (sec): 66.53 - samples/sec: 1970.28 - lr: 0.000007 - momentum: 0.000000
2023-10-24 19:18:15,185 epoch 9 - iter 891/992 - loss 0.00928332 - time (sec): 75.19 - samples/sec: 1961.67 - lr: 0.000006 - momentum: 0.000000
2023-10-24 19:18:23,413 epoch 9 - iter 990/992 - loss 0.00916308 - time (sec): 83.41 - samples/sec: 1962.06 - lr: 0.000006 - momentum: 0.000000
2023-10-24 19:18:23,585 ----------------------------------------------------------------------------------------------------
2023-10-24 19:18:23,585 EPOCH 9 done: loss 0.0092 - lr: 0.000006
2023-10-24 19:18:26,698 DEV : loss 0.2309696227312088 - f1-score (micro avg) 0.761
2023-10-24 19:18:26,713 ----------------------------------------------------------------------------------------------------
2023-10-24 19:18:34,965 epoch 10 - iter 99/992 - loss 0.00331234 - time (sec): 8.25 - samples/sec: 1929.01 - lr: 0.000005 - momentum: 0.000000
2023-10-24 19:18:43,239 epoch 10 - iter 198/992 - loss 0.00499297 - time (sec): 16.52 - samples/sec: 1982.26 - lr: 0.000004 - momentum: 0.000000
2023-10-24 19:18:51,539 epoch 10 - iter 297/992 - loss 0.00567139 - time (sec): 24.82 - samples/sec: 1956.99 - lr: 0.000004 - momentum: 0.000000
2023-10-24 19:18:59,907 epoch 10 - iter 396/992 - loss 0.00588887 - time (sec): 33.19 - samples/sec: 1963.07 - lr: 0.000003 - momentum: 0.000000
2023-10-24 19:19:08,519 epoch 10 - iter 495/992 - loss 0.00578750 - time (sec): 41.80 - samples/sec: 1954.56 - lr: 0.000003 - momentum: 0.000000
2023-10-24 19:19:17,405 epoch 10 - iter 594/992 - loss 0.00603662 - time (sec): 50.69 - samples/sec: 1943.26 - lr: 0.000002 - momentum: 0.000000
2023-10-24 19:19:25,492 epoch 10 - iter 693/992 - loss 0.00581652 - time (sec): 58.78 - samples/sec: 1950.26 - lr: 0.000002 - momentum: 0.000000
2023-10-24 19:19:33,597 epoch 10 - iter 792/992 - loss 0.00565761 - time (sec): 66.88 - samples/sec: 1952.72 - lr: 0.000001 - momentum: 0.000000
2023-10-24 19:19:42,061 epoch 10 - iter 891/992 - loss 0.00628280 - time (sec): 75.35 - samples/sec: 1961.00 - lr: 0.000001 - momentum: 0.000000
2023-10-24 19:19:50,323 epoch 10 - iter 990/992 - loss 0.00648941 - time (sec): 83.61 - samples/sec: 1958.09 - lr: 0.000000 - momentum: 0.000000
2023-10-24 19:19:50,484 ----------------------------------------------------------------------------------------------------
2023-10-24 19:19:50,484 EPOCH 10 done: loss 0.0065 - lr: 0.000000
2023-10-24 19:19:53,602 DEV : loss 0.24162988364696503 - f1-score (micro avg) 0.7665
2023-10-24 19:19:53,617 saving best model
2023-10-24 19:19:54,685 ----------------------------------------------------------------------------------------------------
2023-10-24 19:19:54,686 Loading model from best epoch ...
2023-10-24 19:19:56,216 SequenceTagger predicts: Dictionary with 13 tags: O, S-PER, B-PER, E-PER, I-PER, S-LOC, B-LOC, E-LOC, I-LOC, S-ORG, B-ORG, E-ORG, I-ORG
2023-10-24 19:19:59,296
Results:
- F-score (micro) 0.7905
- F-score (macro) 0.6954
- Accuracy 0.6762

By class:
              precision    recall  f1-score   support

         LOC     0.8430    0.8687    0.8556       655
         PER     0.7020    0.8027    0.7490       223
         ORG     0.5843    0.4094    0.4815       127

   micro avg     0.7851    0.7960    0.7905      1005
   macro avg     0.7097    0.6936    0.6954      1005
weighted avg     0.7790    0.7960    0.7847      1005

2023-10-24 19:19:59,296 ----------------------------------------------------------------------------------------------------
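For completeness: the best-model.pt written during training can be loaded for inference through Flair's standard API. A minimal sketch; the example sentence is invented:

```python
from flair.data import Sentence
from flair.models import SequenceTagger

# Checkpoint written at the "saving best model" steps above.
tagger = SequenceTagger.load(
    "hmbench-icdar/fr-dbmdz/bert-base-historic-multilingual-64k-td-cased-bs8-wsFalse-e10-lr5e-05-poolingfirst-layers-1-crfFalse-4/best-model.pt"
)

# Predictions use the 13-tag BIOES dictionary logged above (PER/LOC/ORG plus O).
sentence = Sentence("Le maire de Paris a reçu la délégation de la Banque de France .")
tagger.predict(sentence)

for span in sentence.get_spans("ner"):
    print(span)  # span text plus predicted label and confidence
```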