2023-10-25 09:35:15,970 ----------------------------------------------------------------------------------------------------
2023-10-25 09:35:15,971 Model: "SequenceTagger(
  (embeddings): TransformerWordEmbeddings(
    (model): BertModel(
      (embeddings): BertEmbeddings(
        (word_embeddings): Embedding(64001, 768)
        (position_embeddings): Embedding(512, 768)
        (token_type_embeddings): Embedding(2, 768)
        (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (encoder): BertEncoder(
        (layer): ModuleList(
          (0-11): 12 x BertLayer(
            (attention): BertAttention(
              (self): BertSelfAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features=768, bias=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (output): BertSelfOutput(
                (dense): Linear(in_features=768, out_features=768, bias=True)
                (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
            )
            (intermediate): BertIntermediate(
              (dense): Linear(in_features=768, out_features=3072, bias=True)
              (intermediate_act_fn): GELUActivation()
            )
            (output): BertOutput(
              (dense): Linear(in_features=3072, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
        )
      )
      (pooler): BertPooler(
        (dense): Linear(in_features=768, out_features=768, bias=True)
        (activation): Tanh()
      )
    )
  )
  (locked_dropout): LockedDropout(p=0.5)
  (linear): Linear(in_features=768, out_features=13, bias=True)
  (loss_function): CrossEntropyLoss()
)"
2023-10-25 09:35:15,971 ----------------------------------------------------------------------------------------------------
2023-10-25 09:35:15,971 MultiCorpus: 14465 train + 1392 dev + 2432 test sentences
 - NER_HIPE_2022 Corpus: 14465 train + 1392 dev + 2432 test sentences - /home/ubuntu/.flair/datasets/ner_hipe_2022/v2.1/letemps/fr/with_doc_seperator
2023-10-25 09:35:15,971 ----------------------------------------------------------------------------------------------------
2023-10-25 09:35:15,971 Train: 14465 sentences
2023-10-25 09:35:15,971 (train_with_dev=False, train_with_test=False)
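The corpus summary above corresponds to Flair's bundled HIPE-2022 loader. A hedged sketch of reproducing it, with the version and document-separator arguments mirroring the cached dataset path (the exact keyword names are assumptions and can differ between Flair releases):

from flair.datasets import NER_HIPE_2022

# Load the French "letemps" subset of HIPE-2022, as logged above.
corpus = NER_HIPE_2022(
    dataset_name="letemps",
    language="fr",
    version="v2.1",               # mirrors .../ner_hipe_2022/v2.1/... in the cached path
    add_document_separator=True,  # mirrors .../with_doc_seperator in the cached path
)
print(corpus)  # MultiCorpus: 14465 train + 1392 dev + 2432 test sentences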
2023-10-25 09:35:15,971 ----------------------------------------------------------------------------------------------------
2023-10-25 09:35:15,971 Training Params:
2023-10-25 09:35:15,971 - learning_rate: "5e-05"
2023-10-25 09:35:15,971 - mini_batch_size: "4"
2023-10-25 09:35:15,971 - max_epochs: "10"
2023-10-25 09:35:15,971 - shuffle: "True"
2023-10-25 09:35:15,971 ----------------------------------------------------------------------------------------------------
2023-10-25 09:35:15,971 Plugins:
2023-10-25 09:35:15,971 - TensorboardLogger
2023-10-25 09:35:15,971 - LinearScheduler | warmup_fraction: '0.1'
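Taken together, the training parameters and plugins above amount to a fine-tuning call along the following lines, reusing the tagger and corpus objects from the sketches above (the plugin import path and some keyword names vary across Flair releases, so treat them as assumptions rather than the exact call that produced this log):

from flair.trainers import ModelTrainer
from flair.trainers.plugins import TensorboardLogger  # import path assumed

trainer = ModelTrainer(tagger, corpus)
trainer.fine_tune(
    "hmbench-letemps/fr-dbmdz/bert-base-historic-multilingual-64k-td-cased-bs4-wsFalse-e10-lr5e-05-poolingfirst-layers-1-crfFalse-1",
    learning_rate=5e-05,
    mini_batch_size=4,
    max_epochs=10,
    shuffle=True,
    warmup_fraction=0.1,            # drives the LinearScheduler plugin listed above
    plugins=[TensorboardLogger()],
)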
2023-10-25 09:35:15,971 ----------------------------------------------------------------------------------------------------
2023-10-25 09:35:15,971 Final evaluation on model from best epoch (best-model.pt)
2023-10-25 09:35:15,971 - metric: "('micro avg', 'f1-score')"
2023-10-25 09:35:15,971 ----------------------------------------------------------------------------------------------------
2023-10-25 09:35:15,971 Computation:
2023-10-25 09:35:15,971 - compute on device: cuda:0
2023-10-25 09:35:15,971 - embedding storage: none
2023-10-25 09:35:15,971 ----------------------------------------------------------------------------------------------------
2023-10-25 09:35:15,971 Model training base path: "hmbench-letemps/fr-dbmdz/bert-base-historic-multilingual-64k-td-cased-bs4-wsFalse-e10-lr5e-05-poolingfirst-layers-1-crfFalse-1"
2023-10-25 09:35:15,971 ----------------------------------------------------------------------------------------------------
2023-10-25 09:35:15,971 ----------------------------------------------------------------------------------------------------
2023-10-25 09:35:15,972 Logging anything other than scalars to TensorBoard is currently not supported.
2023-10-25 09:35:38,470 epoch 1 - iter 361/3617 - loss 1.05695090 - time (sec): 22.50 - samples/sec: 1682.68 - lr: 0.000005 - momentum: 0.000000
2023-10-25 09:36:00,826 epoch 1 - iter 722/3617 - loss 0.61904920 - time (sec): 44.85 - samples/sec: 1676.74 - lr: 0.000010 - momentum: 0.000000
2023-10-25 09:36:23,508 epoch 1 - iter 1083/3617 - loss 0.45672288 - time (sec): 67.54 - samples/sec: 1682.98 - lr: 0.000015 - momentum: 0.000000
2023-10-25 09:36:46,172 epoch 1 - iter 1444/3617 - loss 0.37587099 - time (sec): 90.20 - samples/sec: 1682.16 - lr: 0.000020 - momentum: 0.000000
2023-10-25 09:37:08,680 epoch 1 - iter 1805/3617 - loss 0.32508458 - time (sec): 112.71 - samples/sec: 1677.72 - lr: 0.000025 - momentum: 0.000000
2023-10-25 09:37:31,312 epoch 1 - iter 2166/3617 - loss 0.28960332 - time (sec): 135.34 - samples/sec: 1679.00 - lr: 0.000030 - momentum: 0.000000
2023-10-25 09:37:53,870 epoch 1 - iter 2527/3617 - loss 0.26362773 - time (sec): 157.90 - samples/sec: 1682.18 - lr: 0.000035 - momentum: 0.000000
2023-10-25 09:38:16,458 epoch 1 - iter 2888/3617 - loss 0.24619176 - time (sec): 180.49 - samples/sec: 1679.99 - lr: 0.000040 - momentum: 0.000000
2023-10-25 09:38:39,263 epoch 1 - iter 3249/3617 - loss 0.23199189 - time (sec): 203.29 - samples/sec: 1679.11 - lr: 0.000045 - momentum: 0.000000
2023-10-25 09:39:01,722 epoch 1 - iter 3610/3617 - loss 0.22149161 - time (sec): 225.75 - samples/sec: 1679.01 - lr: 0.000050 - momentum: 0.000000
2023-10-25 09:39:02,184 ----------------------------------------------------------------------------------------------------
2023-10-25 09:39:02,184 EPOCH 1 done: loss 0.2211 - lr: 0.000050
2023-10-25 09:39:06,714 DEV : loss 0.164424329996109 - f1-score (micro avg) 0.5744
2023-10-25 09:39:06,737 saving best model
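The lr column above traces the LinearScheduler plugin: linear warmup across epoch 1 (warmup_fraction 0.1 of 10 epochs), then linear decay to zero by the end of epoch 10. A self-contained sketch of that schedule (a hypothetical helper, not Flair code):

def linear_lr(step, total_steps, peak_lr=5e-05, warmup_fraction=0.1):
    # Ramp linearly up to peak_lr, then decay linearly back to zero.
    warmup_steps = int(total_steps * warmup_fraction)
    if step < warmup_steps:
        return peak_lr * step / warmup_steps
    return peak_lr * (total_steps - step) / (total_steps - warmup_steps)

total = 10 * 3617                        # epochs x batches per epoch, as logged
print(round(linear_lr(361, total), 6))   # 5e-06, matching "iter 361/3617 ... lr: 0.000005"
print(round(linear_lr(3617, total), 6))  # 5e-05, the peak reached at the end of epoch 1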
2023-10-25 09:39:07,206 ----------------------------------------------------------------------------------------------------
2023-10-25 09:39:29,790 epoch 2 - iter 361/3617 - loss 0.10128829 - time (sec): 22.58 - samples/sec: 1695.54 - lr: 0.000049 - momentum: 0.000000
2023-10-25 09:39:52,798 epoch 2 - iter 722/3617 - loss 0.11212638 - time (sec): 45.59 - samples/sec: 1692.60 - lr: 0.000049 - momentum: 0.000000
2023-10-25 09:40:15,575 epoch 2 - iter 1083/3617 - loss 0.11116111 - time (sec): 68.37 - samples/sec: 1692.36 - lr: 0.000048 - momentum: 0.000000
2023-10-25 09:40:38,290 epoch 2 - iter 1444/3617 - loss 0.10814252 - time (sec): 91.08 - samples/sec: 1685.77 - lr: 0.000048 - momentum: 0.000000
2023-10-25 09:41:01,016 epoch 2 - iter 1805/3617 - loss 0.10845616 - time (sec): 113.81 - samples/sec: 1682.58 - lr: 0.000047 - momentum: 0.000000
2023-10-25 09:41:23,650 epoch 2 - iter 2166/3617 - loss 0.10793655 - time (sec): 136.44 - samples/sec: 1676.13 - lr: 0.000047 - momentum: 0.000000
2023-10-25 09:41:46,137 epoch 2 - iter 2527/3617 - loss 0.10723312 - time (sec): 158.93 - samples/sec: 1672.72 - lr: 0.000046 - momentum: 0.000000
2023-10-25 09:42:08,851 epoch 2 - iter 2888/3617 - loss 0.10438015 - time (sec): 181.64 - samples/sec: 1674.94 - lr: 0.000046 - momentum: 0.000000
2023-10-25 09:42:31,577 epoch 2 - iter 3249/3617 - loss 0.10418652 - time (sec): 204.37 - samples/sec: 1673.05 - lr: 0.000045 - momentum: 0.000000
2023-10-25 09:42:53,958 epoch 2 - iter 3610/3617 - loss 0.10451376 - time (sec): 226.75 - samples/sec: 1671.92 - lr: 0.000044 - momentum: 0.000000
2023-10-25 09:42:54,390 ----------------------------------------------------------------------------------------------------
2023-10-25 09:42:54,391 EPOCH 2 done: loss 0.1044 - lr: 0.000044
2023-10-25 09:42:59,654 DEV : loss 0.1753711998462677 - f1-score (micro avg) 0.6163
2023-10-25 09:42:59,677 saving best model
2023-10-25 09:43:00,248 ----------------------------------------------------------------------------------------------------
2023-10-25 09:43:22,867 epoch 3 - iter 361/3617 - loss 0.09187774 - time (sec): 22.62 - samples/sec: 1661.09 - lr: 0.000044 - momentum: 0.000000
2023-10-25 09:43:45,716 epoch 3 - iter 722/3617 - loss 0.08807253 - time (sec): 45.47 - samples/sec: 1671.54 - lr: 0.000043 - momentum: 0.000000
2023-10-25 09:44:08,034 epoch 3 - iter 1083/3617 - loss 0.07979957 - time (sec): 67.79 - samples/sec: 1677.05 - lr: 0.000043 - momentum: 0.000000
2023-10-25 09:44:30,500 epoch 3 - iter 1444/3617 - loss 0.08408592 - time (sec): 90.25 - samples/sec: 1674.39 - lr: 0.000042 - momentum: 0.000000
2023-10-25 09:44:53,092 epoch 3 - iter 1805/3617 - loss 0.08186299 - time (sec): 112.84 - samples/sec: 1681.67 - lr: 0.000042 - momentum: 0.000000
2023-10-25 09:45:16,103 epoch 3 - iter 2166/3617 - loss 0.08187069 - time (sec): 135.85 - samples/sec: 1686.84 - lr: 0.000041 - momentum: 0.000000
2023-10-25 09:45:38,530 epoch 3 - iter 2527/3617 - loss 0.08256031 - time (sec): 158.28 - samples/sec: 1680.22 - lr: 0.000041 - momentum: 0.000000
2023-10-25 09:46:01,369 epoch 3 - iter 2888/3617 - loss 0.08357263 - time (sec): 181.12 - samples/sec: 1687.72 - lr: 0.000040 - momentum: 0.000000
2023-10-25 09:46:24,129 epoch 3 - iter 3249/3617 - loss 0.08633465 - time (sec): 203.88 - samples/sec: 1682.22 - lr: 0.000039 - momentum: 0.000000
2023-10-25 09:46:46,514 epoch 3 - iter 3610/3617 - loss 0.08683966 - time (sec): 226.27 - samples/sec: 1676.34 - lr: 0.000039 - momentum: 0.000000
2023-10-25 09:46:46,935 ----------------------------------------------------------------------------------------------------
2023-10-25 09:46:46,935 EPOCH 3 done: loss 0.0867 - lr: 0.000039
2023-10-25 09:46:52,201 DEV : loss 0.1687808334827423 - f1-score (micro avg) 0.6053
2023-10-25 09:46:52,224 ----------------------------------------------------------------------------------------------------
2023-10-25 09:47:14,850 epoch 4 - iter 361/3617 - loss 0.05413728 - time (sec): 22.63 - samples/sec: 1678.58 - lr: 0.000038 - momentum: 0.000000
2023-10-25 09:47:37,725 epoch 4 - iter 722/3617 - loss 0.05722728 - time (sec): 45.50 - samples/sec: 1697.08 - lr: 0.000038 - momentum: 0.000000
2023-10-25 09:48:00,109 epoch 4 - iter 1083/3617 - loss 0.07150763 - time (sec): 67.88 - samples/sec: 1673.02 - lr: 0.000037 - momentum: 0.000000
2023-10-25 09:48:22,695 epoch 4 - iter 1444/3617 - loss 0.06878229 - time (sec): 90.47 - samples/sec: 1673.24 - lr: 0.000037 - momentum: 0.000000
2023-10-25 09:48:45,328 epoch 4 - iter 1805/3617 - loss 0.06707460 - time (sec): 113.10 - samples/sec: 1674.93 - lr: 0.000036 - momentum: 0.000000
2023-10-25 09:49:08,058 epoch 4 - iter 2166/3617 - loss 0.06431722 - time (sec): 135.83 - samples/sec: 1677.87 - lr: 0.000036 - momentum: 0.000000
2023-10-25 09:49:30,598 epoch 4 - iter 2527/3617 - loss 0.06497242 - time (sec): 158.37 - samples/sec: 1675.39 - lr: 0.000035 - momentum: 0.000000
2023-10-25 09:49:53,041 epoch 4 - iter 2888/3617 - loss 0.06441663 - time (sec): 180.82 - samples/sec: 1673.82 - lr: 0.000034 - momentum: 0.000000
2023-10-25 09:50:15,774 epoch 4 - iter 3249/3617 - loss 0.06478123 - time (sec): 203.55 - samples/sec: 1672.41 - lr: 0.000034 - momentum: 0.000000
2023-10-25 09:50:38,665 epoch 4 - iter 3610/3617 - loss 0.06560130 - time (sec): 226.44 - samples/sec: 1674.16 - lr: 0.000033 - momentum: 0.000000
2023-10-25 09:50:39,104 ----------------------------------------------------------------------------------------------------
2023-10-25 09:50:39,104 EPOCH 4 done: loss 0.0655 - lr: 0.000033
2023-10-25 09:50:43,863 DEV : loss 0.2523341476917267 - f1-score (micro avg) 0.6151
2023-10-25 09:50:43,887 ----------------------------------------------------------------------------------------------------
2023-10-25 09:51:06,754 epoch 5 - iter 361/3617 - loss 0.04758910 - time (sec): 22.87 - samples/sec: 1598.33 - lr: 0.000033 - momentum: 0.000000
2023-10-25 09:51:29,489 epoch 5 - iter 722/3617 - loss 0.04378816 - time (sec): 45.60 - samples/sec: 1625.72 - lr: 0.000032 - momentum: 0.000000
2023-10-25 09:51:52,162 epoch 5 - iter 1083/3617 - loss 0.04314076 - time (sec): 68.27 - samples/sec: 1644.36 - lr: 0.000032 - momentum: 0.000000
2023-10-25 09:52:14,724 epoch 5 - iter 1444/3617 - loss 0.04929634 - time (sec): 90.84 - samples/sec: 1651.33 - lr: 0.000031 - momentum: 0.000000
2023-10-25 09:52:37,353 epoch 5 - iter 1805/3617 - loss 0.07597635 - time (sec): 113.47 - samples/sec: 1665.71 - lr: 0.000031 - momentum: 0.000000
2023-10-25 09:52:59,935 epoch 5 - iter 2166/3617 - loss 0.10924113 - time (sec): 136.05 - samples/sec: 1663.20 - lr: 0.000030 - momentum: 0.000000
2023-10-25 09:53:22,507 epoch 5 - iter 2527/3617 - loss 0.13627939 - time (sec): 158.62 - samples/sec: 1661.65 - lr: 0.000029 - momentum: 0.000000
2023-10-25 09:53:45,215 epoch 5 - iter 2888/3617 - loss 0.15088807 - time (sec): 181.33 - samples/sec: 1670.70 - lr: 0.000029 - momentum: 0.000000
2023-10-25 09:54:07,794 epoch 5 - iter 3249/3617 - loss 0.16557653 - time (sec): 203.91 - samples/sec: 1670.34 - lr: 0.000028 - momentum: 0.000000
2023-10-25 09:54:30,658 epoch 5 - iter 3610/3617 - loss 0.16418110 - time (sec): 226.77 - samples/sec: 1672.70 - lr: 0.000028 - momentum: 0.000000
2023-10-25 09:54:31,073 ----------------------------------------------------------------------------------------------------
2023-10-25 09:54:31,074 EPOCH 5 done: loss 0.1641 - lr: 0.000028
2023-10-25 09:54:35,815 DEV : loss 0.2738305926322937 - f1-score (micro avg) 0.4385
2023-10-25 09:54:35,838 ----------------------------------------------------------------------------------------------------
2023-10-25 09:54:58,458 epoch 6 - iter 361/3617 - loss 0.07602831 - time (sec): 22.62 - samples/sec: 1608.27 - lr: 0.000027 - momentum: 0.000000
2023-10-25 09:55:21,211 epoch 6 - iter 722/3617 - loss 0.06715999 - time (sec): 45.37 - samples/sec: 1665.10 - lr: 0.000027 - momentum: 0.000000
2023-10-25 09:55:44,081 epoch 6 - iter 1083/3617 - loss 0.06021834 - time (sec): 68.24 - samples/sec: 1665.09 - lr: 0.000026 - momentum: 0.000000
2023-10-25 09:56:06,585 epoch 6 - iter 1444/3617 - loss 0.05494355 - time (sec): 90.75 - samples/sec: 1655.47 - lr: 0.000026 - momentum: 0.000000
2023-10-25 09:56:29,260 epoch 6 - iter 1805/3617 - loss 0.05247836 - time (sec): 113.42 - samples/sec: 1662.91 - lr: 0.000025 - momentum: 0.000000
2023-10-25 09:56:51,734 epoch 6 - iter 2166/3617 - loss 0.05169263 - time (sec): 135.90 - samples/sec: 1662.80 - lr: 0.000024 - momentum: 0.000000
2023-10-25 09:57:14,488 epoch 6 - iter 2527/3617 - loss 0.05000458 - time (sec): 158.65 - samples/sec: 1664.74 - lr: 0.000024 - momentum: 0.000000
2023-10-25 09:57:37,297 epoch 6 - iter 2888/3617 - loss 0.04799543 - time (sec): 181.46 - samples/sec: 1669.41 - lr: 0.000023 - momentum: 0.000000
2023-10-25 09:58:00,021 epoch 6 - iter 3249/3617 - loss 0.04620394 - time (sec): 204.18 - samples/sec: 1669.05 - lr: 0.000023 - momentum: 0.000000
2023-10-25 09:58:23,282 epoch 6 - iter 3610/3617 - loss 0.04597887 - time (sec): 227.44 - samples/sec: 1666.18 - lr: 0.000022 - momentum: 0.000000
2023-10-25 09:58:23,734 ----------------------------------------------------------------------------------------------------
2023-10-25 09:58:23,735 EPOCH 6 done: loss 0.0460 - lr: 0.000022
2023-10-25 09:58:28,505 DEV : loss 0.2893408536911011 - f1-score (micro avg) 0.6033
2023-10-25 09:58:28,528 ----------------------------------------------------------------------------------------------------
2023-10-25 09:58:51,138 epoch 7 - iter 361/3617 - loss 0.02052274 - time (sec): 22.61 - samples/sec: 1693.32 - lr: 0.000022 - momentum: 0.000000
2023-10-25 09:59:13,437 epoch 7 - iter 722/3617 - loss 0.02181682 - time (sec): 44.91 - samples/sec: 1680.96 - lr: 0.000021 - momentum: 0.000000
2023-10-25 09:59:35,956 epoch 7 - iter 1083/3617 - loss 0.02519344 - time (sec): 67.43 - samples/sec: 1672.46 - lr: 0.000021 - momentum: 0.000000
2023-10-25 09:59:58,626 epoch 7 - iter 1444/3617 - loss 0.02594583 - time (sec): 90.10 - samples/sec: 1677.52 - lr: 0.000020 - momentum: 0.000000
2023-10-25 10:00:21,677 epoch 7 - iter 1805/3617 - loss 0.02582872 - time (sec): 113.15 - samples/sec: 1694.28 - lr: 0.000019 - momentum: 0.000000
2023-10-25 10:00:44,093 epoch 7 - iter 2166/3617 - loss 0.02645313 - time (sec): 135.56 - samples/sec: 1684.00 - lr: 0.000019 - momentum: 0.000000
2023-10-25 10:01:07,030 epoch 7 - iter 2527/3617 - loss 0.02674202 - time (sec): 158.50 - samples/sec: 1679.42 - lr: 0.000018 - momentum: 0.000000
2023-10-25 10:01:29,669 epoch 7 - iter 2888/3617 - loss 0.02630636 - time (sec): 181.14 - samples/sec: 1678.04 - lr: 0.000018 - momentum: 0.000000
2023-10-25 10:01:52,432 epoch 7 - iter 3249/3617 - loss 0.02602543 - time (sec): 203.90 - samples/sec: 1679.87 - lr: 0.000017 - momentum: 0.000000
2023-10-25 10:02:15,089 epoch 7 - iter 3610/3617 - loss 0.02686103 - time (sec): 226.56 - samples/sec: 1674.23 - lr: 0.000017 - momentum: 0.000000
2023-10-25 10:02:15,493 ----------------------------------------------------------------------------------------------------
2023-10-25 10:02:15,493 EPOCH 7 done: loss 0.0269 - lr: 0.000017
2023-10-25 10:02:20,803 DEV : loss 0.362984299659729 - f1-score (micro avg) 0.6264
2023-10-25 10:02:20,826 saving best model
2023-10-25 10:02:21,400 ----------------------------------------------------------------------------------------------------
2023-10-25 10:02:44,280 epoch 8 - iter 361/3617 - loss 0.01775868 - time (sec): 22.88 - samples/sec: 1680.20 - lr: 0.000016 - momentum: 0.000000
2023-10-25 10:03:07,052 epoch 8 - iter 722/3617 - loss 0.01658963 - time (sec): 45.65 - samples/sec: 1666.28 - lr: 0.000016 - momentum: 0.000000
2023-10-25 10:03:29,927 epoch 8 - iter 1083/3617 - loss 0.01602400 - time (sec): 68.53 - samples/sec: 1688.63 - lr: 0.000015 - momentum: 0.000000
2023-10-25 10:03:52,202 epoch 8 - iter 1444/3617 - loss 0.01659828 - time (sec): 90.80 - samples/sec: 1681.73 - lr: 0.000014 - momentum: 0.000000
2023-10-25 10:04:14,886 epoch 8 - iter 1805/3617 - loss 0.01706020 - time (sec): 113.48 - samples/sec: 1679.42 - lr: 0.000014 - momentum: 0.000000
2023-10-25 10:04:37,673 epoch 8 - iter 2166/3617 - loss 0.01752539 - time (sec): 136.27 - samples/sec: 1677.52 - lr: 0.000013 - momentum: 0.000000
2023-10-25 10:05:00,183 epoch 8 - iter 2527/3617 - loss 0.01711630 - time (sec): 158.78 - samples/sec: 1671.96 - lr: 0.000013 - momentum: 0.000000
2023-10-25 10:05:23,054 epoch 8 - iter 2888/3617 - loss 0.01739202 - time (sec): 181.65 - samples/sec: 1672.88 - lr: 0.000012 - momentum: 0.000000
2023-10-25 10:05:45,686 epoch 8 - iter 3249/3617 - loss 0.01664947 - time (sec): 204.28 - samples/sec: 1672.12 - lr: 0.000012 - momentum: 0.000000
2023-10-25 10:06:08,304 epoch 8 - iter 3610/3617 - loss 0.01666693 - time (sec): 226.90 - samples/sec: 1671.47 - lr: 0.000011 - momentum: 0.000000
2023-10-25 10:06:08,723 ----------------------------------------------------------------------------------------------------
2023-10-25 10:06:08,724 EPOCH 8 done: loss 0.0166 - lr: 0.000011
2023-10-25 10:06:14,036 DEV : loss 0.3784872889518738 - f1-score (micro avg) 0.6276
2023-10-25 10:06:14,060 saving best model
2023-10-25 10:06:14,654 ----------------------------------------------------------------------------------------------------
2023-10-25 10:06:37,630 epoch 9 - iter 361/3617 - loss 0.00871238 - time (sec): 22.97 - samples/sec: 1699.47 - lr: 0.000011 - momentum: 0.000000
2023-10-25 10:07:00,371 epoch 9 - iter 722/3617 - loss 0.01127845 - time (sec): 45.72 - samples/sec: 1713.82 - lr: 0.000010 - momentum: 0.000000
2023-10-25 10:07:22,893 epoch 9 - iter 1083/3617 - loss 0.00954485 - time (sec): 68.24 - samples/sec: 1700.01 - lr: 0.000009 - momentum: 0.000000
2023-10-25 10:07:45,389 epoch 9 - iter 1444/3617 - loss 0.01037918 - time (sec): 90.73 - samples/sec: 1681.78 - lr: 0.000009 - momentum: 0.000000
2023-10-25 10:08:08,359 epoch 9 - iter 1805/3617 - loss 0.01022566 - time (sec): 113.70 - samples/sec: 1690.66 - lr: 0.000008 - momentum: 0.000000
2023-10-25 10:08:30,955 epoch 9 - iter 2166/3617 - loss 0.01048162 - time (sec): 136.30 - samples/sec: 1681.17 - lr: 0.000008 - momentum: 0.000000
2023-10-25 10:08:53,592 epoch 9 - iter 2527/3617 - loss 0.01062263 - time (sec): 158.94 - samples/sec: 1674.84 - lr: 0.000007 - momentum: 0.000000
2023-10-25 10:09:16,256 epoch 9 - iter 2888/3617 - loss 0.01069743 - time (sec): 181.60 - samples/sec: 1675.08 - lr: 0.000007 - momentum: 0.000000
2023-10-25 10:09:38,894 epoch 9 - iter 3249/3617 - loss 0.01061481 - time (sec): 204.24 - samples/sec: 1674.70 - lr: 0.000006 - momentum: 0.000000
2023-10-25 10:10:01,366 epoch 9 - iter 3610/3617 - loss 0.01090475 - time (sec): 226.71 - samples/sec: 1671.60 - lr: 0.000006 - momentum: 0.000000
2023-10-25 10:10:01,837 ----------------------------------------------------------------------------------------------------
2023-10-25 10:10:01,837 EPOCH 9 done: loss 0.0109 - lr: 0.000006
2023-10-25 10:10:07,142 DEV : loss 0.36856386065483093 - f1-score (micro avg) 0.6283
2023-10-25 10:10:07,165 saving best model
2023-10-25 10:10:07,759 ----------------------------------------------------------------------------------------------------
2023-10-25 10:10:30,329 epoch 10 - iter 361/3617 - loss 0.00788874 - time (sec): 22.57 - samples/sec: 1693.26 - lr: 0.000005 - momentum: 0.000000
2023-10-25 10:10:52,894 epoch 10 - iter 722/3617 - loss 0.00932594 - time (sec): 45.13 - samples/sec: 1692.70 - lr: 0.000004 - momentum: 0.000000
2023-10-25 10:11:15,621 epoch 10 - iter 1083/3617 - loss 0.00852378 - time (sec): 67.86 - samples/sec: 1672.67 - lr: 0.000004 - momentum: 0.000000
2023-10-25 10:11:38,372 epoch 10 - iter 1444/3617 - loss 0.00785181 - time (sec): 90.61 - samples/sec: 1676.36 - lr: 0.000003 - momentum: 0.000000
2023-10-25 10:12:00,876 epoch 10 - iter 1805/3617 - loss 0.00792484 - time (sec): 113.12 - samples/sec: 1667.79 - lr: 0.000003 - momentum: 0.000000
2023-10-25 10:12:23,473 epoch 10 - iter 2166/3617 - loss 0.00740207 - time (sec): 135.71 - samples/sec: 1666.96 - lr: 0.000002 - momentum: 0.000000
2023-10-25 10:12:46,105 epoch 10 - iter 2527/3617 - loss 0.00754265 - time (sec): 158.34 - samples/sec: 1660.76 - lr: 0.000002 - momentum: 0.000000
2023-10-25 10:13:08,972 epoch 10 - iter 2888/3617 - loss 0.00739471 - time (sec): 181.21 - samples/sec: 1665.43 - lr: 0.000001 - momentum: 0.000000
2023-10-25 10:13:31,727 epoch 10 - iter 3249/3617 - loss 0.00769671 - time (sec): 203.97 - samples/sec: 1670.47 - lr: 0.000001 - momentum: 0.000000
2023-10-25 10:13:54,401 epoch 10 - iter 3610/3617 - loss 0.00741003 - time (sec): 226.64 - samples/sec: 1674.04 - lr: 0.000000 - momentum: 0.000000
2023-10-25 10:13:54,800 ----------------------------------------------------------------------------------------------------
2023-10-25 10:13:54,801 EPOCH 10 done: loss 0.0074 - lr: 0.000000
2023-10-25 10:13:59,572 DEV : loss 0.4016081988811493 - f1-score (micro avg) 0.626
2023-10-25 10:14:00,063 ----------------------------------------------------------------------------------------------------
2023-10-25 10:14:00,063 Loading model from best epoch ...
2023-10-25 10:14:01,739 SequenceTagger predicts: Dictionary with 13 tags: O, S-loc, B-loc, E-loc, I-loc, S-pers, B-pers, E-pers, I-pers, S-org, B-org, E-org, I-org
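The 13 tags are the BIOES encoding (S single-token, B begin, E end, I inside) of the three entity types loc, pers and org, plus O. A hedged sketch of reusing the checkpoint written by "saving best model" above for inference (the example sentence is hypothetical):

from flair.data import Sentence
from flair.models import SequenceTagger

# Load the best checkpoint from the training base path logged above.
tagger = SequenceTagger.load(
    "hmbench-letemps/fr-dbmdz/bert-base-historic-multilingual-64k-td-cased-bs4-wsFalse-e10-lr5e-05-poolingfirst-layers-1-crfFalse-1/best-model.pt"
)

sentence = Sentence("Le Temps paraît à Lausanne.")  # hypothetical input
tagger.predict(sentence)
for entity in sentence.get_spans("ner"):
    print(entity.text, entity.tag)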
2023-10-25 10:14:07,996
Results:
- F-score (micro) 0.6347
- F-score (macro) 0.5196
- Accuracy 0.4797

By class:
              precision    recall  f1-score   support

         loc     0.6325    0.7310    0.6782       591
        pers     0.5637    0.7563    0.6459       357
         org     0.2289    0.2405    0.2346        79

   micro avg     0.5791    0.7020    0.6347      1027
   macro avg     0.4750    0.5759    0.5196      1027
weighted avg     0.5775    0.7020    0.6328      1027
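As a quick consistency check on the table, the micro-average F-score is the harmonic mean of the micro precision and recall, and the macro F-score is the unweighted mean of the per-class F-scores:

p, r = 0.5791, 0.7020
print(round(2 * p * r / (p + r), 4))             # 0.6347 -> "F-score (micro)"
print(round((0.6782 + 0.6459 + 0.2346) / 3, 4))  # 0.5196 -> "F-score (macro)"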
2023-10-25 10:14:07,996 ----------------------------------------------------------------------------------------------------