davidgaofc committed
Commit aebb405 · Parent: 81e2dca

End of training

Files changed (5)
  1. README.md +47 -16
  2. config.json +16 -14
  3. model.safetensors +2 -2
  4. tokenizer_config.json +1 -1
  5. training_args.bin +1 -1
README.md CHANGED
@@ -1,6 +1,6 @@
  ---
  license: apache-2.0
- base_model: distilbert-base-uncased
+ base_model: bert-base-uncased
  tags:
  - generated_from_trainer
  metrics:
@@ -18,12 +18,12 @@ should probably proofread and complete it, then remove this comment. -->

  # training

- This model is a fine-tuned version of [distilbert-base-uncased](https://huggingface.co/distilbert-base-uncased) on an unknown dataset.
+ This model is a fine-tuned version of [bert-base-uncased](https://huggingface.co/bert-base-uncased) on an unknown dataset.
  It achieves the following results on the evaluation set:
- - Loss: 1.0202
+ - Loss: 2.3256
  - Accuracy: 0.6768
- - F1: 0.6767
- - Precision: 0.6768
+ - F1: 0.6764
+ - Precision: 0.6772
  - Recall: 0.6768

  ## Model description
@@ -44,26 +44,57 @@ More information needed

  The following hyperparameters were used during training:
  - learning_rate: 2e-05
- - train_batch_size: 20
+ - train_batch_size: 40
  - eval_batch_size: 20
  - seed: 42
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
  - lr_scheduler_type: linear
- - num_epochs: 9
+ - num_epochs: 40

  ### Training results

  | Training Loss | Epoch | Step | Validation Loss | Accuracy | F1 | Precision | Recall |
  |:-------------:|:-----:|:----:|:---------------:|:--------:|:------:|:---------:|:------:|
- | No log | 1.0 | 132 | 0.6941 | 0.5076 | 0.3418 | 0.2577 | 0.5076 |
- | No log | 2.0 | 264 | 0.6933 | 0.5107 | 0.4370 | 0.5347 | 0.5107 |
- | No log | 3.0 | 396 | 0.6766 | 0.5762 | 0.5742 | 0.5766 | 0.5762 |
- | 0.6466 | 4.0 | 528 | 0.7121 | 0.6067 | 0.6061 | 0.6068 | 0.6067 |
- | 0.6466 | 5.0 | 660 | 0.7875 | 0.6448 | 0.6367 | 0.6624 | 0.6448 |
- | 0.6466 | 6.0 | 792 | 0.8395 | 0.6692 | 0.6664 | 0.6771 | 0.6692 |
- | 0.6466 | 7.0 | 924 | 0.9008 | 0.6768 | 0.6765 | 0.6783 | 0.6768 |
- | 0.2623 | 8.0 | 1056 | 0.9956 | 0.6707 | 0.6703 | 0.6711 | 0.6707 |
- | 0.2623 | 9.0 | 1188 | 1.0202 | 0.6768 | 0.6767 | 0.6768 | 0.6768 |
+ | No log | 1.0 | 66 | 0.7029 | 0.4939 | 0.3623 | 0.4289 | 0.4939 |
+ | No log | 2.0 | 132 | 0.6985 | 0.4726 | 0.4074 | 0.4429 | 0.4726 |
+ | No log | 3.0 | 198 | 0.7052 | 0.5091 | 0.5079 | 0.5101 | 0.5091 |
+ | No log | 4.0 | 264 | 0.7277 | 0.5732 | 0.5687 | 0.5746 | 0.5732 |
+ | No log | 5.0 | 330 | 0.8226 | 0.5747 | 0.5711 | 0.5791 | 0.5747 |
+ | No log | 6.0 | 396 | 0.9070 | 0.6098 | 0.6084 | 0.6126 | 0.6098 |
+ | No log | 7.0 | 462 | 0.9877 | 0.6296 | 0.6288 | 0.6299 | 0.6296 |
+ | 0.4904 | 8.0 | 528 | 1.2868 | 0.5976 | 0.5814 | 0.6198 | 0.5976 |
+ | 0.4904 | 9.0 | 594 | 1.2709 | 0.6433 | 0.6396 | 0.6517 | 0.6433 |
+ | 0.4904 | 10.0 | 660 | 1.3541 | 0.6494 | 0.6494 | 0.6494 | 0.6494 |
+ | 0.4904 | 11.0 | 726 | 1.4138 | 0.6631 | 0.6572 | 0.6724 | 0.6631 |
+ | 0.4904 | 12.0 | 792 | 1.5116 | 0.6631 | 0.6616 | 0.6676 | 0.6631 |
+ | 0.4904 | 13.0 | 858 | 1.5349 | 0.6738 | 0.6687 | 0.6825 | 0.6738 |
+ | 0.4904 | 14.0 | 924 | 1.5437 | 0.6845 | 0.6845 | 0.6845 | 0.6845 |
+ | 0.4904 | 15.0 | 990 | 1.8465 | 0.6585 | 0.6581 | 0.6588 | 0.6585 |
+ | 0.0493 | 16.0 | 1056 | 1.8186 | 0.6662 | 0.6661 | 0.6667 | 0.6662 |
+ | 0.0493 | 17.0 | 1122 | 1.9234 | 0.6601 | 0.6589 | 0.6635 | 0.6601 |
+ | 0.0493 | 18.0 | 1188 | 1.9517 | 0.6707 | 0.6689 | 0.6763 | 0.6707 |
+ | 0.0493 | 19.0 | 1254 | 1.9673 | 0.6616 | 0.6609 | 0.6639 | 0.6616 |
+ | 0.0493 | 20.0 | 1320 | 2.0034 | 0.6768 | 0.6768 | 0.6769 | 0.6768 |
+ | 0.0493 | 21.0 | 1386 | 2.0452 | 0.6707 | 0.6707 | 0.6707 | 0.6707 |
+ | 0.0493 | 22.0 | 1452 | 2.1151 | 0.6570 | 0.6569 | 0.6578 | 0.6570 |
+ | 0.0085 | 23.0 | 1518 | 2.0888 | 0.6631 | 0.6627 | 0.6633 | 0.6631 |
+ | 0.0085 | 24.0 | 1584 | 2.1101 | 0.6646 | 0.6646 | 0.6649 | 0.6646 |
+ | 0.0085 | 25.0 | 1650 | 2.1330 | 0.6662 | 0.6661 | 0.6666 | 0.6662 |
+ | 0.0085 | 26.0 | 1716 | 2.1890 | 0.6662 | 0.6659 | 0.6663 | 0.6662 |
+ | 0.0085 | 27.0 | 1782 | 2.2275 | 0.6601 | 0.6598 | 0.6602 | 0.6601 |
+ | 0.0085 | 28.0 | 1848 | 2.2380 | 0.6662 | 0.6648 | 0.6704 | 0.6662 |
+ | 0.0085 | 29.0 | 1914 | 2.2606 | 0.6646 | 0.6646 | 0.6650 | 0.6646 |
+ | 0.0085 | 30.0 | 1980 | 2.2708 | 0.6738 | 0.6734 | 0.6755 | 0.6738 |
+ | 0.0029 | 31.0 | 2046 | 2.2827 | 0.6677 | 0.6675 | 0.6677 | 0.6677 |
+ | 0.0029 | 32.0 | 2112 | 2.2992 | 0.6738 | 0.6738 | 0.6738 | 0.6738 |
+ | 0.0029 | 33.0 | 2178 | 2.2926 | 0.6768 | 0.6757 | 0.6782 | 0.6768 |
+ | 0.0029 | 34.0 | 2244 | 2.3100 | 0.6738 | 0.6738 | 0.6740 | 0.6738 |
+ | 0.0029 | 35.0 | 2310 | 2.3081 | 0.6768 | 0.6767 | 0.6768 | 0.6768 |
+ | 0.0029 | 36.0 | 2376 | 2.3080 | 0.6768 | 0.6764 | 0.6772 | 0.6768 |
+ | 0.0029 | 37.0 | 2442 | 2.3242 | 0.6784 | 0.6783 | 0.6787 | 0.6784 |
+ | 0.0004 | 38.0 | 2508 | 2.3252 | 0.6799 | 0.6799 | 0.6799 | 0.6799 |
+ | 0.0004 | 39.0 | 2574 | 2.3228 | 0.6784 | 0.6782 | 0.6784 | 0.6784 |
+ | 0.0004 | 40.0 | 2640 | 2.3256 | 0.6768 | 0.6764 | 0.6772 | 0.6768 |


  ### Framework versions
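The updated hyperparameter list maps directly onto `TrainingArguments` in transformers 4.36.x. Below is a minimal sketch of such a run, not the author's actual script: the training data is not published ("an unknown dataset"), so a tiny placeholder dataset and an assumed binary label space (`num_labels=2`) stand in purely to make the example self-contained.

```python
# Sketch only: mirrors the hyperparameters in the updated model card (transformers 4.36.x API).
# The real dataset is unknown; the two-example Dataset below is a placeholder, and
# num_labels=2 is an assumption based on the single-label classification problem type.
from datasets import Dataset
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    Trainer,
    TrainingArguments,
)

model_name = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

# Placeholder data; replace with the actual dataset and preprocessing.
raw = Dataset.from_dict({"text": ["example a", "example b"], "label": [0, 1]})
encoded = raw.map(
    lambda ex: tokenizer(ex["text"], truncation=True, padding="max_length", max_length=64)
)

args = TrainingArguments(
    output_dir="training",
    learning_rate=2e-5,
    per_device_train_batch_size=40,
    per_device_eval_batch_size=20,
    num_train_epochs=40,
    seed=42,
    lr_scheduler_type="linear",
    evaluation_strategy="epoch",  # the card logs eval metrics once per epoch
)

trainer = Trainer(
    model=model,
    args=args,
    train_dataset=encoded,
    eval_dataset=encoded,   # placeholder; a compute_metrics function would add accuracy/F1/precision/recall
    tokenizer=tokenizer,
)
trainer.train()
```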
config.json CHANGED
@@ -1,25 +1,27 @@
  {
- "_name_or_path": "distilbert-base-uncased",
- "activation": "gelu",
+ "_name_or_path": "bert-base-uncased",
  "architectures": [
- "DistilBertForSequenceClassification"
+ "BertForSequenceClassification"
  ],
- "attention_dropout": 0.1,
- "dim": 768,
- "dropout": 0.1,
- "hidden_dim": 3072,
+ "attention_probs_dropout_prob": 0.1,
+ "classifier_dropout": null,
+ "gradient_checkpointing": false,
+ "hidden_act": "gelu",
+ "hidden_dropout_prob": 0.1,
+ "hidden_size": 768,
  "initializer_range": 0.02,
+ "intermediate_size": 3072,
+ "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
- "model_type": "distilbert",
- "n_heads": 12,
- "n_layers": 6,
+ "model_type": "bert",
+ "num_attention_heads": 12,
+ "num_hidden_layers": 12,
  "pad_token_id": 0,
+ "position_embedding_type": "absolute",
  "problem_type": "single_label_classification",
- "qa_dropout": 0.1,
- "seq_classif_dropout": 0.2,
- "sinusoidal_pos_embds": false,
- "tie_weights_": true,
  "torch_dtype": "float32",
  "transformers_version": "4.36.2",
+ "type_vocab_size": 2,
+ "use_cache": true,
  "vocab_size": 30522
  }
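The rewritten config swaps the DistilBERT-specific keys (`dim`, `n_layers`, `n_heads`, dropout names) for their BERT equivalents. A quick sketch of reading the committed config back with `AutoConfig`; running from a local clone of this repository is an assumption.

```python
# Sketch: inspect the committed config.json; the expected values in the comments come from the diff above.
from transformers import AutoConfig

config = AutoConfig.from_pretrained(".")  # assumption: executed inside a local clone of this repo
print(config.model_type)                  # "bert" (previously "distilbert")
print(config.num_hidden_layers)           # 12 (DistilBERT's n_layers was 6)
print(config.num_attention_heads, config.hidden_size)  # 12 768
print(config.architectures)               # ["BertForSequenceClassification"]
```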
model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:1f4e0e1acab1ee7f357a91355c6277e86f3965248068a99f27a3b59ce70786ef
- size 267832560
+ oid sha256:c0619759bf5cfc259f2ec81089dfab2c9814889fdfbd56b28accaf3611a51a3c
+ size 437958648
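The size change is consistent with the architecture swap: dividing the float32 payloads by 4 bytes per parameter gives roughly 67M parameters for the old file (DistilBERT-base scale) and roughly 109.5M for the new one (BERT-base scale), with a small remainder for the safetensors header. A back-of-the-envelope check:

```python
# Rough sanity check on the LFS pointer sizes above (float32 = 4 bytes per parameter).
old_bytes, new_bytes = 267_832_560, 437_958_648
print(f"old ≈ {old_bytes / 4 / 1e6:.1f}M params")  # ≈ 67.0M, DistilBERT-base scale
print(f"new ≈ {new_bytes / 4 / 1e6:.1f}M params")  # ≈ 109.5M, BERT-base scale
```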
tokenizer_config.json CHANGED
@@ -50,6 +50,6 @@
  "sep_token": "[SEP]",
  "strip_accents": null,
  "tokenize_chinese_chars": true,
- "tokenizer_class": "DistilBertTokenizer",
+ "tokenizer_class": "BertTokenizer",
  "unk_token": "[UNK]"
  }
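With the tokenizer class switched to `BertTokenizer`, the checkpoint loads through the Auto classes as usual. A minimal inference sketch follows; the repo id is an assumption inferred from the account name and card title, and the label semantics are not documented in the card.

```python
# Sketch: load the committed checkpoint and classify one input.
# "davidgaofc/training" is an assumed repo id; replace with the actual model id or a local clone path.
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

repo_id = "davidgaofc/training"
tokenizer = AutoTokenizer.from_pretrained(repo_id)  # resolves to BertTokenizer after this commit
model = AutoModelForSequenceClassification.from_pretrained(repo_id)
model.eval()

inputs = tokenizer("example input text", return_tensors="pt", truncation=True)
with torch.no_grad():
    logits = model(**inputs).logits
print(logits.argmax(dim=-1).item())  # predicted class index; label meaning is not documented
```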
training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:189145fa49bd9b944febdb6bce02a06702bca4aaf6ed343bad063589e753e212
+ oid sha256:3218350d392bc2a9b8f25d0d2b431f6befce56a5cbf78f0db7e2958ccf64348d
  size 4664