davidgaofc committed
Commit aebb405 · Parent: 81e2dca

End of training

Files changed (5)
  1. README.md +47 -16
  2. config.json +16 -14
  3. model.safetensors +2 -2
  4. tokenizer_config.json +1 -1
  5. training_args.bin +1 -1
README.md CHANGED
@@ -1,6 +1,6 @@
  ---
  license: apache-2.0
- base_model: distilbert-base-uncased
+ base_model: bert-base-uncased
  tags:
  - generated_from_trainer
  metrics:
@@ -18,12 +18,12 @@ should probably proofread and complete it, then remove this comment. -->

  # training

- This model is a fine-tuned version of [distilbert-base-uncased](https://huggingface.co/distilbert-base-uncased) on an unknown dataset.
+ This model is a fine-tuned version of [bert-base-uncased](https://huggingface.co/bert-base-uncased) on an unknown dataset.
  It achieves the following results on the evaluation set:
- - Loss: 1.0202
+ - Loss: 2.3256
  - Accuracy: 0.6768
- - F1: 0.6767
- - Precision: 0.6768
+ - F1: 0.6764
+ - Precision: 0.6772
  - Recall: 0.6768

  ## Model description
@@ -44,26 +44,57 @@ More information needed

  The following hyperparameters were used during training:
  - learning_rate: 2e-05
- - train_batch_size: 20
+ - train_batch_size: 40
  - eval_batch_size: 20
  - seed: 42
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
  - lr_scheduler_type: linear
- - num_epochs: 9
+ - num_epochs: 40

  ### Training results

  | Training Loss | Epoch | Step | Validation Loss | Accuracy | F1 | Precision | Recall |
  |:-------------:|:-----:|:----:|:---------------:|:--------:|:------:|:---------:|:------:|
- | No log | 1.0 | 132 | 0.6941 | 0.5076 | 0.3418 | 0.2577 | 0.5076 |
- | No log | 2.0 | 264 | 0.6933 | 0.5107 | 0.4370 | 0.5347 | 0.5107 |
- | No log | 3.0 | 396 | 0.6766 | 0.5762 | 0.5742 | 0.5766 | 0.5762 |
- | 0.6466 | 4.0 | 528 | 0.7121 | 0.6067 | 0.6061 | 0.6068 | 0.6067 |
- | 0.6466 | 5.0 | 660 | 0.7875 | 0.6448 | 0.6367 | 0.6624 | 0.6448 |
- | 0.6466 | 6.0 | 792 | 0.8395 | 0.6692 | 0.6664 | 0.6771 | 0.6692 |
- | 0.6466 | 7.0 | 924 | 0.9008 | 0.6768 | 0.6765 | 0.6783 | 0.6768 |
- | 0.2623 | 8.0 | 1056 | 0.9956 | 0.6707 | 0.6703 | 0.6711 | 0.6707 |
- | 0.2623 | 9.0 | 1188 | 1.0202 | 0.6768 | 0.6767 | 0.6768 | 0.6768 |
+ | No log | 1.0 | 66 | 0.7029 | 0.4939 | 0.3623 | 0.4289 | 0.4939 |
+ | No log | 2.0 | 132 | 0.6985 | 0.4726 | 0.4074 | 0.4429 | 0.4726 |
+ | No log | 3.0 | 198 | 0.7052 | 0.5091 | 0.5079 | 0.5101 | 0.5091 |
+ | No log | 4.0 | 264 | 0.7277 | 0.5732 | 0.5687 | 0.5746 | 0.5732 |
+ | No log | 5.0 | 330 | 0.8226 | 0.5747 | 0.5711 | 0.5791 | 0.5747 |
+ | No log | 6.0 | 396 | 0.9070 | 0.6098 | 0.6084 | 0.6126 | 0.6098 |
+ | No log | 7.0 | 462 | 0.9877 | 0.6296 | 0.6288 | 0.6299 | 0.6296 |
+ | 0.4904 | 8.0 | 528 | 1.2868 | 0.5976 | 0.5814 | 0.6198 | 0.5976 |
+ | 0.4904 | 9.0 | 594 | 1.2709 | 0.6433 | 0.6396 | 0.6517 | 0.6433 |
+ | 0.4904 | 10.0 | 660 | 1.3541 | 0.6494 | 0.6494 | 0.6494 | 0.6494 |
+ | 0.4904 | 11.0 | 726 | 1.4138 | 0.6631 | 0.6572 | 0.6724 | 0.6631 |
+ | 0.4904 | 12.0 | 792 | 1.5116 | 0.6631 | 0.6616 | 0.6676 | 0.6631 |
+ | 0.4904 | 13.0 | 858 | 1.5349 | 0.6738 | 0.6687 | 0.6825 | 0.6738 |
+ | 0.4904 | 14.0 | 924 | 1.5437 | 0.6845 | 0.6845 | 0.6845 | 0.6845 |
+ | 0.4904 | 15.0 | 990 | 1.8465 | 0.6585 | 0.6581 | 0.6588 | 0.6585 |
+ | 0.0493 | 16.0 | 1056 | 1.8186 | 0.6662 | 0.6661 | 0.6667 | 0.6662 |
+ | 0.0493 | 17.0 | 1122 | 1.9234 | 0.6601 | 0.6589 | 0.6635 | 0.6601 |
+ | 0.0493 | 18.0 | 1188 | 1.9517 | 0.6707 | 0.6689 | 0.6763 | 0.6707 |
+ | 0.0493 | 19.0 | 1254 | 1.9673 | 0.6616 | 0.6609 | 0.6639 | 0.6616 |
+ | 0.0493 | 20.0 | 1320 | 2.0034 | 0.6768 | 0.6768 | 0.6769 | 0.6768 |
+ | 0.0493 | 21.0 | 1386 | 2.0452 | 0.6707 | 0.6707 | 0.6707 | 0.6707 |
+ | 0.0493 | 22.0 | 1452 | 2.1151 | 0.6570 | 0.6569 | 0.6578 | 0.6570 |
+ | 0.0085 | 23.0 | 1518 | 2.0888 | 0.6631 | 0.6627 | 0.6633 | 0.6631 |
+ | 0.0085 | 24.0 | 1584 | 2.1101 | 0.6646 | 0.6646 | 0.6649 | 0.6646 |
+ | 0.0085 | 25.0 | 1650 | 2.1330 | 0.6662 | 0.6661 | 0.6666 | 0.6662 |
+ | 0.0085 | 26.0 | 1716 | 2.1890 | 0.6662 | 0.6659 | 0.6663 | 0.6662 |
+ | 0.0085 | 27.0 | 1782 | 2.2275 | 0.6601 | 0.6598 | 0.6602 | 0.6601 |
+ | 0.0085 | 28.0 | 1848 | 2.2380 | 0.6662 | 0.6648 | 0.6704 | 0.6662 |
+ | 0.0085 | 29.0 | 1914 | 2.2606 | 0.6646 | 0.6646 | 0.6650 | 0.6646 |
+ | 0.0085 | 30.0 | 1980 | 2.2708 | 0.6738 | 0.6734 | 0.6755 | 0.6738 |
+ | 0.0029 | 31.0 | 2046 | 2.2827 | 0.6677 | 0.6675 | 0.6677 | 0.6677 |
+ | 0.0029 | 32.0 | 2112 | 2.2992 | 0.6738 | 0.6738 | 0.6738 | 0.6738 |
+ | 0.0029 | 33.0 | 2178 | 2.2926 | 0.6768 | 0.6757 | 0.6782 | 0.6768 |
+ | 0.0029 | 34.0 | 2244 | 2.3100 | 0.6738 | 0.6738 | 0.6740 | 0.6738 |
+ | 0.0029 | 35.0 | 2310 | 2.3081 | 0.6768 | 0.6767 | 0.6768 | 0.6768 |
+ | 0.0029 | 36.0 | 2376 | 2.3080 | 0.6768 | 0.6764 | 0.6772 | 0.6768 |
+ | 0.0029 | 37.0 | 2442 | 2.3242 | 0.6784 | 0.6783 | 0.6787 | 0.6784 |
+ | 0.0004 | 38.0 | 2508 | 2.3252 | 0.6799 | 0.6799 | 0.6799 | 0.6799 |
+ | 0.0004 | 39.0 | 2574 | 2.3228 | 0.6784 | 0.6782 | 0.6784 | 0.6784 |
+ | 0.0004 | 40.0 | 2640 | 2.3256 | 0.6768 | 0.6764 | 0.6772 | 0.6768 |


  ### Framework versions
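The updated hyperparameter list maps directly onto `TrainingArguments` in transformers 4.36.x. Below is a minimal sketch of such a run, not the author's actual script: the training data is not published ("an unknown dataset"), so a tiny placeholder dataset and an assumed binary label space (`num_labels=2`) stand in purely to make the example self-contained.

```python
# Sketch only: mirrors the hyperparameters in the updated model card (transformers 4.36.x API).
# The real dataset is unknown; the two-example Dataset below is a placeholder, and
# num_labels=2 is an assumption based on the single-label classification problem type.
from datasets import Dataset
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    Trainer,
    TrainingArguments,
)

model_name = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

# Placeholder data; replace with the actual dataset and preprocessing.
raw = Dataset.from_dict({"text": ["example a", "example b"], "label": [0, 1]})
encoded = raw.map(
    lambda ex: tokenizer(ex["text"], truncation=True, padding="max_length", max_length=64)
)

args = TrainingArguments(
    output_dir="training",
    learning_rate=2e-5,
    per_device_train_batch_size=40,
    per_device_eval_batch_size=20,
    num_train_epochs=40,
    seed=42,
    lr_scheduler_type="linear",
    evaluation_strategy="epoch",  # the card logs eval metrics once per epoch
)

trainer = Trainer(
    model=model,
    args=args,
    train_dataset=encoded,
    eval_dataset=encoded,   # placeholder; a compute_metrics function would add accuracy/F1/precision/recall
    tokenizer=tokenizer,
)
trainer.train()
```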
config.json CHANGED
@@ -1,25 +1,27 @@
  {
- "_name_or_path": "distilbert-base-uncased",
- "activation": "gelu",
+ "_name_or_path": "bert-base-uncased",
  "architectures": [
- "DistilBertForSequenceClassification"
+ "BertForSequenceClassification"
  ],
- "attention_dropout": 0.1,
- "dim": 768,
- "dropout": 0.1,
- "hidden_dim": 3072,
+ "attention_probs_dropout_prob": 0.1,
+ "classifier_dropout": null,
+ "gradient_checkpointing": false,
+ "hidden_act": "gelu",
+ "hidden_dropout_prob": 0.1,
+ "hidden_size": 768,
  "initializer_range": 0.02,
+ "intermediate_size": 3072,
+ "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
- "model_type": "distilbert",
- "n_heads": 12,
- "n_layers": 6,
+ "model_type": "bert",
+ "num_attention_heads": 12,
+ "num_hidden_layers": 12,
  "pad_token_id": 0,
+ "position_embedding_type": "absolute",
  "problem_type": "single_label_classification",
- "qa_dropout": 0.1,
- "seq_classif_dropout": 0.2,
- "sinusoidal_pos_embds": false,
- "tie_weights_": true,
  "torch_dtype": "float32",
  "transformers_version": "4.36.2",
+ "type_vocab_size": 2,
+ "use_cache": true,
  "vocab_size": 30522
  }
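The rewritten config swaps the DistilBERT-specific keys (`dim`, `n_layers`, `n_heads`, dropout names) for their BERT equivalents. A quick sketch of reading the committed config back with `AutoConfig`; running from a local clone of this repository is an assumption.

```python
# Sketch: inspect the committed config.json; the expected values in the comments come from the diff above.
from transformers import AutoConfig

config = AutoConfig.from_pretrained(".")  # assumption: executed inside a local clone of this repo
print(config.model_type)                  # "bert" (previously "distilbert")
print(config.num_hidden_layers)           # 12 (DistilBERT's n_layers was 6)
print(config.num_attention_heads, config.hidden_size)  # 12 768
print(config.architectures)               # ["BertForSequenceClassification"]
```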
model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:1f4e0e1acab1ee7f357a91355c6277e86f3965248068a99f27a3b59ce70786ef
- size 267832560
+ oid sha256:c0619759bf5cfc259f2ec81089dfab2c9814889fdfbd56b28accaf3611a51a3c
+ size 437958648
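The size change is consistent with the architecture swap: dividing the float32 payloads by 4 bytes per parameter gives roughly 67M parameters for the old file (DistilBERT-base scale) and roughly 109.5M for the new one (BERT-base scale), with a small remainder for the safetensors header. A back-of-the-envelope check:

```python
# Rough sanity check on the LFS pointer sizes above (float32 = 4 bytes per parameter).
old_bytes, new_bytes = 267_832_560, 437_958_648
print(f"old ≈ {old_bytes / 4 / 1e6:.1f}M params")  # ≈ 67.0M, DistilBERT-base scale
print(f"new ≈ {new_bytes / 4 / 1e6:.1f}M params")  # ≈ 109.5M, BERT-base scale
```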
tokenizer_config.json CHANGED
@@ -50,6 +50,6 @@
  "sep_token": "[SEP]",
  "strip_accents": null,
  "tokenize_chinese_chars": true,
- "tokenizer_class": "DistilBertTokenizer",
+ "tokenizer_class": "BertTokenizer",
  "unk_token": "[UNK]"
  }
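With the tokenizer class switched to `BertTokenizer`, the checkpoint loads through the Auto classes as usual. A minimal inference sketch follows; the repo id is an assumption inferred from the account name and card title, and the label semantics are not documented in the card.

```python
# Sketch: load the committed checkpoint and classify one input.
# "davidgaofc/training" is an assumed repo id; replace with the actual model id or a local clone path.
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

repo_id = "davidgaofc/training"
tokenizer = AutoTokenizer.from_pretrained(repo_id)  # resolves to BertTokenizer after this commit
model = AutoModelForSequenceClassification.from_pretrained(repo_id)
model.eval()

inputs = tokenizer("example input text", return_tensors="pt", truncation=True)
with torch.no_grad():
    logits = model(**inputs).logits
print(logits.argmax(dim=-1).item())  # predicted class index; label meaning is not documented
```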
training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:189145fa49bd9b944febdb6bce02a06702bca4aaf6ed343bad063589e753e212
+ oid sha256:3218350d392bc2a9b8f25d0d2b431f6befce56a5cbf78f0db7e2958ccf64348d
  size 4664