lekhnathrijal committed
Commit 27e76c2 · verified · 1 Parent(s): 40843d9
ai-research-lab/bert-question-classifier

Files changed (4):
  1. README.md +30 -30
  2. config.json +18 -0
  3. model.safetensors +1 -1
  4. tokenizer.json +10 -1
README.md CHANGED
@@ -21,11 +21,11 @@ should probably proofread and complete it, then remove this comment. -->
 
  This model is a fine-tuned version of [google-bert/bert-base-uncased](https://huggingface.co/google-bert/bert-base-uncased) on the None dataset.
  It achieves the following results on the evaluation set:
- - Loss: 1.8460
- - Accuracy: 0.9711
- - Recall: 0.8571
- - Precision: 0.8371
- - F1: 0.8470
+ - Loss: 1.9854
+ - Accuracy: 0.9680
+ - Recall: 0.8325
+ - Precision: 0.8183
+ - F1: 0.8253
 
  ## Model description
 
@@ -51,40 +51,40 @@ The following hyperparameters were used during training:
  - optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
  - lr_scheduler_type: linear
  - lr_scheduler_warmup_ratio: 0.1
- - num_epochs: 3
+ - num_epochs: 10
 
  ### Training results
 
  | Training Loss | Epoch | Step | Validation Loss | Accuracy | Recall | Precision | F1 |
  |:-------------:|:------:|:----:|:---------------:|:--------:|:------:|:---------:|:------:|
- | No log | 0.1284 | 100 | 4.9916 | 0.9143 | 0.5142 | 0.5430 | 0.5282 |
- | No log | 0.2567 | 200 | 4.3263 | 0.9264 | 0.5870 | 0.6092 | 0.5979 |
- | No log | 0.3851 | 300 | 3.8919 | 0.9348 | 0.6636 | 0.6463 | 0.6548 |
- | No log | 0.5135 | 400 | 3.5265 | 0.9391 | 0.6599 | 0.6783 | 0.6690 |
- | 4.2904 | 0.6418 | 500 | 3.2937 | 0.9452 | 0.7049 | 0.7066 | 0.7057 |
- | 4.2904 | 0.7702 | 600 | 3.0129 | 0.9496 | 0.7275 | 0.7305 | 0.7290 |
- | 4.2904 | 0.8986 | 700 | 2.8410 | 0.9521 | 0.7482 | 0.7404 | 0.7443 |
- | 4.2904 | 1.0270 | 800 | 2.6565 | 0.9552 | 0.7757 | 0.7520 | 0.7637 |
- | 4.2904 | 1.1553 | 900 | 2.5233 | 0.9574 | 0.7842 | 0.7647 | 0.7743 |
- | 2.7537 | 1.2837 | 1000 | 2.3877 | 0.9598 | 0.7976 | 0.7771 | 0.7872 |
- | 2.7537 | 1.4121 | 1100 | 2.2836 | 0.9622 | 0.8146 | 0.7875 | 0.8008 |
- | 2.7537 | 1.5404 | 1200 | 2.1776 | 0.9635 | 0.8130 | 0.7990 | 0.8059 |
- | 2.7537 | 1.6688 | 1300 | 2.1273 | 0.9653 | 0.8223 | 0.8085 | 0.8153 |
- | 2.7537 | 1.7972 | 1400 | 2.0858 | 0.9651 | 0.8251 | 0.8052 | 0.8150 |
- | 2.218 | 1.9255 | 1500 | 2.0143 | 0.9670 | 0.8312 | 0.8176 | 0.8243 |
- | 2.218 | 2.0539 | 1600 | 1.9800 | 0.9683 | 0.8413 | 0.8226 | 0.8319 |
- | 2.218 | 2.1823 | 1700 | 1.9409 | 0.9691 | 0.8470 | 0.8259 | 0.8363 |
- | 2.218 | 2.3107 | 1800 | 1.9122 | 0.9693 | 0.8445 | 0.8294 | 0.8369 |
- | 2.218 | 2.4390 | 1900 | 1.8876 | 0.9699 | 0.8502 | 0.8310 | 0.8405 |
- | 1.8644 | 2.5674 | 2000 | 1.8786 | 0.9695 | 0.8470 | 0.8298 | 0.8383 |
- | 1.8644 | 2.6958 | 2100 | 1.8620 | 0.9701 | 0.8510 | 0.8325 | 0.8416 |
- | 1.8644 | 2.8241 | 2200 | 1.8494 | 0.9711 | 0.8567 | 0.8370 | 0.8467 |
- | 1.8644 | 2.9525 | 2300 | 1.8460 | 0.9711 | 0.8571 | 0.8371 | 0.8470 |
+ | No log | 0.0959 | 100 | 5.3942 | 0.8771 | 0.1734 | 0.2480 | 0.2041 |
+ | No log | 0.1918 | 200 | 5.2038 | 0.8947 | 0.4057 | 0.4183 | 0.4119 |
+ | No log | 0.2876 | 300 | 4.9557 | 0.9135 | 0.5096 | 0.5249 | 0.5172 |
+ | No log | 0.3835 | 400 | 4.6168 | 0.9214 | 0.5589 | 0.5692 | 0.5640 |
+ | 5.0827 | 0.4794 | 500 | 4.2799 | 0.9237 | 0.5698 | 0.5819 | 0.5758 |
+ | 5.0827 | 0.5753 | 600 | 4.0766 | 0.9270 | 0.5946 | 0.5996 | 0.5971 |
+ | 5.0827 | 0.6711 | 700 | 3.9017 | 0.9327 | 0.6362 | 0.6280 | 0.6320 |
+ | 5.0827 | 0.7670 | 800 | 3.7393 | 0.9366 | 0.6672 | 0.6463 | 0.6566 |
+ | 5.0827 | 0.8629 | 900 | 3.6190 | 0.9392 | 0.6852 | 0.6592 | 0.6719 |
+ | 3.9175 | 0.9588 | 1000 | 3.4255 | 0.9442 | 0.7091 | 0.6873 | 0.6980 |
+ | 3.9175 | 1.0547 | 1100 | 3.2749 | 0.9494 | 0.7314 | 0.7174 | 0.7243 |
+ | 3.9175 | 1.1505 | 1200 | 3.0196 | 0.9513 | 0.7342 | 0.7315 | 0.7328 |
+ | 3.9175 | 1.2464 | 1300 | 2.8139 | 0.9536 | 0.7500 | 0.7424 | 0.7462 |
+ | 3.9175 | 1.3423 | 1400 | 2.6648 | 0.9587 | 0.7863 | 0.7659 | 0.7759 |
+ | 3.024 | 1.4382 | 1500 | 2.5296 | 0.9589 | 0.7810 | 0.7703 | 0.7756 |
+ | 3.024 | 1.5340 | 1600 | 2.4344 | 0.9598 | 0.7850 | 0.7757 | 0.7803 |
+ | 3.024 | 1.6299 | 1700 | 2.2909 | 0.9617 | 0.8009 | 0.7827 | 0.7917 |
+ | 3.024 | 1.7258 | 1800 | 2.2204 | 0.9647 | 0.8065 | 0.8052 | 0.8058 |
+ | 3.024 | 1.8217 | 1900 | 2.1540 | 0.9647 | 0.8170 | 0.7994 | 0.8081 |
+ | 2.2891 | 1.9175 | 2000 | 2.0844 | 0.9667 | 0.8266 | 0.8108 | 0.8186 |
+ | 2.2891 | 2.0134 | 2100 | 1.9854 | 0.9680 | 0.8325 | 0.8183 | 0.8253 |
+ | 2.2891 | 2.1093 | 2200 | 1.9240 | 0.9673 | 0.8248 | 0.8174 | 0.8211 |
+ | 2.2891 | 2.2052 | 2300 | 1.8725 | 0.9675 | 0.8365 | 0.8121 | 0.8241 |
 
 
  ### Framework versions
 
  - Transformers 4.48.1
- - Pytorch 2.5.1
+ - Pytorch 2.5.1+cu124
  - Datasets 3.2.0
  - Tokenizers 0.21.0
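For context, here is a minimal sketch of how the hyperparameters listed in the card above map onto `transformers.TrainingArguments`. The `output_dir`, learning rate, and batch sizes are placeholders, since this hunk does not show them; only the values visible in the diff are taken from the card.

```python
from transformers import TrainingArguments

args = TrainingArguments(
    output_dir="bert-question-classifier",  # placeholder, not from the card
    num_train_epochs=10,                    # changed from 3 in this commit
    optim="adamw_torch",                    # OptimizerNames.ADAMW_TORCH
    adam_beta1=0.9,
    adam_beta2=0.999,
    adam_epsilon=1e-8,
    lr_scheduler_type="linear",
    warmup_ratio=0.1,
)
```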
config.json CHANGED
@@ -5,6 +5,24 @@
    ],
    "attention_probs_dropout_prob": 0.1,
    "classifier_dropout": null,
+   "custom_pipelines": {
+     "question-classifier": {
+       "default": {
+         "model": {
+           "pt": [
+             "ai-research-lab/bert-question-classifier",
+             "main"
+           ]
+         }
+       },
+       "impl": "classifier_pipeline.MultiTaskClassifierPipeline",
+       "pt": [
+         "AutoModelForSequenceClassification"
+       ],
+       "tf": [],
+       "type": "text"
+     }
+   },
    "gradient_checkpointing": false,
    "hidden_act": "gelu",
    "hidden_dropout_prob": 0.1,
model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:ee6a30867bd395bed83ab7bf952082313c70333e795358e8914c1d3f5d96186d
+ oid sha256:79108db630eaa47b9d51c762b7eda3ea6ecdc85a682ed6eaf167c42a4bb64018
  size 438057080
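Only the Git LFS pointer changes here: the size is identical, but the SHA-256 oid is new, i.e. retrained weights of the same shape. A sketch for verifying a downloaded copy against the new oid (the local file path is a placeholder):

```python
import hashlib

EXPECTED_OID = "79108db630eaa47b9d51c762b7eda3ea6ecdc85a682ed6eaf167c42a4bb64018"

h = hashlib.sha256()
with open("model.safetensors", "rb") as f:            # placeholder local path
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        h.update(chunk)

assert h.hexdigest() == EXPECTED_OID, "file does not match the commit's LFS oid"
```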
tokenizer.json CHANGED
@@ -6,7 +6,16 @@
      "strategy": "LongestFirst",
      "stride": 0
    },
-   "padding": null,
+   "padding": {
+     "strategy": {
+       "Fixed": 512
+     },
+     "direction": "Right",
+     "pad_to_multiple_of": null,
+     "pad_id": 0,
+     "pad_type_id": 0,
+     "pad_token": "[PAD]"
+   },
    "added_tokens": [
      {
        "id": 0,