gokuls committed on
Commit
98c7af4
·
1 Parent(s): e3988e0

End of training

Browse files
README.md CHANGED
@@ -1,4 +1,6 @@
1
  ---
 
 
2
  tags:
3
  - generated_from_trainer
4
  datasets:
@@ -12,7 +14,7 @@ model-index:
12
  name: Text Classification
13
  type: text-classification
14
  dataset:
15
- name: glue
16
  type: glue
17
  config: sst2
18
  split: validation
@@ -20,7 +22,7 @@ model-index:
20
  metrics:
21
  - name: Accuracy
22
  type: accuracy
23
- value: 0.8004587155963303
24
  ---
25
 
26
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -28,10 +30,10 @@ should probably proofread and complete it, then remove this comment. -->
28
 
29
  # hBERTv1_no_pretrain_sst2
30
 
31
- This model is a fine-tuned version of [](https://huggingface.co/) on the glue dataset.
32
  It achieves the following results on the evaluation set:
33
- - Loss: 0.5317
34
- - Accuracy: 0.8005
35
 
36
  ## Model description
37
 
 
1
  ---
2
+ language:
3
+ - en
4
  tags:
5
  - generated_from_trainer
6
  datasets:
 
14
  name: Text Classification
15
  type: text-classification
16
  dataset:
17
+ name: GLUE SST2
18
  type: glue
19
  config: sst2
20
  split: validation
 
22
  metrics:
23
  - name: Accuracy
24
  type: accuracy
25
+ value: 0.8107798165137615
26
  ---
27
 
28
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
30
 
31
  # hBERTv1_no_pretrain_sst2
32
 
33
+ This model is a fine-tuned version of [](https://huggingface.co/) on the GLUE SST2 dataset.
34
  It achieves the following results on the evaluation set:
35
+ - Loss: 0.4434
36
+ - Accuracy: 0.8108
37
 
38
  ## Model description
39
 
all_results.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
- "epoch": 7.0,
3
- "eval_accuracy": 0.5091743119266054,
4
- "eval_loss": 0.6953763365745544,
5
- "eval_runtime": 1.5195,
6
  "eval_samples": 872,
7
- "eval_samples_per_second": 573.888,
8
- "eval_steps_per_second": 4.607,
9
- "train_loss": 0.6925539640787011,
10
- "train_runtime": 2513.0909,
11
  "train_samples": 67349,
12
- "train_samples_per_second": 1339.963,
13
- "train_steps_per_second": 10.485
14
  }
 
1
  {
2
+ "epoch": 6.0,
3
+ "eval_accuracy": 0.8107798165137615,
4
+ "eval_loss": 0.44338709115982056,
5
+ "eval_runtime": 4.0336,
6
  "eval_samples": 872,
7
+ "eval_samples_per_second": 216.182,
8
+ "eval_steps_per_second": 2.479,
9
+ "train_loss": 0.26820123931508005,
10
+ "train_runtime": 5787.7575,
11
  "train_samples": 67349,
12
+ "train_samples_per_second": 581.823,
13
+ "train_steps_per_second": 6.065
14
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 7.0,
3
- "eval_accuracy": 0.5091743119266054,
4
- "eval_loss": 0.6953763365745544,
5
- "eval_runtime": 1.5195,
6
  "eval_samples": 872,
7
- "eval_samples_per_second": 573.888,
8
- "eval_steps_per_second": 4.607
9
  }
 
1
  {
2
+ "epoch": 6.0,
3
+ "eval_accuracy": 0.8107798165137615,
4
+ "eval_loss": 0.44338709115982056,
5
+ "eval_runtime": 4.0336,
6
  "eval_samples": 872,
7
+ "eval_samples_per_second": 216.182,
8
+ "eval_steps_per_second": 2.479
9
  }
logs/events.out.tfevents.1686827911.garda.2667877.9 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6af58848c0b855d121ed758d81d5927c7dc323586f421b4fb0a2bb9cb368273
3
+ size 363
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 7.0,
3
- "train_loss": 0.6925539640787011,
4
- "train_runtime": 2513.0909,
5
  "train_samples": 67349,
6
- "train_samples_per_second": 1339.963,
7
- "train_steps_per_second": 10.485
8
  }
 
1
  {
2
+ "epoch": 6.0,
3
+ "train_loss": 0.26820123931508005,
4
+ "train_runtime": 5787.7575,
5
  "train_samples": 67349,
6
+ "train_samples_per_second": 581.823,
7
+ "train_steps_per_second": 6.065
8
  }
trainer_state.json CHANGED
@@ -1,130 +1,115 @@
1
  {
2
- "best_metric": 0.6953763365745544,
3
- "best_model_checkpoint": "hBERTv1_no_pretrain_sst2/checkpoint-1054",
4
- "epoch": 7.0,
5
- "global_step": 3689,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 1.0,
12
- "learning_rate": 0.0004900379506641366,
13
- "loss": 0.7262,
14
- "step": 527
15
  },
16
  {
17
  "epoch": 1.0,
18
- "eval_accuracy": 0.5091743119266054,
19
- "eval_loss": 0.6957851052284241,
20
- "eval_runtime": 1.5307,
21
- "eval_samples_per_second": 569.664,
22
- "eval_steps_per_second": 4.573,
23
- "step": 527
24
  },
25
  {
26
  "epoch": 2.0,
27
- "learning_rate": 0.00048003795066413666,
28
- "loss": 0.6878,
29
- "step": 1054
30
  },
31
  {
32
  "epoch": 2.0,
33
- "eval_accuracy": 0.5091743119266054,
34
- "eval_loss": 0.6953763365745544,
35
- "eval_runtime": 1.5334,
36
- "eval_samples_per_second": 568.671,
37
- "eval_steps_per_second": 4.565,
38
- "step": 1054
39
  },
40
  {
41
  "epoch": 3.0,
42
- "learning_rate": 0.00047003795066413663,
43
- "loss": 0.687,
44
- "step": 1581
45
  },
46
  {
47
  "epoch": 3.0,
48
- "eval_accuracy": 0.5091743119266054,
49
- "eval_loss": 0.7005088925361633,
50
- "eval_runtime": 1.5295,
51
- "eval_samples_per_second": 570.132,
52
- "eval_steps_per_second": 4.577,
53
- "step": 1581
54
  },
55
  {
56
  "epoch": 4.0,
57
- "learning_rate": 0.0004600379506641366,
58
- "loss": 0.6868,
59
- "step": 2108
60
  },
61
  {
62
  "epoch": 4.0,
63
- "eval_accuracy": 0.5091743119266054,
64
- "eval_loss": 0.6974157094955444,
65
- "eval_runtime": 1.531,
66
- "eval_samples_per_second": 569.563,
67
- "eval_steps_per_second": 4.572,
68
- "step": 2108
69
  },
70
  {
71
  "epoch": 5.0,
72
- "learning_rate": 0.00045003795066413663,
73
- "loss": 0.6868,
74
- "step": 2635
75
  },
76
  {
77
  "epoch": 5.0,
78
- "eval_accuracy": 0.5091743119266054,
79
- "eval_loss": 0.6966003775596619,
80
- "eval_runtime": 1.5282,
81
- "eval_samples_per_second": 570.611,
82
- "eval_steps_per_second": 4.581,
83
- "step": 2635
84
  },
85
  {
86
  "epoch": 6.0,
87
- "learning_rate": 0.00044003795066413666,
88
- "loss": 0.6868,
89
- "step": 3162
90
  },
91
  {
92
  "epoch": 6.0,
93
- "eval_accuracy": 0.5091743119266054,
94
- "eval_loss": 0.6974246501922607,
95
- "eval_runtime": 1.5339,
96
- "eval_samples_per_second": 568.489,
97
- "eval_steps_per_second": 4.564,
98
- "step": 3162
99
  },
100
  {
101
- "epoch": 7.0,
102
- "learning_rate": 0.00043003795066413663,
103
- "loss": 0.6865,
104
- "step": 3689
105
- },
106
- {
107
- "epoch": 7.0,
108
- "eval_accuracy": 0.5091743119266054,
109
- "eval_loss": 0.6969632506370544,
110
- "eval_runtime": 1.532,
111
- "eval_samples_per_second": 569.18,
112
- "eval_steps_per_second": 4.569,
113
- "step": 3689
114
- },
115
- {
116
- "epoch": 7.0,
117
- "step": 3689,
118
- "total_flos": 6.981918485852979e+16,
119
- "train_loss": 0.6925539640787011,
120
- "train_runtime": 2513.0909,
121
- "train_samples_per_second": 1339.963,
122
- "train_steps_per_second": 10.485
123
  }
124
  ],
125
- "max_steps": 26350,
126
  "num_train_epochs": 50,
127
- "total_flos": 6.981918485852979e+16,
128
  "trial_name": null,
129
  "trial_params": null
130
  }
 
1
  {
2
+ "best_metric": 0.44338709115982056,
3
+ "best_model_checkpoint": "hBERTv1_no_pretrain_sst2/checkpoint-702",
4
+ "epoch": 6.0,
5
+ "global_step": 4212,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 1.0,
12
+ "learning_rate": 3.9200000000000004e-05,
13
+ "loss": 0.4323,
14
+ "step": 702
15
  },
16
  {
17
  "epoch": 1.0,
18
+ "eval_accuracy": 0.8107798165137615,
19
+ "eval_loss": 0.44338709115982056,
20
+ "eval_runtime": 4.0911,
21
+ "eval_samples_per_second": 213.147,
22
+ "eval_steps_per_second": 2.444,
23
+ "step": 702
24
  },
25
  {
26
  "epoch": 2.0,
27
+ "learning_rate": 3.8400000000000005e-05,
28
+ "loss": 0.2664,
29
+ "step": 1404
30
  },
31
  {
32
  "epoch": 2.0,
33
+ "eval_accuracy": 0.801605504587156,
34
+ "eval_loss": 0.5413224697113037,
35
+ "eval_runtime": 4.0747,
36
+ "eval_samples_per_second": 214.002,
37
+ "eval_steps_per_second": 2.454,
38
+ "step": 1404
39
  },
40
  {
41
  "epoch": 3.0,
42
+ "learning_rate": 3.76e-05,
43
+ "loss": 0.2222,
44
+ "step": 2106
45
  },
46
  {
47
  "epoch": 3.0,
48
+ "eval_accuracy": 0.8130733944954128,
49
+ "eval_loss": 0.5243069529533386,
50
+ "eval_runtime": 4.0727,
51
+ "eval_samples_per_second": 214.11,
52
+ "eval_steps_per_second": 2.455,
53
+ "step": 2106
54
  },
55
  {
56
  "epoch": 4.0,
57
+ "learning_rate": 3.680000000000001e-05,
58
+ "loss": 0.2092,
59
+ "step": 2808
60
  },
61
  {
62
  "epoch": 4.0,
63
+ "eval_accuracy": 0.8004587155963303,
64
+ "eval_loss": 0.6012656688690186,
65
+ "eval_runtime": 4.0943,
66
+ "eval_samples_per_second": 212.979,
67
+ "eval_steps_per_second": 2.442,
68
+ "step": 2808
69
  },
70
  {
71
  "epoch": 5.0,
72
+ "learning_rate": 3.6e-05,
73
+ "loss": 0.2346,
74
+ "step": 3510
75
  },
76
  {
77
  "epoch": 5.0,
78
+ "eval_accuracy": 0.8027522935779816,
79
+ "eval_loss": 0.4991849958896637,
80
+ "eval_runtime": 4.0513,
81
+ "eval_samples_per_second": 215.238,
82
+ "eval_steps_per_second": 2.468,
83
+ "step": 3510
84
  },
85
  {
86
  "epoch": 6.0,
87
+ "learning_rate": 3.52e-05,
88
+ "loss": 0.2444,
89
+ "step": 4212
90
  },
91
  {
92
  "epoch": 6.0,
93
+ "eval_accuracy": 0.8004587155963303,
94
+ "eval_loss": 0.5316653251647949,
95
+ "eval_runtime": 4.0478,
96
+ "eval_samples_per_second": 215.428,
97
+ "eval_steps_per_second": 2.471,
98
+ "step": 4212
99
  },
100
  {
101
+ "epoch": 6.0,
102
+ "step": 4212,
103
+ "total_flos": 5.984501559302554e+16,
104
+ "train_loss": 0.26820123931508005,
105
+ "train_runtime": 5787.7575,
106
+ "train_samples_per_second": 581.823,
107
+ "train_steps_per_second": 6.065
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
  }
109
  ],
110
+ "max_steps": 35100,
111
  "num_train_epochs": 50,
112
+ "total_flos": 5.984501559302554e+16,
113
  "trial_name": null,
114
  "trial_params": null
115
  }