mp-02 committed on
Commit
cd41993
1 Parent(s): cc4eda0

End of training

Browse files
all_results.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
- "epoch": 26.67,
3
- "eval_accuracy": 0.8414101975195223,
4
- "eval_f1": 0.8868156150257795,
5
- "eval_loss": 0.6016380190849304,
6
- "eval_precision": 0.8712011577424024,
7
  "eval_recall": 0.903,
8
- "eval_runtime": 3.7201,
9
  "eval_samples": 54,
10
- "eval_samples_per_second": 14.516,
11
- "eval_steps_per_second": 1.613,
12
  "predict_accuracy": 0.8218373936014088,
13
  "predict_f1": 0.9038133181559477,
14
  "predict_loss": 0.6888472437858582,
@@ -17,9 +17,9 @@
17
  "predict_runtime": 2.0596,
18
  "predict_samples_per_second": 8.74,
19
  "predict_steps_per_second": 0.971,
20
- "train_loss": 0.3642783355712891,
21
- "train_runtime": 422.552,
22
  "train_samples": 150,
23
- "train_samples_per_second": 9.466,
24
- "train_steps_per_second": 0.947
25
  }
 
1
  {
2
+ "epoch": 20.0,
3
+ "eval_accuracy": 0.8434772622875517,
4
+ "eval_f1": 0.8826979472140762,
5
+ "eval_loss": 0.5468625426292419,
6
+ "eval_precision": 0.8632887189292543,
7
  "eval_recall": 0.903,
8
+ "eval_runtime": 3.697,
9
  "eval_samples": 54,
10
+ "eval_samples_per_second": 14.607,
11
+ "eval_steps_per_second": 1.623,
12
  "predict_accuracy": 0.8218373936014088,
13
  "predict_f1": 0.9038133181559477,
14
  "predict_loss": 0.6888472437858582,
 
17
  "predict_runtime": 2.0596,
18
  "predict_samples_per_second": 8.74,
19
  "predict_steps_per_second": 0.971,
20
+ "train_loss": 0.4636675516764323,
21
+ "train_runtime": 318.7136,
22
  "train_samples": 150,
23
+ "train_samples_per_second": 9.413,
24
+ "train_steps_per_second": 0.941
25
  }
eval_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
- "epoch": 26.67,
3
- "eval_accuracy": 0.8414101975195223,
4
- "eval_f1": 0.8868156150257795,
5
- "eval_loss": 0.6016380190849304,
6
- "eval_precision": 0.8712011577424024,
7
  "eval_recall": 0.903,
8
- "eval_runtime": 3.7201,
9
  "eval_samples": 54,
10
- "eval_samples_per_second": 14.516,
11
- "eval_steps_per_second": 1.613
12
  }
 
1
  {
2
+ "epoch": 20.0,
3
+ "eval_accuracy": 0.8434772622875517,
4
+ "eval_f1": 0.8826979472140762,
5
+ "eval_loss": 0.5468625426292419,
6
+ "eval_precision": 0.8632887189292543,
7
  "eval_recall": 0.903,
8
+ "eval_runtime": 3.697,
9
  "eval_samples": 54,
10
+ "eval_samples_per_second": 14.607,
11
+ "eval_steps_per_second": 1.623
12
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8356f85a16aec70deef67c107e63fd773b89ce3c9d102cc38b7811b85a8d9f28
3
  size 501420883
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6676119aed64fb98a2529aadd0eb6d8a586dc32bcd9cdd67336343409ea44e02
3
  size 501420883
runs/Aug24_19-05-23_bernini/1724519134.3266222/events.out.tfevents.1724519134.bernini.17504.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3567ad87f4f04882cac3ee81fab510fe0ea8bd30b664c041c60a6771b1fee1ed
3
+ size 4665
runs/Aug24_19-05-23_bernini/events.out.tfevents.1724519134.bernini.17504.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f5911ff645def4a8801e6d52fe4b852492f56fb2b139d66696c69465668ed94
3
+ size 4084
runs/Aug24_19-05-59_bernini/1724519169.7377124/events.out.tfevents.1724519169.bernini.18373.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23326fd504bb10a270cd4df7abfdf08f9e8526a47c6a70f7858452efead7b18d
3
+ size 4665
runs/Aug24_19-05-59_bernini/events.out.tfevents.1724519169.bernini.18373.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b272c865a684aecff29dccecb35f4af06910a0c7efbeee0206fe484b11f5d846
3
+ size 10057
runs/Aug24_19-05-59_bernini/events.out.tfevents.1724519497.bernini.18373.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c79aa0dc0ca446e6bee51eae633e866665dabf054bb73041944a0c4bf6c76ff
3
+ size 512
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 26.67,
3
- "train_loss": 0.3642783355712891,
4
- "train_runtime": 422.552,
5
  "train_samples": 150,
6
- "train_samples_per_second": 9.466,
7
- "train_steps_per_second": 0.947
8
  }
 
1
  {
2
+ "epoch": 20.0,
3
+ "train_loss": 0.4636675516764323,
4
+ "train_runtime": 318.7136,
5
  "train_samples": 150,
6
+ "train_samples_per_second": 9.413,
7
+ "train_steps_per_second": 0.941
8
  }
trainer_state.json CHANGED
@@ -1,217 +1,169 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 26.666666666666668,
5
- "global_step": 400,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 1.67,
12
- "eval_accuracy": 0.6659393661001378,
13
- "eval_f1": 0.5035663338088445,
14
- "eval_loss": 1.1944581270217896,
15
- "eval_precision": 0.4800543970988214,
16
- "eval_recall": 0.5295,
17
- "eval_runtime": 3.6912,
18
- "eval_samples_per_second": 14.629,
19
- "eval_steps_per_second": 1.625,
20
  "step": 25
21
  },
22
  {
23
  "epoch": 3.33,
24
  "eval_accuracy": 0.7687184198438217,
25
- "eval_f1": 0.7759615384615385,
26
- "eval_loss": 0.7770130038261414,
27
- "eval_precision": 0.7472222222222222,
28
- "eval_recall": 0.807,
29
- "eval_runtime": 3.7057,
30
- "eval_samples_per_second": 14.572,
31
- "eval_steps_per_second": 1.619,
32
  "step": 50
33
  },
34
  {
35
  "epoch": 5.0,
36
- "eval_accuracy": 0.8118971061093248,
37
- "eval_f1": 0.8231029482841953,
38
- "eval_loss": 0.5989910364151001,
39
- "eval_precision": 0.7965388213283442,
40
- "eval_recall": 0.8515,
41
- "eval_runtime": 3.6962,
42
- "eval_samples_per_second": 14.61,
43
- "eval_steps_per_second": 1.623,
44
  "step": 75
45
  },
46
  {
47
  "epoch": 6.67,
48
- "eval_accuracy": 0.7702112999540652,
49
- "eval_f1": 0.8409471418320977,
50
- "eval_loss": 0.6813620924949646,
51
- "eval_precision": 0.8060522696011004,
52
- "eval_recall": 0.879,
53
- "eval_runtime": 3.7519,
54
- "eval_samples_per_second": 14.393,
55
- "eval_steps_per_second": 1.599,
56
  "step": 100
57
  },
58
  {
59
  "epoch": 8.33,
60
- "eval_accuracy": 0.8316490583371612,
61
- "eval_f1": 0.8467469879518073,
62
- "eval_loss": 0.5391940474510193,
63
- "eval_precision": 0.8172093023255814,
64
- "eval_recall": 0.8785,
65
- "eval_runtime": 3.7222,
66
- "eval_samples_per_second": 14.508,
67
- "eval_steps_per_second": 1.612,
68
  "step": 125
69
  },
70
  {
71
  "epoch": 10.0,
72
- "eval_accuracy": 0.811437758383096,
73
- "eval_f1": 0.8603297769156159,
74
- "eval_loss": 0.5438030362129211,
75
- "eval_precision": 0.835216572504708,
76
- "eval_recall": 0.887,
77
- "eval_runtime": 3.7662,
78
- "eval_samples_per_second": 14.338,
79
- "eval_steps_per_second": 1.593,
80
  "step": 150
81
  },
82
  {
83
  "epoch": 11.67,
84
- "eval_accuracy": 0.838194763435921,
85
- "eval_f1": 0.8752145133611179,
86
- "eval_loss": 0.5429797172546387,
87
- "eval_precision": 0.8585858585858586,
88
- "eval_recall": 0.8925,
89
- "eval_runtime": 3.7544,
90
- "eval_samples_per_second": 14.383,
91
- "eval_steps_per_second": 1.598,
92
  "step": 175
93
  },
94
  {
95
  "epoch": 13.33,
96
- "eval_accuracy": 0.8257923748277446,
97
- "eval_f1": 0.8691817736403724,
98
- "eval_loss": 0.5896742939949036,
99
- "eval_precision": 0.8520653218059558,
100
- "eval_recall": 0.887,
101
- "eval_runtime": 3.775,
102
- "eval_samples_per_second": 14.305,
103
- "eval_steps_per_second": 1.589,
104
  "step": 200
105
  },
106
  {
107
  "epoch": 15.0,
108
- "eval_accuracy": 0.8430179145613229,
109
- "eval_f1": 0.8809756097560977,
110
- "eval_loss": 0.5427059531211853,
111
- "eval_precision": 0.86,
112
- "eval_recall": 0.903,
113
- "eval_runtime": 3.6868,
114
- "eval_samples_per_second": 14.647,
115
- "eval_steps_per_second": 1.627,
116
  "step": 225
117
  },
118
  {
119
  "epoch": 16.67,
120
- "eval_accuracy": 0.8410656867248507,
121
- "eval_f1": 0.8822512959763021,
122
- "eval_loss": 0.5541779398918152,
123
- "eval_precision": 0.871282301316431,
124
- "eval_recall": 0.8935,
125
- "eval_runtime": 3.7479,
126
- "eval_samples_per_second": 14.408,
127
- "eval_steps_per_second": 1.601,
128
  "step": 250
129
  },
130
  {
131
  "epoch": 18.33,
132
- "eval_accuracy": 0.83210840606339,
133
- "eval_f1": 0.8821515892420537,
134
- "eval_loss": 0.6013592481613159,
135
- "eval_precision": 0.8631578947368421,
136
- "eval_recall": 0.902,
137
- "eval_runtime": 3.7532,
138
- "eval_samples_per_second": 14.388,
139
- "eval_steps_per_second": 1.599,
140
  "step": 275
141
  },
142
  {
143
  "epoch": 20.0,
144
- "eval_accuracy": 0.8365870463941204,
145
- "eval_f1": 0.8843036109064112,
146
- "eval_loss": 0.5971384644508362,
147
- "eval_precision": 0.8691453404152584,
148
- "eval_recall": 0.9,
149
- "eval_runtime": 3.7851,
150
- "eval_samples_per_second": 14.267,
151
- "eval_steps_per_second": 1.585,
152
- "step": 300
153
- },
154
- {
155
- "epoch": 21.67,
156
- "eval_accuracy": 0.8313045475424896,
157
- "eval_f1": 0.8891637803067788,
158
- "eval_loss": 0.598324716091156,
159
- "eval_precision": 0.8800195886385896,
160
- "eval_recall": 0.8985,
161
- "eval_runtime": 3.7401,
162
- "eval_samples_per_second": 14.438,
163
- "eval_steps_per_second": 1.604,
164
- "step": 325
165
- },
166
- {
167
- "epoch": 23.33,
168
- "eval_accuracy": 0.8414101975195223,
169
- "eval_f1": 0.8964497041420119,
170
- "eval_loss": 0.5867139101028442,
171
- "eval_precision": 0.8842412451361867,
172
- "eval_recall": 0.909,
173
- "eval_runtime": 3.7391,
174
- "eval_samples_per_second": 14.442,
175
- "eval_steps_per_second": 1.605,
176
- "step": 350
177
- },
178
- {
179
- "epoch": 25.0,
180
- "eval_accuracy": 0.8465778594395957,
181
- "eval_f1": 0.8928835262250677,
182
- "eval_loss": 0.5947966575622559,
183
- "eval_precision": 0.8796700630761766,
184
- "eval_recall": 0.9065,
185
- "eval_runtime": 3.6661,
186
- "eval_samples_per_second": 14.73,
187
- "eval_steps_per_second": 1.637,
188
- "step": 375
189
- },
190
- {
191
- "epoch": 26.67,
192
- "eval_accuracy": 0.8414101975195223,
193
- "eval_f1": 0.8868156150257795,
194
- "eval_loss": 0.6016380190849304,
195
- "eval_precision": 0.8712011577424024,
196
  "eval_recall": 0.903,
197
- "eval_runtime": 3.6984,
198
- "eval_samples_per_second": 14.601,
199
- "eval_steps_per_second": 1.622,
200
- "step": 400
201
  },
202
  {
203
- "epoch": 26.67,
204
- "step": 400,
205
- "total_flos": 1054421372928000.0,
206
- "train_loss": 0.3642783355712891,
207
- "train_runtime": 422.552,
208
- "train_samples_per_second": 9.466,
209
- "train_steps_per_second": 0.947
210
  }
211
  ],
212
- "max_steps": 400,
213
- "num_train_epochs": 27,
214
- "total_flos": 1054421372928000.0,
215
  "trial_name": null,
216
  "trial_params": null
217
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 20.0,
5
+ "global_step": 300,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 1.67,
12
+ "eval_accuracy": 0.6553743683968765,
13
+ "eval_f1": 0.49752416882810657,
14
+ "eval_loss": 1.2105501890182495,
15
+ "eval_precision": 0.4707719767960732,
16
+ "eval_recall": 0.5275,
17
+ "eval_runtime": 3.73,
18
+ "eval_samples_per_second": 14.477,
19
+ "eval_steps_per_second": 1.609,
20
  "step": 25
21
  },
22
  {
23
  "epoch": 3.33,
24
  "eval_accuracy": 0.7687184198438217,
25
+ "eval_f1": 0.777563793933558,
26
+ "eval_loss": 0.7854474186897278,
27
+ "eval_precision": 0.7497678737233054,
28
+ "eval_recall": 0.8075,
29
+ "eval_runtime": 3.7553,
30
+ "eval_samples_per_second": 14.38,
31
+ "eval_steps_per_second": 1.598,
32
  "step": 50
33
  },
34
  {
35
  "epoch": 5.0,
36
+ "eval_accuracy": 0.8141938447404685,
37
+ "eval_f1": 0.8184898354307841,
38
+ "eval_loss": 0.6001709699630737,
39
+ "eval_precision": 0.7931519699812383,
40
+ "eval_recall": 0.8455,
41
+ "eval_runtime": 3.7036,
42
+ "eval_samples_per_second": 14.58,
43
+ "eval_steps_per_second": 1.62,
44
  "step": 75
45
  },
46
  {
47
  "epoch": 6.67,
48
+ "eval_accuracy": 0.7781350482315113,
49
+ "eval_f1": 0.827977315689981,
50
+ "eval_loss": 0.6523196697235107,
51
+ "eval_precision": 0.7849462365591398,
52
+ "eval_recall": 0.876,
53
+ "eval_runtime": 3.7641,
54
+ "eval_samples_per_second": 14.346,
55
+ "eval_steps_per_second": 1.594,
56
  "step": 100
57
  },
58
  {
59
  "epoch": 8.33,
60
+ "eval_accuracy": 0.8354386770785485,
61
+ "eval_f1": 0.8442622950819673,
62
+ "eval_loss": 0.5189960598945618,
63
+ "eval_precision": 0.8151769087523277,
64
+ "eval_recall": 0.8755,
65
+ "eval_runtime": 3.8049,
66
+ "eval_samples_per_second": 14.192,
67
+ "eval_steps_per_second": 1.577,
68
  "step": 125
69
  },
70
  {
71
  "epoch": 10.0,
72
+ "eval_accuracy": 0.8338309600367478,
73
+ "eval_f1": 0.8588007736943907,
74
+ "eval_loss": 0.5064252018928528,
75
+ "eval_precision": 0.8314606741573034,
76
+ "eval_recall": 0.888,
77
+ "eval_runtime": 3.7776,
78
+ "eval_samples_per_second": 14.295,
79
+ "eval_steps_per_second": 1.588,
80
  "step": 150
81
  },
82
  {
83
  "epoch": 11.67,
84
+ "eval_accuracy": 0.8344051446945338,
85
+ "eval_f1": 0.8693320331545589,
86
+ "eval_loss": 0.5342020392417908,
87
+ "eval_precision": 0.8482397716460514,
88
+ "eval_recall": 0.8915,
89
+ "eval_runtime": 3.753,
90
+ "eval_samples_per_second": 14.388,
91
+ "eval_steps_per_second": 1.599,
92
  "step": 175
93
  },
94
  {
95
  "epoch": 13.33,
96
+ "eval_accuracy": 0.8200505282498851,
97
+ "eval_f1": 0.8703071672354948,
98
+ "eval_loss": 0.5538159012794495,
99
+ "eval_precision": 0.8491912464319695,
100
+ "eval_recall": 0.8925,
101
+ "eval_runtime": 3.8395,
102
+ "eval_samples_per_second": 14.064,
103
+ "eval_steps_per_second": 1.563,
104
  "step": 200
105
  },
106
  {
107
  "epoch": 15.0,
108
+ "eval_accuracy": 0.8348644924207625,
109
+ "eval_f1": 0.8777398928397467,
110
+ "eval_loss": 0.5335590243339539,
111
+ "eval_precision": 0.855650522317189,
112
+ "eval_recall": 0.901,
113
+ "eval_runtime": 3.7278,
114
+ "eval_samples_per_second": 14.486,
115
+ "eval_steps_per_second": 1.61,
116
  "step": 225
117
  },
118
  {
119
  "epoch": 16.67,
120
+ "eval_accuracy": 0.8385392742305926,
121
+ "eval_f1": 0.8764648437499999,
122
+ "eval_loss": 0.5464726686477661,
123
+ "eval_precision": 0.8563931297709924,
124
+ "eval_recall": 0.8975,
125
+ "eval_runtime": 3.7679,
126
+ "eval_samples_per_second": 14.331,
127
+ "eval_steps_per_second": 1.592,
128
  "step": 250
129
  },
130
  {
131
  "epoch": 18.33,
132
+ "eval_accuracy": 0.8439366100137804,
133
+ "eval_f1": 0.8787509148572822,
134
+ "eval_loss": 0.5402917265892029,
135
+ "eval_precision": 0.8580276322058122,
136
+ "eval_recall": 0.9005,
137
+ "eval_runtime": 3.7992,
138
+ "eval_samples_per_second": 14.214,
139
+ "eval_steps_per_second": 1.579,
140
  "step": 275
141
  },
142
  {
143
  "epoch": 20.0,
144
+ "eval_accuracy": 0.8434772622875517,
145
+ "eval_f1": 0.8826979472140762,
146
+ "eval_loss": 0.5468625426292419,
147
+ "eval_precision": 0.8632887189292543,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
148
  "eval_recall": 0.903,
149
+ "eval_runtime": 3.7775,
150
+ "eval_samples_per_second": 14.295,
151
+ "eval_steps_per_second": 1.588,
152
+ "step": 300
153
  },
154
  {
155
+ "epoch": 20.0,
156
+ "step": 300,
157
+ "total_flos": 790816029696000.0,
158
+ "train_loss": 0.4636675516764323,
159
+ "train_runtime": 318.7136,
160
+ "train_samples_per_second": 9.413,
161
+ "train_steps_per_second": 0.941
162
  }
163
  ],
164
+ "max_steps": 300,
165
+ "num_train_epochs": 20,
166
+ "total_flos": 790816029696000.0,
167
  "trial_name": null,
168
  "trial_params": null
169
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:18524bbd71a9c1f456e0c326136d3674e674011210065da46f04684edfe11cdc
3
  size 2927
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad5c6ae451d7fcc096ada600794b9529838382a07bad9f882b8f80b359b5b02b
3
  size 2927