mp-02 committed on
Commit
5e1c331
1 Parent(s): c13017b

End of training

Browse files
all_results.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
  "epoch": 26.67,
3
- "eval_accuracy": 0.8472668810289389,
4
- "eval_f1": 0.9070631970260222,
5
- "eval_loss": 0.8292201161384583,
6
- "eval_precision": 0.8992628992628993,
7
- "eval_recall": 0.915,
8
- "eval_runtime": 3.7663,
9
  "eval_samples": 54,
10
- "eval_samples_per_second": 14.338,
11
- "eval_steps_per_second": 1.593,
12
  "predict_accuracy": 0.8218373936014088,
13
  "predict_f1": 0.9038133181559477,
14
  "predict_loss": 0.6888472437858582,
@@ -17,9 +17,9 @@
17
  "predict_runtime": 2.0596,
18
  "predict_samples_per_second": 8.74,
19
  "predict_steps_per_second": 0.971,
20
- "train_loss": 0.23747550964355468,
21
- "train_runtime": 422.9436,
22
  "train_samples": 150,
23
- "train_samples_per_second": 9.458,
24
- "train_steps_per_second": 0.946
25
  }
 
1
  {
2
  "epoch": 26.67,
3
+ "eval_accuracy": 0.8414101975195223,
4
+ "eval_f1": 0.8868156150257795,
5
+ "eval_loss": 0.6016380190849304,
6
+ "eval_precision": 0.8712011577424024,
7
+ "eval_recall": 0.903,
8
+ "eval_runtime": 3.7201,
9
  "eval_samples": 54,
10
+ "eval_samples_per_second": 14.516,
11
+ "eval_steps_per_second": 1.613,
12
  "predict_accuracy": 0.8218373936014088,
13
  "predict_f1": 0.9038133181559477,
14
  "predict_loss": 0.6888472437858582,
 
17
  "predict_runtime": 2.0596,
18
  "predict_samples_per_second": 8.74,
19
  "predict_steps_per_second": 0.971,
20
+ "train_loss": 0.3642783355712891,
21
+ "train_runtime": 422.552,
22
  "train_samples": 150,
23
+ "train_samples_per_second": 9.466,
24
+ "train_steps_per_second": 0.947
25
  }
eval_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
  "epoch": 26.67,
3
- "eval_accuracy": 0.8472668810289389,
4
- "eval_f1": 0.9070631970260222,
5
- "eval_loss": 0.8292201161384583,
6
- "eval_precision": 0.8992628992628993,
7
- "eval_recall": 0.915,
8
- "eval_runtime": 3.7663,
9
  "eval_samples": 54,
10
- "eval_samples_per_second": 14.338,
11
- "eval_steps_per_second": 1.593
12
  }
 
1
  {
2
  "epoch": 26.67,
3
+ "eval_accuracy": 0.8414101975195223,
4
+ "eval_f1": 0.8868156150257795,
5
+ "eval_loss": 0.6016380190849304,
6
+ "eval_precision": 0.8712011577424024,
7
+ "eval_recall": 0.903,
8
+ "eval_runtime": 3.7201,
9
  "eval_samples": 54,
10
+ "eval_samples_per_second": 14.516,
11
+ "eval_steps_per_second": 1.613
12
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2cccafe3ee6c7777239da85ed0fd1a57b6880a4a472915b408a120b0a2f81081
3
  size 501420883
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8356f85a16aec70deef67c107e63fd773b89ce3c9d102cc38b7811b85a8d9f28
3
  size 501420883
runs/Aug24_18-54-42_bernini/1724518492.3371837/events.out.tfevents.1724518492.bernini.1037.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3cd699b00c94a44e6ca0e74ec37104488c83275ffaf0362c2ee3c990559b242
3
+ size 4665
runs/Aug24_18-54-42_bernini/events.out.tfevents.1724518492.bernini.1037.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4de822d225c11c0d9e01c4ac3b89baf5ba187ba9b2fe33fa6494c0bcf80bc713
3
+ size 11945
runs/Aug24_18-54-42_bernini/events.out.tfevents.1724518923.bernini.1037.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e1f5c7bbc9adcc5e99a82487c7c62900589be70d2ec018ffb7a5a5babb97b06
3
+ size 512
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 26.67,
3
- "train_loss": 0.23747550964355468,
4
- "train_runtime": 422.9436,
5
  "train_samples": 150,
6
- "train_samples_per_second": 9.458,
7
- "train_steps_per_second": 0.946
8
  }
 
1
  {
2
  "epoch": 26.67,
3
+ "train_loss": 0.3642783355712891,
4
+ "train_runtime": 422.552,
5
  "train_samples": 150,
6
+ "train_samples_per_second": 9.466,
7
+ "train_steps_per_second": 0.947
8
  }
trainer_state.json CHANGED
@@ -9,204 +9,204 @@
9
  "log_history": [
10
  {
11
  "epoch": 1.67,
12
- "eval_accuracy": 0.743913642627469,
13
- "eval_f1": 0.728670634920635,
14
- "eval_loss": 0.8680385947227478,
15
- "eval_precision": 0.7229330708661418,
16
- "eval_recall": 0.7345,
17
- "eval_runtime": 3.6798,
18
- "eval_samples_per_second": 14.675,
19
- "eval_steps_per_second": 1.631,
20
  "step": 25
21
  },
22
  {
23
  "epoch": 3.33,
24
- "eval_accuracy": 0.821773082223243,
25
- "eval_f1": 0.8277523488316069,
26
- "eval_loss": 0.5806225538253784,
27
- "eval_precision": 0.798698279869828,
28
- "eval_recall": 0.859,
29
- "eval_runtime": 3.6893,
30
- "eval_samples_per_second": 14.637,
31
- "eval_steps_per_second": 1.626,
32
  "step": 50
33
  },
34
  {
35
  "epoch": 5.0,
36
- "eval_accuracy": 0.8143086816720257,
37
- "eval_f1": 0.8518784398181383,
38
- "eval_loss": 0.5963883399963379,
39
- "eval_precision": 0.8168884809545663,
40
- "eval_recall": 0.89,
41
- "eval_runtime": 3.7336,
42
- "eval_samples_per_second": 14.463,
43
- "eval_steps_per_second": 1.607,
44
  "step": 75
45
  },
46
  {
47
  "epoch": 6.67,
48
- "eval_accuracy": 0.8248736793752871,
49
- "eval_f1": 0.8816108685104318,
50
- "eval_loss": 0.5634802579879761,
51
- "eval_precision": 0.8562676720075401,
52
- "eval_recall": 0.9085,
53
- "eval_runtime": 3.7322,
54
- "eval_samples_per_second": 14.469,
55
- "eval_steps_per_second": 1.608,
56
  "step": 100
57
  },
58
  {
59
  "epoch": 8.33,
60
- "eval_accuracy": 0.8344051446945338,
61
- "eval_f1": 0.8806224167274495,
62
- "eval_loss": 0.6465535163879395,
63
- "eval_precision": 0.8570752484619025,
64
- "eval_recall": 0.9055,
65
- "eval_runtime": 3.6961,
66
- "eval_samples_per_second": 14.61,
67
- "eval_steps_per_second": 1.623,
68
  "step": 125
69
  },
70
  {
71
  "epoch": 10.0,
72
- "eval_accuracy": 0.8325677537896188,
73
- "eval_f1": 0.8935960591133005,
74
- "eval_loss": 0.6587353944778442,
75
- "eval_precision": 0.8805825242718447,
76
- "eval_recall": 0.907,
77
- "eval_runtime": 3.8161,
78
- "eval_samples_per_second": 14.151,
79
- "eval_steps_per_second": 1.572,
80
  "step": 150
81
  },
82
  {
83
  "epoch": 11.67,
84
- "eval_accuracy": 0.837620578778135,
85
- "eval_f1": 0.8977886977886977,
86
- "eval_loss": 0.698390543460846,
87
- "eval_precision": 0.8826086956521739,
88
- "eval_recall": 0.9135,
89
- "eval_runtime": 3.7001,
90
- "eval_samples_per_second": 14.594,
91
- "eval_steps_per_second": 1.622,
92
  "step": 175
93
  },
94
  {
95
  "epoch": 13.33,
96
- "eval_accuracy": 0.843706936150666,
97
- "eval_f1": 0.9129898940103525,
98
- "eval_loss": 0.696682870388031,
99
- "eval_precision": 0.9003403014098201,
100
- "eval_recall": 0.926,
101
- "eval_runtime": 3.7624,
102
- "eval_samples_per_second": 14.353,
103
- "eval_steps_per_second": 1.595,
104
  "step": 200
105
  },
106
  {
107
  "epoch": 15.0,
108
- "eval_accuracy": 0.8450849793293523,
109
- "eval_f1": 0.9041708043694141,
110
- "eval_loss": 0.7260191440582275,
111
- "eval_precision": 0.8979289940828402,
112
- "eval_recall": 0.9105,
113
- "eval_runtime": 3.7153,
114
- "eval_samples_per_second": 14.535,
115
- "eval_steps_per_second": 1.615,
116
  "step": 225
117
  },
118
  {
119
  "epoch": 16.67,
120
- "eval_accuracy": 0.8510564997703262,
121
- "eval_f1": 0.9003944773175543,
122
- "eval_loss": 0.7542734742164612,
123
- "eval_precision": 0.8881322957198443,
124
- "eval_recall": 0.913,
125
- "eval_runtime": 3.8842,
126
- "eval_samples_per_second": 13.903,
127
- "eval_steps_per_second": 1.545,
128
  "step": 250
129
  },
130
  {
131
  "epoch": 18.33,
132
- "eval_accuracy": 0.844855305466238,
133
- "eval_f1": 0.898422090729783,
134
- "eval_loss": 0.7764595746994019,
135
- "eval_precision": 0.8861867704280155,
136
- "eval_recall": 0.911,
137
- "eval_runtime": 3.7089,
138
- "eval_samples_per_second": 14.56,
139
- "eval_steps_per_second": 1.618,
140
  "step": 275
141
  },
142
  {
143
  "epoch": 20.0,
144
- "eval_accuracy": 0.8473817179604961,
145
- "eval_f1": 0.9038365719980069,
146
- "eval_loss": 0.8023993968963623,
147
- "eval_precision": 0.900695134061569,
148
- "eval_recall": 0.907,
149
- "eval_runtime": 3.6987,
150
- "eval_samples_per_second": 14.6,
151
- "eval_steps_per_second": 1.622,
152
  "step": 300
153
  },
154
  {
155
  "epoch": 21.67,
156
- "eval_accuracy": 0.8509416628387689,
157
- "eval_f1": 0.9098584554258753,
158
- "eval_loss": 0.814669132232666,
159
- "eval_precision": 0.9037987173162308,
160
- "eval_recall": 0.916,
161
- "eval_runtime": 3.712,
162
- "eval_samples_per_second": 14.547,
163
- "eval_steps_per_second": 1.616,
164
  "step": 325
165
  },
166
  {
167
  "epoch": 23.33,
168
- "eval_accuracy": 0.8510564997703262,
169
- "eval_f1": 0.9098136645962732,
170
- "eval_loss": 0.8133536577224731,
171
- "eval_precision": 0.9041975308641975,
172
- "eval_recall": 0.9155,
173
- "eval_runtime": 3.7319,
174
- "eval_samples_per_second": 14.47,
175
- "eval_steps_per_second": 1.608,
176
  "step": 350
177
  },
178
  {
179
  "epoch": 25.0,
180
- "eval_accuracy": 0.846692696371153,
181
- "eval_f1": 0.9083229813664596,
182
- "eval_loss": 0.8293821811676025,
183
- "eval_precision": 0.9027160493827161,
184
- "eval_recall": 0.914,
185
- "eval_runtime": 3.6785,
186
- "eval_samples_per_second": 14.68,
187
- "eval_steps_per_second": 1.631,
188
  "step": 375
189
  },
190
  {
191
  "epoch": 26.67,
192
- "eval_accuracy": 0.8472668810289389,
193
- "eval_f1": 0.9070631970260222,
194
- "eval_loss": 0.8292201161384583,
195
- "eval_precision": 0.8992628992628993,
196
- "eval_recall": 0.915,
197
- "eval_runtime": 3.7118,
198
- "eval_samples_per_second": 14.548,
199
- "eval_steps_per_second": 1.616,
200
  "step": 400
201
  },
202
  {
203
  "epoch": 26.67,
204
  "step": 400,
205
  "total_flos": 1054421372928000.0,
206
- "train_loss": 0.23747550964355468,
207
- "train_runtime": 422.9436,
208
- "train_samples_per_second": 9.458,
209
- "train_steps_per_second": 0.946
210
  }
211
  ],
212
  "max_steps": 400,
 
9
  "log_history": [
10
  {
11
  "epoch": 1.67,
12
+ "eval_accuracy": 0.6659393661001378,
13
+ "eval_f1": 0.5035663338088445,
14
+ "eval_loss": 1.1944581270217896,
15
+ "eval_precision": 0.4800543970988214,
16
+ "eval_recall": 0.5295,
17
+ "eval_runtime": 3.6912,
18
+ "eval_samples_per_second": 14.629,
19
+ "eval_steps_per_second": 1.625,
20
  "step": 25
21
  },
22
  {
23
  "epoch": 3.33,
24
+ "eval_accuracy": 0.7687184198438217,
25
+ "eval_f1": 0.7759615384615385,
26
+ "eval_loss": 0.7770130038261414,
27
+ "eval_precision": 0.7472222222222222,
28
+ "eval_recall": 0.807,
29
+ "eval_runtime": 3.7057,
30
+ "eval_samples_per_second": 14.572,
31
+ "eval_steps_per_second": 1.619,
32
  "step": 50
33
  },
34
  {
35
  "epoch": 5.0,
36
+ "eval_accuracy": 0.8118971061093248,
37
+ "eval_f1": 0.8231029482841953,
38
+ "eval_loss": 0.5989910364151001,
39
+ "eval_precision": 0.7965388213283442,
40
+ "eval_recall": 0.8515,
41
+ "eval_runtime": 3.6962,
42
+ "eval_samples_per_second": 14.61,
43
+ "eval_steps_per_second": 1.623,
44
  "step": 75
45
  },
46
  {
47
  "epoch": 6.67,
48
+ "eval_accuracy": 0.7702112999540652,
49
+ "eval_f1": 0.8409471418320977,
50
+ "eval_loss": 0.6813620924949646,
51
+ "eval_precision": 0.8060522696011004,
52
+ "eval_recall": 0.879,
53
+ "eval_runtime": 3.7519,
54
+ "eval_samples_per_second": 14.393,
55
+ "eval_steps_per_second": 1.599,
56
  "step": 100
57
  },
58
  {
59
  "epoch": 8.33,
60
+ "eval_accuracy": 0.8316490583371612,
61
+ "eval_f1": 0.8467469879518073,
62
+ "eval_loss": 0.5391940474510193,
63
+ "eval_precision": 0.8172093023255814,
64
+ "eval_recall": 0.8785,
65
+ "eval_runtime": 3.7222,
66
+ "eval_samples_per_second": 14.508,
67
+ "eval_steps_per_second": 1.612,
68
  "step": 125
69
  },
70
  {
71
  "epoch": 10.0,
72
+ "eval_accuracy": 0.811437758383096,
73
+ "eval_f1": 0.8603297769156159,
74
+ "eval_loss": 0.5438030362129211,
75
+ "eval_precision": 0.835216572504708,
76
+ "eval_recall": 0.887,
77
+ "eval_runtime": 3.7662,
78
+ "eval_samples_per_second": 14.338,
79
+ "eval_steps_per_second": 1.593,
80
  "step": 150
81
  },
82
  {
83
  "epoch": 11.67,
84
+ "eval_accuracy": 0.838194763435921,
85
+ "eval_f1": 0.8752145133611179,
86
+ "eval_loss": 0.5429797172546387,
87
+ "eval_precision": 0.8585858585858586,
88
+ "eval_recall": 0.8925,
89
+ "eval_runtime": 3.7544,
90
+ "eval_samples_per_second": 14.383,
91
+ "eval_steps_per_second": 1.598,
92
  "step": 175
93
  },
94
  {
95
  "epoch": 13.33,
96
+ "eval_accuracy": 0.8257923748277446,
97
+ "eval_f1": 0.8691817736403724,
98
+ "eval_loss": 0.5896742939949036,
99
+ "eval_precision": 0.8520653218059558,
100
+ "eval_recall": 0.887,
101
+ "eval_runtime": 3.775,
102
+ "eval_samples_per_second": 14.305,
103
+ "eval_steps_per_second": 1.589,
104
  "step": 200
105
  },
106
  {
107
  "epoch": 15.0,
108
+ "eval_accuracy": 0.8430179145613229,
109
+ "eval_f1": 0.8809756097560977,
110
+ "eval_loss": 0.5427059531211853,
111
+ "eval_precision": 0.86,
112
+ "eval_recall": 0.903,
113
+ "eval_runtime": 3.6868,
114
+ "eval_samples_per_second": 14.647,
115
+ "eval_steps_per_second": 1.627,
116
  "step": 225
117
  },
118
  {
119
  "epoch": 16.67,
120
+ "eval_accuracy": 0.8410656867248507,
121
+ "eval_f1": 0.8822512959763021,
122
+ "eval_loss": 0.5541779398918152,
123
+ "eval_precision": 0.871282301316431,
124
+ "eval_recall": 0.8935,
125
+ "eval_runtime": 3.7479,
126
+ "eval_samples_per_second": 14.408,
127
+ "eval_steps_per_second": 1.601,
128
  "step": 250
129
  },
130
  {
131
  "epoch": 18.33,
132
+ "eval_accuracy": 0.83210840606339,
133
+ "eval_f1": 0.8821515892420537,
134
+ "eval_loss": 0.6013592481613159,
135
+ "eval_precision": 0.8631578947368421,
136
+ "eval_recall": 0.902,
137
+ "eval_runtime": 3.7532,
138
+ "eval_samples_per_second": 14.388,
139
+ "eval_steps_per_second": 1.599,
140
  "step": 275
141
  },
142
  {
143
  "epoch": 20.0,
144
+ "eval_accuracy": 0.8365870463941204,
145
+ "eval_f1": 0.8843036109064112,
146
+ "eval_loss": 0.5971384644508362,
147
+ "eval_precision": 0.8691453404152584,
148
+ "eval_recall": 0.9,
149
+ "eval_runtime": 3.7851,
150
+ "eval_samples_per_second": 14.267,
151
+ "eval_steps_per_second": 1.585,
152
  "step": 300
153
  },
154
  {
155
  "epoch": 21.67,
156
+ "eval_accuracy": 0.8313045475424896,
157
+ "eval_f1": 0.8891637803067788,
158
+ "eval_loss": 0.598324716091156,
159
+ "eval_precision": 0.8800195886385896,
160
+ "eval_recall": 0.8985,
161
+ "eval_runtime": 3.7401,
162
+ "eval_samples_per_second": 14.438,
163
+ "eval_steps_per_second": 1.604,
164
  "step": 325
165
  },
166
  {
167
  "epoch": 23.33,
168
+ "eval_accuracy": 0.8414101975195223,
169
+ "eval_f1": 0.8964497041420119,
170
+ "eval_loss": 0.5867139101028442,
171
+ "eval_precision": 0.8842412451361867,
172
+ "eval_recall": 0.909,
173
+ "eval_runtime": 3.7391,
174
+ "eval_samples_per_second": 14.442,
175
+ "eval_steps_per_second": 1.605,
176
  "step": 350
177
  },
178
  {
179
  "epoch": 25.0,
180
+ "eval_accuracy": 0.8465778594395957,
181
+ "eval_f1": 0.8928835262250677,
182
+ "eval_loss": 0.5947966575622559,
183
+ "eval_precision": 0.8796700630761766,
184
+ "eval_recall": 0.9065,
185
+ "eval_runtime": 3.6661,
186
+ "eval_samples_per_second": 14.73,
187
+ "eval_steps_per_second": 1.637,
188
  "step": 375
189
  },
190
  {
191
  "epoch": 26.67,
192
+ "eval_accuracy": 0.8414101975195223,
193
+ "eval_f1": 0.8868156150257795,
194
+ "eval_loss": 0.6016380190849304,
195
+ "eval_precision": 0.8712011577424024,
196
+ "eval_recall": 0.903,
197
+ "eval_runtime": 3.6984,
198
+ "eval_samples_per_second": 14.601,
199
+ "eval_steps_per_second": 1.622,
200
  "step": 400
201
  },
202
  {
203
  "epoch": 26.67,
204
  "step": 400,
205
  "total_flos": 1054421372928000.0,
206
+ "train_loss": 0.3642783355712891,
207
+ "train_runtime": 422.552,
208
+ "train_samples_per_second": 9.466,
209
+ "train_steps_per_second": 0.947
210
  }
211
  ],
212
  "max_steps": 400,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2966ff448a4928b45d06f166b511b270ce1009954dfb204cae97cf2a3dabe1ce
3
  size 2927
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18524bbd71a9c1f456e0c326136d3674e674011210065da46f04684edfe11cdc
3
  size 2927