khleeloo commited on
Commit
043aa4a
1 Parent(s): ce2e948

Training in progress, epoch 1

Browse files
all_results.json CHANGED
@@ -1,16 +1,16 @@
1
  {
2
- "epoch": 4.0,
3
- "eval_accuracy": 0.7817460317460317,
4
- "eval_f1": 0.7722244420145986,
5
- "eval_loss": 0.8502413630485535,
6
- "eval_precision": 0.7756312792366583,
7
- "eval_recall": 0.7817460317460317,
8
- "eval_runtime": 10.0761,
9
- "eval_samples_per_second": 150.058,
10
- "eval_steps_per_second": 18.757,
11
- "total_flos": 3.104468219559813e+18,
12
- "train_loss": 0.0725347773061683,
13
- "train_runtime": 849.1916,
14
- "train_samples_per_second": 47.174,
15
- "train_steps_per_second": 2.949
16
  }
 
1
  {
2
+ "epoch": 6.0,
3
+ "eval_accuracy": 0.7936507936507936,
4
+ "eval_f1": 0.7835889985346738,
5
+ "eval_loss": 0.8658342361450195,
6
+ "eval_precision": 0.7871091627862145,
7
+ "eval_recall": 0.7936507936507936,
8
+ "eval_runtime": 10.1227,
9
+ "eval_samples_per_second": 149.367,
10
+ "eval_steps_per_second": 18.671,
11
+ "total_flos": 4.65670232933972e+18,
12
+ "train_loss": 0.08256206865654514,
13
+ "train_runtime": 1281.0434,
14
+ "train_samples_per_second": 46.907,
15
+ "train_steps_per_second": 2.932
16
  }
eval_results.json CHANGED
@@ -1,11 +1,11 @@
1
  {
2
- "epoch": 4.0,
3
- "eval_accuracy": 0.7817460317460317,
4
- "eval_f1": 0.7722244420145986,
5
- "eval_loss": 0.8502413630485535,
6
- "eval_precision": 0.7756312792366583,
7
- "eval_recall": 0.7817460317460317,
8
- "eval_runtime": 10.0761,
9
- "eval_samples_per_second": 150.058,
10
- "eval_steps_per_second": 18.757
11
  }
 
1
  {
2
+ "epoch": 6.0,
3
+ "eval_accuracy": 0.7936507936507936,
4
+ "eval_f1": 0.7835889985346738,
5
+ "eval_loss": 0.8658342361450195,
6
+ "eval_precision": 0.7871091627862145,
7
+ "eval_recall": 0.7936507936507936,
8
+ "eval_runtime": 10.1227,
9
+ "eval_samples_per_second": 149.367,
10
+ "eval_steps_per_second": 18.671
11
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2ff8df35ed9c0f2e1a3baa63ab45bfd09e01e66d8adc8a61b9e471690a8a1a64
3
  size 343284077
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b40294c6f0c3d75ff4e234d0988917afeb994bc1a11260eda81b3c699f1da944
3
  size 343284077
runs/Oct27_22-15-51_EE4E077/events.out.tfevents.1698417528.EE4E077.103115.11 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e803cfe837b8cd4a2307dae00863c71a3b495cab291c1a333d621d8ffdc74972
3
+ size 560
runs/Oct28_14-32-10_EE4E077/1698474732.9403577/events.out.tfevents.1698474732.EE4E077.316238.4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca4951d7e3c2a69382754eabb6317043f0de4de012b8e89d3f297488fe8dec50
3
+ size 5919
runs/Oct28_14-32-10_EE4E077/events.out.tfevents.1698474732.EE4E077.316238.3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1e4778eed05fd65b2062c89924b6b893b340c4734f918166770d1b45ffd1fe5
3
+ size 6598
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 4.0,
3
- "total_flos": 3.104468219559813e+18,
4
- "train_loss": 0.0725347773061683,
5
- "train_runtime": 849.1916,
6
- "train_samples_per_second": 47.174,
7
- "train_steps_per_second": 2.949
8
  }
 
1
  {
2
+ "epoch": 6.0,
3
+ "total_flos": 4.65670232933972e+18,
4
+ "train_loss": 0.08256206865654514,
5
+ "train_runtime": 1281.0434,
6
+ "train_samples_per_second": 46.907,
7
+ "train_steps_per_second": 2.932
8
  }
trainer_state.json CHANGED
@@ -1,223 +1,547 @@
1
  {
2
  "best_metric": 0.8704663212435233,
3
- "best_model_checkpoint": "./vit-focal-skin/checkpoint-1878",
4
- "epoch": 4.0,
5
- "global_step": 2504,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
 
 
 
 
 
 
10
  {
11
  "epoch": 0.16,
12
- "learning_rate": 0.00019201277955271565,
13
- "loss": 0.1663,
14
  "step": 100
15
  },
 
 
 
 
 
 
16
  {
17
  "epoch": 0.32,
18
- "learning_rate": 0.00018402555910543132,
19
- "loss": 0.1962,
20
  "step": 200
21
  },
 
 
 
 
 
 
22
  {
23
  "epoch": 0.48,
24
- "learning_rate": 0.000176038338658147,
25
- "loss": 0.1853,
26
  "step": 300
27
  },
 
 
 
 
 
 
28
  {
29
  "epoch": 0.64,
30
- "learning_rate": 0.00016805111821086263,
31
- "loss": 0.1706,
32
  "step": 400
33
  },
 
 
 
 
 
 
34
  {
35
  "epoch": 0.8,
36
- "learning_rate": 0.0001600638977635783,
37
- "loss": 0.1593,
38
  "step": 500
39
  },
 
 
 
 
 
 
40
  {
41
  "epoch": 0.96,
42
- "learning_rate": 0.00015207667731629394,
43
- "loss": 0.1702,
44
  "step": 600
45
  },
46
  {
47
  "epoch": 1.0,
48
- "eval_accuracy": 0.8393782383419689,
49
- "eval_f1": 0.8380951982999452,
50
- "eval_loss": 0.39222732186317444,
51
- "eval_precision": 0.857762154943502,
52
- "eval_recall": 0.8393782383419689,
53
- "eval_runtime": 1.5181,
54
- "eval_samples_per_second": 127.129,
55
- "eval_steps_per_second": 16.467,
56
  "step": 626
57
  },
 
 
 
 
 
 
58
  {
59
  "epoch": 1.12,
60
- "learning_rate": 0.00014408945686900958,
61
- "loss": 0.1036,
62
  "step": 700
63
  },
 
 
 
 
 
 
64
  {
65
  "epoch": 1.28,
66
- "learning_rate": 0.00013610223642172525,
67
- "loss": 0.0941,
68
  "step": 800
69
  },
 
 
 
 
 
 
70
  {
71
  "epoch": 1.44,
72
- "learning_rate": 0.00012819488817891373,
73
- "loss": 0.1029,
74
  "step": 900
75
  },
 
 
 
 
 
 
76
  {
77
  "epoch": 1.6,
78
- "learning_rate": 0.0001202076677316294,
79
- "loss": 0.095,
80
  "step": 1000
81
  },
 
 
 
 
 
 
82
  {
83
  "epoch": 1.76,
84
- "learning_rate": 0.00011222044728434504,
85
- "loss": 0.0911,
86
  "step": 1100
87
  },
 
 
 
 
 
 
88
  {
89
  "epoch": 1.92,
90
- "learning_rate": 0.00010423322683706072,
91
- "loss": 0.0647,
92
  "step": 1200
93
  },
 
 
 
 
 
 
94
  {
95
  "epoch": 2.0,
96
  "eval_accuracy": 0.8238341968911918,
97
- "eval_f1": 0.8247834676883651,
98
- "eval_loss": 0.561523973941803,
99
- "eval_precision": 0.8404391615022521,
100
  "eval_recall": 0.8238341968911918,
101
- "eval_runtime": 1.446,
102
- "eval_samples_per_second": 133.47,
103
- "eval_steps_per_second": 17.289,
104
  "step": 1252
105
  },
106
  {
107
  "epoch": 2.08,
108
- "learning_rate": 9.624600638977636e-05,
109
- "loss": 0.052,
110
  "step": 1300
111
  },
 
 
 
 
 
 
112
  {
113
  "epoch": 2.24,
114
- "learning_rate": 8.825878594249202e-05,
115
- "loss": 0.0298,
116
  "step": 1400
117
  },
 
 
 
 
 
 
118
  {
119
  "epoch": 2.4,
120
- "learning_rate": 8.027156549520767e-05,
121
- "loss": 0.0243,
122
  "step": 1500
123
  },
 
 
 
 
 
 
124
  {
125
  "epoch": 2.56,
126
- "learning_rate": 7.228434504792333e-05,
127
- "loss": 0.0332,
128
  "step": 1600
129
  },
 
 
 
 
 
 
130
  {
131
  "epoch": 2.72,
132
- "learning_rate": 6.429712460063898e-05,
133
- "loss": 0.0267,
134
  "step": 1700
135
  },
 
 
 
 
 
 
136
  {
137
  "epoch": 2.88,
138
- "learning_rate": 5.630990415335463e-05,
139
- "loss": 0.0111,
140
  "step": 1800
141
  },
 
 
 
 
 
 
142
  {
143
  "epoch": 3.0,
144
- "eval_accuracy": 0.8704663212435233,
145
- "eval_f1": 0.868408303886561,
146
- "eval_loss": 0.4315575659275055,
147
- "eval_precision": 0.8670271155479244,
148
- "eval_recall": 0.8704663212435233,
149
- "eval_runtime": 1.4834,
150
- "eval_samples_per_second": 130.104,
151
- "eval_steps_per_second": 16.853,
152
  "step": 1878
153
  },
154
  {
155
  "epoch": 3.04,
156
- "learning_rate": 4.832268370607029e-05,
157
- "loss": 0.0175,
158
  "step": 1900
159
  },
 
 
 
 
 
 
160
  {
161
  "epoch": 3.19,
162
- "learning_rate": 4.0335463258785946e-05,
163
- "loss": 0.0083,
164
  "step": 2000
165
  },
 
 
 
 
 
 
166
  {
167
  "epoch": 3.35,
168
- "learning_rate": 3.23482428115016e-05,
169
- "loss": 0.0038,
170
  "step": 2100
171
  },
 
 
 
 
 
 
172
  {
173
  "epoch": 3.51,
174
- "learning_rate": 2.4361022364217255e-05,
175
- "loss": 0.0009,
176
  "step": 2200
177
  },
 
 
 
 
 
 
178
  {
179
  "epoch": 3.67,
180
- "learning_rate": 1.6373801916932906e-05,
181
- "loss": 0.0034,
182
  "step": 2300
183
  },
 
 
 
 
 
 
184
  {
185
  "epoch": 3.83,
186
- "learning_rate": 8.386581469648563e-06,
187
- "loss": 0.0025,
188
  "step": 2400
189
  },
 
 
 
 
 
 
190
  {
191
  "epoch": 3.99,
192
- "learning_rate": 3.9936102236421723e-07,
193
- "loss": 0.0034,
194
  "step": 2500
195
  },
196
  {
197
  "epoch": 4.0,
198
- "eval_accuracy": 0.8601036269430051,
199
- "eval_f1": 0.8616514102008719,
200
- "eval_loss": 0.4513249099254608,
201
- "eval_precision": 0.8650015326151586,
202
- "eval_recall": 0.8601036269430051,
203
- "eval_runtime": 1.498,
204
- "eval_samples_per_second": 128.838,
205
- "eval_steps_per_second": 16.689,
206
  "step": 2504
207
  },
208
  {
209
- "epoch": 4.0,
210
- "step": 2504,
211
- "total_flos": 3.104468219559813e+18,
212
- "train_loss": 0.0725347773061683,
213
- "train_runtime": 849.1916,
214
- "train_samples_per_second": 47.174,
215
- "train_steps_per_second": 2.949
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
216
  }
217
  ],
218
- "max_steps": 2504,
219
- "num_train_epochs": 4,
220
- "total_flos": 3.104468219559813e+18,
221
  "trial_name": null,
222
  "trial_params": null
223
  }
 
1
  {
2
  "best_metric": 0.8704663212435233,
3
+ "best_model_checkpoint": "./vit-focal-skin/checkpoint-3130",
4
+ "epoch": 6.0,
5
+ "global_step": 3756,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
+ {
11
+ "epoch": 0.08,
12
+ "learning_rate": 0.00019733759318423856,
13
+ "loss": 0.325,
14
+ "step": 50
15
+ },
16
  {
17
  "epoch": 0.16,
18
+ "learning_rate": 0.0001946751863684771,
19
+ "loss": 0.2897,
20
  "step": 100
21
  },
22
+ {
23
+ "epoch": 0.24,
24
+ "learning_rate": 0.00019201277955271565,
25
+ "loss": 0.2508,
26
+ "step": 150
27
+ },
28
  {
29
  "epoch": 0.32,
30
+ "learning_rate": 0.00018935037273695422,
31
+ "loss": 0.2626,
32
  "step": 200
33
  },
34
+ {
35
+ "epoch": 0.4,
36
+ "learning_rate": 0.00018668796592119277,
37
+ "loss": 0.2308,
38
+ "step": 250
39
+ },
40
  {
41
  "epoch": 0.48,
42
+ "learning_rate": 0.00018402555910543132,
43
+ "loss": 0.2437,
44
  "step": 300
45
  },
46
+ {
47
+ "epoch": 0.56,
48
+ "learning_rate": 0.00018136315228966987,
49
+ "loss": 0.2594,
50
+ "step": 350
51
+ },
52
  {
53
  "epoch": 0.64,
54
+ "learning_rate": 0.0001787007454739084,
55
+ "loss": 0.2355,
56
  "step": 400
57
  },
58
+ {
59
+ "epoch": 0.72,
60
+ "learning_rate": 0.000176038338658147,
61
+ "loss": 0.2457,
62
+ "step": 450
63
+ },
64
  {
65
  "epoch": 0.8,
66
+ "learning_rate": 0.0001733759318423855,
67
+ "loss": 0.2136,
68
  "step": 500
69
  },
70
+ {
71
+ "epoch": 0.88,
72
+ "learning_rate": 0.00017071352502662408,
73
+ "loss": 0.2622,
74
+ "step": 550
75
+ },
76
  {
77
  "epoch": 0.96,
78
+ "learning_rate": 0.00016805111821086263,
79
+ "loss": 0.1966,
80
  "step": 600
81
  },
82
  {
83
  "epoch": 1.0,
84
+ "eval_accuracy": 0.8290155440414507,
85
+ "eval_f1": 0.8306733167328754,
86
+ "eval_loss": 0.3647349774837494,
87
+ "eval_precision": 0.8430511449105799,
88
+ "eval_recall": 0.8290155440414507,
89
+ "eval_runtime": 1.4278,
90
+ "eval_samples_per_second": 135.17,
91
+ "eval_steps_per_second": 17.509,
92
  "step": 626
93
  },
94
+ {
95
+ "epoch": 1.04,
96
+ "learning_rate": 0.00016538871139510118,
97
+ "loss": 0.1998,
98
+ "step": 650
99
+ },
100
  {
101
  "epoch": 1.12,
102
+ "learning_rate": 0.00016272630457933972,
103
+ "loss": 0.1312,
104
  "step": 700
105
  },
106
+ {
107
+ "epoch": 1.2,
108
+ "learning_rate": 0.0001600638977635783,
109
+ "loss": 0.1477,
110
+ "step": 750
111
+ },
112
  {
113
  "epoch": 1.28,
114
+ "learning_rate": 0.00015740149094781684,
115
+ "loss": 0.1155,
116
  "step": 800
117
  },
118
+ {
119
+ "epoch": 1.36,
120
+ "learning_rate": 0.0001547390841320554,
121
+ "loss": 0.1591,
122
+ "step": 850
123
+ },
124
  {
125
  "epoch": 1.44,
126
+ "learning_rate": 0.00015207667731629394,
127
+ "loss": 0.1568,
128
  "step": 900
129
  },
130
+ {
131
+ "epoch": 1.52,
132
+ "learning_rate": 0.00014941427050053249,
133
+ "loss": 0.1322,
134
+ "step": 950
135
+ },
136
  {
137
  "epoch": 1.6,
138
+ "learning_rate": 0.00014675186368477103,
139
+ "loss": 0.1478,
140
  "step": 1000
141
  },
142
+ {
143
+ "epoch": 1.68,
144
+ "learning_rate": 0.00014408945686900958,
145
+ "loss": 0.1145,
146
+ "step": 1050
147
+ },
148
  {
149
  "epoch": 1.76,
150
+ "learning_rate": 0.00014142705005324815,
151
+ "loss": 0.1483,
152
  "step": 1100
153
  },
154
+ {
155
+ "epoch": 1.84,
156
+ "learning_rate": 0.0001387646432374867,
157
+ "loss": 0.1517,
158
+ "step": 1150
159
+ },
160
  {
161
  "epoch": 1.92,
162
+ "learning_rate": 0.00013610223642172525,
163
+ "loss": 0.1294,
164
  "step": 1200
165
  },
166
+ {
167
+ "epoch": 2.0,
168
+ "learning_rate": 0.0001334398296059638,
169
+ "loss": 0.1434,
170
+ "step": 1250
171
+ },
172
  {
173
  "epoch": 2.0,
174
  "eval_accuracy": 0.8238341968911918,
175
+ "eval_f1": 0.825913518485432,
176
+ "eval_loss": 0.38836923241615295,
177
+ "eval_precision": 0.8418087741274269,
178
  "eval_recall": 0.8238341968911918,
179
+ "eval_runtime": 1.4008,
180
+ "eval_samples_per_second": 137.782,
181
+ "eval_steps_per_second": 17.847,
182
  "step": 1252
183
  },
184
  {
185
  "epoch": 2.08,
186
+ "learning_rate": 0.00013077742279020234,
187
+ "loss": 0.0846,
188
  "step": 1300
189
  },
190
+ {
191
+ "epoch": 2.16,
192
+ "learning_rate": 0.00012811501597444092,
193
+ "loss": 0.0635,
194
+ "step": 1350
195
+ },
196
  {
197
  "epoch": 2.24,
198
+ "learning_rate": 0.00012545260915867944,
199
+ "loss": 0.0466,
200
  "step": 1400
201
  },
202
+ {
203
+ "epoch": 2.32,
204
+ "learning_rate": 0.000122790202342918,
205
+ "loss": 0.0757,
206
+ "step": 1450
207
+ },
208
  {
209
  "epoch": 2.4,
210
+ "learning_rate": 0.00012012779552715656,
211
+ "loss": 0.0996,
212
  "step": 1500
213
  },
214
+ {
215
+ "epoch": 2.48,
216
+ "learning_rate": 0.00011746538871139509,
217
+ "loss": 0.089,
218
+ "step": 1550
219
+ },
220
  {
221
  "epoch": 2.56,
222
+ "learning_rate": 0.00011480298189563365,
223
+ "loss": 0.0679,
224
  "step": 1600
225
  },
226
+ {
227
+ "epoch": 2.64,
228
+ "learning_rate": 0.00011214057507987221,
229
+ "loss": 0.0747,
230
+ "step": 1650
231
+ },
232
  {
233
  "epoch": 2.72,
234
+ "learning_rate": 0.00010947816826411077,
235
+ "loss": 0.066,
236
  "step": 1700
237
  },
238
+ {
239
+ "epoch": 2.8,
240
+ "learning_rate": 0.0001068157614483493,
241
+ "loss": 0.0714,
242
+ "step": 1750
243
+ },
244
  {
245
  "epoch": 2.88,
246
+ "learning_rate": 0.00010415335463258787,
247
+ "loss": 0.0776,
248
  "step": 1800
249
  },
250
+ {
251
+ "epoch": 2.96,
252
+ "learning_rate": 0.00010149094781682643,
253
+ "loss": 0.058,
254
+ "step": 1850
255
+ },
256
  {
257
  "epoch": 3.0,
258
+ "eval_accuracy": 0.8186528497409327,
259
+ "eval_f1": 0.8136687258538028,
260
+ "eval_loss": 0.5063741207122803,
261
+ "eval_precision": 0.8183198316271921,
262
+ "eval_recall": 0.8186528497409327,
263
+ "eval_runtime": 1.4832,
264
+ "eval_samples_per_second": 130.125,
265
+ "eval_steps_per_second": 16.856,
266
  "step": 1878
267
  },
268
  {
269
  "epoch": 3.04,
270
+ "learning_rate": 9.882854100106496e-05,
271
+ "loss": 0.0361,
272
  "step": 1900
273
  },
274
+ {
275
+ "epoch": 3.12,
276
+ "learning_rate": 9.616613418530351e-05,
277
+ "loss": 0.0305,
278
+ "step": 1950
279
+ },
280
  {
281
  "epoch": 3.19,
282
+ "learning_rate": 9.350372736954207e-05,
283
+ "loss": 0.0224,
284
  "step": 2000
285
  },
286
+ {
287
+ "epoch": 3.27,
288
+ "learning_rate": 9.084132055378062e-05,
289
+ "loss": 0.0186,
290
+ "step": 2050
291
+ },
292
  {
293
  "epoch": 3.35,
294
+ "learning_rate": 8.817891373801918e-05,
295
+ "loss": 0.0138,
296
  "step": 2100
297
  },
298
+ {
299
+ "epoch": 3.43,
300
+ "learning_rate": 8.556975505857295e-05,
301
+ "loss": 0.0344,
302
+ "step": 2150
303
+ },
304
  {
305
  "epoch": 3.51,
306
+ "learning_rate": 8.29073482428115e-05,
307
+ "loss": 0.0158,
308
  "step": 2200
309
  },
310
+ {
311
+ "epoch": 3.59,
312
+ "learning_rate": 8.029818956336529e-05,
313
+ "loss": 0.0557,
314
+ "step": 2250
315
+ },
316
  {
317
  "epoch": 3.67,
318
+ "learning_rate": 7.763578274760383e-05,
319
+ "loss": 0.0409,
320
  "step": 2300
321
  },
322
+ {
323
+ "epoch": 3.75,
324
+ "learning_rate": 7.49733759318424e-05,
325
+ "loss": 0.0374,
326
+ "step": 2350
327
+ },
328
  {
329
  "epoch": 3.83,
330
+ "learning_rate": 7.231096911608094e-05,
331
+ "loss": 0.0301,
332
  "step": 2400
333
  },
334
+ {
335
+ "epoch": 3.91,
336
+ "learning_rate": 6.96485623003195e-05,
337
+ "loss": 0.02,
338
+ "step": 2450
339
+ },
340
  {
341
  "epoch": 3.99,
342
+ "learning_rate": 6.698615548455805e-05,
343
+ "loss": 0.02,
344
  "step": 2500
345
  },
346
  {
347
  "epoch": 4.0,
348
+ "eval_accuracy": 0.8393782383419689,
349
+ "eval_f1": 0.8431497010182372,
350
+ "eval_loss": 0.5476517081260681,
351
+ "eval_precision": 0.8537832767554047,
352
+ "eval_recall": 0.8393782383419689,
353
+ "eval_runtime": 1.5001,
354
+ "eval_samples_per_second": 128.658,
355
+ "eval_steps_per_second": 16.666,
356
  "step": 2504
357
  },
358
  {
359
+ "epoch": 4.07,
360
+ "learning_rate": 6.43237486687966e-05,
361
+ "loss": 0.0096,
362
+ "step": 2550
363
+ },
364
+ {
365
+ "epoch": 4.15,
366
+ "learning_rate": 6.166134185303514e-05,
367
+ "loss": 0.0025,
368
+ "step": 2600
369
+ },
370
+ {
371
+ "epoch": 4.23,
372
+ "learning_rate": 5.8998935037273696e-05,
373
+ "loss": 0.0031,
374
+ "step": 2650
375
+ },
376
+ {
377
+ "epoch": 4.31,
378
+ "learning_rate": 5.633652822151225e-05,
379
+ "loss": 0.0018,
380
+ "step": 2700
381
+ },
382
+ {
383
+ "epoch": 4.39,
384
+ "learning_rate": 5.36741214057508e-05,
385
+ "loss": 0.0035,
386
+ "step": 2750
387
+ },
388
+ {
389
+ "epoch": 4.47,
390
+ "learning_rate": 5.101171458998936e-05,
391
+ "loss": 0.0042,
392
+ "step": 2800
393
+ },
394
+ {
395
+ "epoch": 4.55,
396
+ "learning_rate": 4.8349307774227905e-05,
397
+ "loss": 0.0046,
398
+ "step": 2850
399
+ },
400
+ {
401
+ "epoch": 4.63,
402
+ "learning_rate": 4.568690095846646e-05,
403
+ "loss": 0.002,
404
+ "step": 2900
405
+ },
406
+ {
407
+ "epoch": 4.71,
408
+ "learning_rate": 4.3024494142705005e-05,
409
+ "loss": 0.0039,
410
+ "step": 2950
411
+ },
412
+ {
413
+ "epoch": 4.79,
414
+ "learning_rate": 4.036208732694356e-05,
415
+ "loss": 0.0011,
416
+ "step": 3000
417
+ },
418
+ {
419
+ "epoch": 4.87,
420
+ "learning_rate": 3.769968051118211e-05,
421
+ "loss": 0.0106,
422
+ "step": 3050
423
+ },
424
+ {
425
+ "epoch": 4.95,
426
+ "learning_rate": 3.503727369542067e-05,
427
+ "loss": 0.0018,
428
+ "step": 3100
429
+ },
430
+ {
431
+ "epoch": 5.0,
432
+ "eval_accuracy": 0.8704663212435233,
433
+ "eval_f1": 0.8749284792904506,
434
+ "eval_loss": 0.48757874965667725,
435
+ "eval_precision": 0.8863835430545274,
436
+ "eval_recall": 0.8704663212435233,
437
+ "eval_runtime": 1.4425,
438
+ "eval_samples_per_second": 133.795,
439
+ "eval_steps_per_second": 17.331,
440
+ "step": 3130
441
+ },
442
+ {
443
+ "epoch": 5.03,
444
+ "learning_rate": 3.2374866879659214e-05,
445
+ "loss": 0.0037,
446
+ "step": 3150
447
+ },
448
+ {
449
+ "epoch": 5.11,
450
+ "learning_rate": 2.971246006389776e-05,
451
+ "loss": 0.0004,
452
+ "step": 3200
453
+ },
454
+ {
455
+ "epoch": 5.19,
456
+ "learning_rate": 2.7050053248136315e-05,
457
+ "loss": 0.0004,
458
+ "step": 3250
459
+ },
460
+ {
461
+ "epoch": 5.27,
462
+ "learning_rate": 2.438764643237487e-05,
463
+ "loss": 0.0004,
464
+ "step": 3300
465
+ },
466
+ {
467
+ "epoch": 5.35,
468
+ "learning_rate": 2.172523961661342e-05,
469
+ "loss": 0.0003,
470
+ "step": 3350
471
+ },
472
+ {
473
+ "epoch": 5.43,
474
+ "learning_rate": 1.906283280085197e-05,
475
+ "loss": 0.0014,
476
+ "step": 3400
477
+ },
478
+ {
479
+ "epoch": 5.51,
480
+ "learning_rate": 1.6400425985090524e-05,
481
+ "loss": 0.0004,
482
+ "step": 3450
483
+ },
484
+ {
485
+ "epoch": 5.59,
486
+ "learning_rate": 1.3738019169329076e-05,
487
+ "loss": 0.0004,
488
+ "step": 3500
489
+ },
490
+ {
491
+ "epoch": 5.67,
492
+ "learning_rate": 1.1075612353567626e-05,
493
+ "loss": 0.0014,
494
+ "step": 3550
495
+ },
496
+ {
497
+ "epoch": 5.75,
498
+ "learning_rate": 8.413205537806178e-06,
499
+ "loss": 0.0004,
500
+ "step": 3600
501
+ },
502
+ {
503
+ "epoch": 5.83,
504
+ "learning_rate": 5.750798722044729e-06,
505
+ "loss": 0.0003,
506
+ "step": 3650
507
+ },
508
+ {
509
+ "epoch": 5.91,
510
+ "learning_rate": 3.08839190628328e-06,
511
+ "loss": 0.0003,
512
+ "step": 3700
513
+ },
514
+ {
515
+ "epoch": 5.99,
516
+ "learning_rate": 4.259850905218318e-07,
517
+ "loss": 0.0003,
518
+ "step": 3750
519
+ },
520
+ {
521
+ "epoch": 6.0,
522
+ "eval_accuracy": 0.8704663212435233,
523
+ "eval_f1": 0.8761642097552051,
524
+ "eval_loss": 0.4871196746826172,
525
+ "eval_precision": 0.8862443826726218,
526
+ "eval_recall": 0.8704663212435233,
527
+ "eval_runtime": 1.5001,
528
+ "eval_samples_per_second": 128.657,
529
+ "eval_steps_per_second": 16.665,
530
+ "step": 3756
531
+ },
532
+ {
533
+ "epoch": 6.0,
534
+ "step": 3756,
535
+ "total_flos": 4.65670232933972e+18,
536
+ "train_loss": 0.08256206865654514,
537
+ "train_runtime": 1281.0434,
538
+ "train_samples_per_second": 46.907,
539
+ "train_steps_per_second": 2.932
540
  }
541
  ],
542
+ "max_steps": 3756,
543
+ "num_train_epochs": 6,
544
+ "total_flos": 4.65670232933972e+18,
545
  "trial_name": null,
546
  "trial_params": null
547
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3f657e798e7f77936234299466c6296e6c9fba3c3d1d364c041c99c8e3780c6a
3
  size 3899
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a31e83d843e19f25dcc3e3bb54479c7d12ca68324f3971ccbd3897138f4357e9
3
  size 3899