vintage-lavender619 commited on
Commit
1ac3394
·
verified ·
1 Parent(s): 7ce05b3

Training in progress, epoch 1

Browse files
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "epoch": 15.0,
3
- "eval_accuracy": 0.878125,
4
- "eval_loss": 0.3683047890663147,
5
- "eval_runtime": 3.1898,
6
- "eval_samples_per_second": 100.318,
7
- "eval_steps_per_second": 3.135,
8
  "total_flos": 1.4878728707899392e+18,
9
- "train_loss": 0.48002211888631185,
10
- "train_runtime": 441.2201,
11
- "train_samples_per_second": 43.516,
12
- "train_steps_per_second": 0.34
13
  }
 
1
  {
2
  "epoch": 15.0,
3
+ "eval_accuracy": 0.915625,
4
+ "eval_loss": 0.26764923334121704,
5
+ "eval_runtime": 3.0974,
6
+ "eval_samples_per_second": 103.313,
7
+ "eval_steps_per_second": 3.229,
8
  "total_flos": 1.4878728707899392e+18,
9
+ "train_loss": 0.5071320374806721,
10
+ "train_runtime": 447.0543,
11
+ "train_samples_per_second": 42.948,
12
+ "train_steps_per_second": 0.336
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 15.0,
3
- "eval_accuracy": 0.878125,
4
- "eval_loss": 0.3683047890663147,
5
- "eval_runtime": 3.1898,
6
- "eval_samples_per_second": 100.318,
7
- "eval_steps_per_second": 3.135
8
  }
 
1
  {
2
  "epoch": 15.0,
3
+ "eval_accuracy": 0.915625,
4
+ "eval_loss": 0.26764923334121704,
5
+ "eval_runtime": 3.0974,
6
+ "eval_samples_per_second": 103.313,
7
+ "eval_steps_per_second": 3.229
8
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6f34d01985a18e902e48126e003c0965271de1fa3383bf05b716052d24342807
3
  size 343230128
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec05dc7a171a3910360de9c6a6fa6a85f219f35e518be559130a0dab16a73079
3
  size 343230128
runs/Jun10_11-47-37_4c61f7eac1f1/events.out.tfevents.1718020528.4c61f7eac1f1.793.18 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b162a90e2a6681625d9fc2ece697b1f8d3d9cea566418381ee39e89f5b8c96c8
3
+ size 411
runs/Jun10_11-55-42_4c61f7eac1f1/events.out.tfevents.1718020543.4c61f7eac1f1.793.19 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c5af562d889766781708469650d888b3c24a70a636ecc5be2c660895adfbce5f
3
+ size 5440
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 15.0,
3
  "total_flos": 1.4878728707899392e+18,
4
- "train_loss": 0.48002211888631185,
5
- "train_runtime": 441.2201,
6
- "train_samples_per_second": 43.516,
7
- "train_steps_per_second": 0.34
8
  }
 
1
  {
2
  "epoch": 15.0,
3
  "total_flos": 1.4878728707899392e+18,
4
+ "train_loss": 0.5071320374806721,
5
+ "train_runtime": 447.0543,
6
+ "train_samples_per_second": 42.948,
7
+ "train_steps_per_second": 0.336
8
  }
trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 0.878125,
3
- "best_model_checkpoint": "vit-base-patch16-224-finalterm/checkpoint-100",
4
  "epoch": 15.0,
5
  "eval_steps": 500,
6
  "global_step": 150,
@@ -10,252 +10,252 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "grad_norm": 2.554522752761841,
14
  "learning_rate": 3.3333333333333335e-05,
15
- "loss": 1.4259,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 1.0,
20
- "eval_accuracy": 0.44375,
21
- "eval_loss": 1.2691426277160645,
22
- "eval_runtime": 3.2172,
23
- "eval_samples_per_second": 99.467,
24
- "eval_steps_per_second": 3.108,
25
  "step": 10
26
  },
27
  {
28
  "epoch": 2.0,
29
- "grad_norm": 3.0016026496887207,
30
  "learning_rate": 4.814814814814815e-05,
31
- "loss": 1.011,
32
  "step": 20
33
  },
34
  {
35
  "epoch": 2.0,
36
- "eval_accuracy": 0.64375,
37
- "eval_loss": 0.8635731935501099,
38
- "eval_runtime": 3.1996,
39
- "eval_samples_per_second": 100.014,
40
- "eval_steps_per_second": 3.125,
41
  "step": 20
42
  },
43
  {
44
  "epoch": 3.0,
45
- "grad_norm": 2.644876718521118,
46
  "learning_rate": 4.4444444444444447e-05,
47
- "loss": 0.6451,
48
  "step": 30
49
  },
50
  {
51
  "epoch": 3.0,
52
- "eval_accuracy": 0.753125,
53
- "eval_loss": 0.6140075922012329,
54
- "eval_runtime": 3.2123,
55
- "eval_samples_per_second": 99.618,
56
- "eval_steps_per_second": 3.113,
57
  "step": 30
58
  },
59
  {
60
  "epoch": 4.0,
61
- "grad_norm": 4.116305828094482,
62
  "learning_rate": 4.074074074074074e-05,
63
- "loss": 0.4746,
64
  "step": 40
65
  },
66
  {
67
  "epoch": 4.0,
68
- "eval_accuracy": 0.80625,
69
- "eval_loss": 0.5410074591636658,
70
- "eval_runtime": 3.2651,
71
- "eval_samples_per_second": 98.005,
72
- "eval_steps_per_second": 3.063,
73
  "step": 40
74
  },
75
  {
76
  "epoch": 5.0,
77
- "grad_norm": 2.129575490951538,
78
  "learning_rate": 3.7037037037037037e-05,
79
- "loss": 0.4118,
80
  "step": 50
81
  },
82
  {
83
  "epoch": 5.0,
84
- "eval_accuracy": 0.840625,
85
- "eval_loss": 0.4661463797092438,
86
- "eval_runtime": 3.2008,
87
- "eval_samples_per_second": 99.974,
88
- "eval_steps_per_second": 3.124,
89
  "step": 50
90
  },
91
  {
92
  "epoch": 6.0,
93
- "grad_norm": 3.7490148544311523,
94
  "learning_rate": 3.3333333333333335e-05,
95
- "loss": 0.4047,
96
  "step": 60
97
  },
98
  {
99
  "epoch": 6.0,
100
- "eval_accuracy": 0.81875,
101
- "eval_loss": 0.4901629388332367,
102
- "eval_runtime": 3.2025,
103
- "eval_samples_per_second": 99.921,
104
- "eval_steps_per_second": 3.123,
105
  "step": 60
106
  },
107
  {
108
  "epoch": 7.0,
109
- "grad_norm": 2.4626872539520264,
110
  "learning_rate": 2.962962962962963e-05,
111
- "loss": 0.3728,
112
  "step": 70
113
  },
114
  {
115
  "epoch": 7.0,
116
- "eval_accuracy": 0.85,
117
- "eval_loss": 0.40885186195373535,
118
- "eval_runtime": 3.1941,
119
- "eval_samples_per_second": 100.185,
120
- "eval_steps_per_second": 3.131,
121
  "step": 70
122
  },
123
  {
124
  "epoch": 8.0,
125
- "grad_norm": 1.6122300624847412,
126
  "learning_rate": 2.5925925925925925e-05,
127
- "loss": 0.3445,
128
  "step": 80
129
  },
130
  {
131
  "epoch": 8.0,
132
- "eval_accuracy": 0.865625,
133
- "eval_loss": 0.37425753474235535,
134
- "eval_runtime": 3.2213,
135
- "eval_samples_per_second": 99.339,
136
- "eval_steps_per_second": 3.104,
137
  "step": 80
138
  },
139
  {
140
  "epoch": 9.0,
141
- "grad_norm": 2.094912528991699,
142
  "learning_rate": 2.2222222222222223e-05,
143
- "loss": 0.3266,
144
  "step": 90
145
  },
146
  {
147
  "epoch": 9.0,
148
- "eval_accuracy": 0.86875,
149
- "eval_loss": 0.3760340213775635,
150
- "eval_runtime": 3.2169,
151
- "eval_samples_per_second": 99.475,
152
- "eval_steps_per_second": 3.109,
153
  "step": 90
154
  },
155
  {
156
  "epoch": 10.0,
157
- "grad_norm": 1.830751657485962,
158
  "learning_rate": 1.8518518518518518e-05,
159
- "loss": 0.3222,
160
  "step": 100
161
  },
162
  {
163
  "epoch": 10.0,
164
- "eval_accuracy": 0.878125,
165
- "eval_loss": 0.3683047890663147,
166
- "eval_runtime": 3.197,
167
- "eval_samples_per_second": 100.093,
168
- "eval_steps_per_second": 3.128,
169
  "step": 100
170
  },
171
  {
172
  "epoch": 11.0,
173
- "grad_norm": 2.877479314804077,
174
  "learning_rate": 1.4814814814814815e-05,
175
- "loss": 0.3052,
176
  "step": 110
177
  },
178
  {
179
  "epoch": 11.0,
180
- "eval_accuracy": 0.865625,
181
- "eval_loss": 0.3901776075363159,
182
- "eval_runtime": 3.449,
183
- "eval_samples_per_second": 92.782,
184
- "eval_steps_per_second": 2.899,
185
  "step": 110
186
  },
187
  {
188
  "epoch": 12.0,
189
- "grad_norm": 2.059194564819336,
190
  "learning_rate": 1.1111111111111112e-05,
191
- "loss": 0.3085,
192
  "step": 120
193
  },
194
  {
195
  "epoch": 12.0,
196
- "eval_accuracy": 0.85625,
197
- "eval_loss": 0.3693941533565521,
198
- "eval_runtime": 3.1791,
199
- "eval_samples_per_second": 100.657,
200
- "eval_steps_per_second": 3.146,
201
  "step": 120
202
  },
203
  {
204
  "epoch": 13.0,
205
- "grad_norm": 2.344334840774536,
206
  "learning_rate": 7.4074074074074075e-06,
207
- "loss": 0.2994,
208
  "step": 130
209
  },
210
  {
211
  "epoch": 13.0,
212
- "eval_accuracy": 0.878125,
213
- "eval_loss": 0.3656119704246521,
214
- "eval_runtime": 3.1898,
215
- "eval_samples_per_second": 100.32,
216
- "eval_steps_per_second": 3.135,
217
  "step": 130
218
  },
219
  {
220
  "epoch": 14.0,
221
- "grad_norm": 1.6384918689727783,
222
  "learning_rate": 3.7037037037037037e-06,
223
- "loss": 0.267,
224
  "step": 140
225
  },
226
  {
227
  "epoch": 14.0,
228
- "eval_accuracy": 0.878125,
229
- "eval_loss": 0.3477219045162201,
230
- "eval_runtime": 3.2307,
231
- "eval_samples_per_second": 99.049,
232
- "eval_steps_per_second": 3.095,
233
  "step": 140
234
  },
235
  {
236
  "epoch": 15.0,
237
- "grad_norm": 2.3588130474090576,
238
  "learning_rate": 0.0,
239
- "loss": 0.281,
240
  "step": 150
241
  },
242
  {
243
  "epoch": 15.0,
244
- "eval_accuracy": 0.878125,
245
- "eval_loss": 0.3476409316062927,
246
- "eval_runtime": 3.193,
247
- "eval_samples_per_second": 100.22,
248
- "eval_steps_per_second": 3.132,
249
  "step": 150
250
  },
251
  {
252
  "epoch": 15.0,
253
  "step": 150,
254
  "total_flos": 1.4878728707899392e+18,
255
- "train_loss": 0.48002211888631185,
256
- "train_runtime": 441.2201,
257
- "train_samples_per_second": 43.516,
258
- "train_steps_per_second": 0.34
259
  }
260
  ],
261
  "logging_steps": 10,
 
1
  {
2
+ "best_metric": 0.915625,
3
+ "best_model_checkpoint": "vit-base-patch16-224-finalterm/checkpoint-140",
4
  "epoch": 15.0,
5
  "eval_steps": 500,
6
  "global_step": 150,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "grad_norm": 3.922755241394043,
14
  "learning_rate": 3.3333333333333335e-05,
15
+ "loss": 1.4514,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 1.0,
20
+ "eval_accuracy": 0.559375,
21
+ "eval_loss": 1.167618989944458,
22
+ "eval_runtime": 3.1299,
23
+ "eval_samples_per_second": 102.239,
24
+ "eval_steps_per_second": 3.195,
25
  "step": 10
26
  },
27
  {
28
  "epoch": 2.0,
29
+ "grad_norm": 3.1222116947174072,
30
  "learning_rate": 4.814814814814815e-05,
31
+ "loss": 1.0123,
32
  "step": 20
33
  },
34
  {
35
  "epoch": 2.0,
36
+ "eval_accuracy": 0.7,
37
+ "eval_loss": 0.8034278750419617,
38
+ "eval_runtime": 3.1134,
39
+ "eval_samples_per_second": 102.781,
40
+ "eval_steps_per_second": 3.212,
41
  "step": 20
42
  },
43
  {
44
  "epoch": 3.0,
45
+ "grad_norm": 2.397467613220215,
46
  "learning_rate": 4.4444444444444447e-05,
47
+ "loss": 0.6949,
48
  "step": 30
49
  },
50
  {
51
  "epoch": 3.0,
52
+ "eval_accuracy": 0.828125,
53
+ "eval_loss": 0.5371649861335754,
54
+ "eval_runtime": 3.1115,
55
+ "eval_samples_per_second": 102.843,
56
+ "eval_steps_per_second": 3.214,
57
  "step": 30
58
  },
59
  {
60
  "epoch": 4.0,
61
+ "grad_norm": 2.74021315574646,
62
  "learning_rate": 4.074074074074074e-05,
63
+ "loss": 0.5385,
64
  "step": 40
65
  },
66
  {
67
  "epoch": 4.0,
68
+ "eval_accuracy": 0.871875,
69
+ "eval_loss": 0.4333480894565582,
70
+ "eval_runtime": 3.1377,
71
+ "eval_samples_per_second": 101.986,
72
+ "eval_steps_per_second": 3.187,
73
  "step": 40
74
  },
75
  {
76
  "epoch": 5.0,
77
+ "grad_norm": 3.3417820930480957,
78
  "learning_rate": 3.7037037037037037e-05,
79
+ "loss": 0.4984,
80
  "step": 50
81
  },
82
  {
83
  "epoch": 5.0,
84
+ "eval_accuracy": 0.884375,
85
+ "eval_loss": 0.3638252317905426,
86
+ "eval_runtime": 3.1292,
87
+ "eval_samples_per_second": 102.261,
88
+ "eval_steps_per_second": 3.196,
89
  "step": 50
90
  },
91
  {
92
  "epoch": 6.0,
93
+ "grad_norm": 2.3100404739379883,
94
  "learning_rate": 3.3333333333333335e-05,
95
+ "loss": 0.4382,
96
  "step": 60
97
  },
98
  {
99
  "epoch": 6.0,
100
+ "eval_accuracy": 0.89375,
101
+ "eval_loss": 0.31991782784461975,
102
+ "eval_runtime": 3.1525,
103
+ "eval_samples_per_second": 101.508,
104
+ "eval_steps_per_second": 3.172,
105
  "step": 60
106
  },
107
  {
108
  "epoch": 7.0,
109
+ "grad_norm": 2.219219207763672,
110
  "learning_rate": 2.962962962962963e-05,
111
+ "loss": 0.3739,
112
  "step": 70
113
  },
114
  {
115
  "epoch": 7.0,
116
+ "eval_accuracy": 0.9,
117
+ "eval_loss": 0.3272128105163574,
118
+ "eval_runtime": 3.1159,
119
+ "eval_samples_per_second": 102.7,
120
+ "eval_steps_per_second": 3.209,
121
  "step": 70
122
  },
123
  {
124
  "epoch": 8.0,
125
+ "grad_norm": 2.4531455039978027,
126
  "learning_rate": 2.5925925925925925e-05,
127
+ "loss": 0.3692,
128
  "step": 80
129
  },
130
  {
131
  "epoch": 8.0,
132
+ "eval_accuracy": 0.890625,
133
+ "eval_loss": 0.3230211138725281,
134
+ "eval_runtime": 3.0903,
135
+ "eval_samples_per_second": 103.548,
136
+ "eval_steps_per_second": 3.236,
137
  "step": 80
138
  },
139
  {
140
  "epoch": 9.0,
141
+ "grad_norm": 2.368349075317383,
142
  "learning_rate": 2.2222222222222223e-05,
143
+ "loss": 0.3705,
144
  "step": 90
145
  },
146
  {
147
  "epoch": 9.0,
148
+ "eval_accuracy": 0.896875,
149
+ "eval_loss": 0.33077651262283325,
150
+ "eval_runtime": 3.1039,
151
+ "eval_samples_per_second": 103.096,
152
+ "eval_steps_per_second": 3.222,
153
  "step": 90
154
  },
155
  {
156
  "epoch": 10.0,
157
+ "grad_norm": 1.733303427696228,
158
  "learning_rate": 1.8518518518518518e-05,
159
+ "loss": 0.3458,
160
  "step": 100
161
  },
162
  {
163
  "epoch": 10.0,
164
+ "eval_accuracy": 0.9,
165
+ "eval_loss": 0.29199448227882385,
166
+ "eval_runtime": 3.1065,
167
+ "eval_samples_per_second": 103.009,
168
+ "eval_steps_per_second": 3.219,
169
  "step": 100
170
  },
171
  {
172
  "epoch": 11.0,
173
+ "grad_norm": 2.134023904800415,
174
  "learning_rate": 1.4814814814814815e-05,
175
+ "loss": 0.3176,
176
  "step": 110
177
  },
178
  {
179
  "epoch": 11.0,
180
+ "eval_accuracy": 0.90625,
181
+ "eval_loss": 0.28801438212394714,
182
+ "eval_runtime": 3.1103,
183
+ "eval_samples_per_second": 102.885,
184
+ "eval_steps_per_second": 3.215,
185
  "step": 110
186
  },
187
  {
188
  "epoch": 12.0,
189
+ "grad_norm": 2.6795620918273926,
190
  "learning_rate": 1.1111111111111112e-05,
191
+ "loss": 0.3101,
192
  "step": 120
193
  },
194
  {
195
  "epoch": 12.0,
196
+ "eval_accuracy": 0.896875,
197
+ "eval_loss": 0.3041202425956726,
198
+ "eval_runtime": 3.1014,
199
+ "eval_samples_per_second": 103.178,
200
+ "eval_steps_per_second": 3.224,
201
  "step": 120
202
  },
203
  {
204
  "epoch": 13.0,
205
+ "grad_norm": 2.048682451248169,
206
  "learning_rate": 7.4074074074074075e-06,
207
+ "loss": 0.3224,
208
  "step": 130
209
  },
210
  {
211
  "epoch": 13.0,
212
+ "eval_accuracy": 0.903125,
213
+ "eval_loss": 0.27769678831100464,
214
+ "eval_runtime": 3.0997,
215
+ "eval_samples_per_second": 103.235,
216
+ "eval_steps_per_second": 3.226,
217
  "step": 130
218
  },
219
  {
220
  "epoch": 14.0,
221
+ "grad_norm": 1.949576735496521,
222
  "learning_rate": 3.7037037037037037e-06,
223
+ "loss": 0.2946,
224
  "step": 140
225
  },
226
  {
227
  "epoch": 14.0,
228
+ "eval_accuracy": 0.915625,
229
+ "eval_loss": 0.26764923334121704,
230
+ "eval_runtime": 3.1214,
231
+ "eval_samples_per_second": 102.519,
232
+ "eval_steps_per_second": 3.204,
233
  "step": 140
234
  },
235
  {
236
  "epoch": 15.0,
237
+ "grad_norm": 2.0188732147216797,
238
  "learning_rate": 0.0,
239
+ "loss": 0.2693,
240
  "step": 150
241
  },
242
  {
243
  "epoch": 15.0,
244
+ "eval_accuracy": 0.9125,
245
+ "eval_loss": 0.27088016271591187,
246
+ "eval_runtime": 3.1019,
247
+ "eval_samples_per_second": 103.162,
248
+ "eval_steps_per_second": 3.224,
249
  "step": 150
250
  },
251
  {
252
  "epoch": 15.0,
253
  "step": 150,
254
  "total_flos": 1.4878728707899392e+18,
255
+ "train_loss": 0.5071320374806721,
256
+ "train_runtime": 447.0543,
257
+ "train_samples_per_second": 42.948,
258
+ "train_steps_per_second": 0.336
259
  }
260
  ],
261
  "logging_steps": 10,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e79e2199995f8b4de748b18e6e9daf4a67674dcd950a0c02cccbf2d486b9b2aa
3
  size 5112
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:47d83c38395037e08e18b42b7fbd287badfca77ff1aa8f3bdd82a6d8f4f3cde3
3
  size 5112