CbI3PAHb committed on
Commit
20a48b6
·
verified ·
1 Parent(s): 73d2c48

Upload folder using huggingface_hub

Browse files
checkpoint-130/config.json ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "Organika/sdxl-detector",
3
+ "_num_labels": 2,
4
+ "architectures": [
5
+ "SwinForImageClassification"
6
+ ],
7
+ "attention_probs_dropout_prob": 0.0,
8
+ "depths": [
9
+ 2,
10
+ 2,
11
+ 18,
12
+ 2
13
+ ],
14
+ "drop_path_rate": 0.1,
15
+ "embed_dim": 128,
16
+ "encoder_stride": 32,
17
+ "hidden_act": "gelu",
18
+ "hidden_dropout_prob": 0.0,
19
+ "hidden_size": 1024,
20
+ "id2label": {
21
+ "0": "artificial",
22
+ "1": "human"
23
+ },
24
+ "image_size": 224,
25
+ "initializer_range": 0.02,
26
+ "label2id": {
27
+ "artificial": 0,
28
+ "human": 1
29
+ },
30
+ "layer_norm_eps": 1e-05,
31
+ "max_length": 128,
32
+ "mlp_ratio": 4.0,
33
+ "model_type": "swin",
34
+ "num_channels": 3,
35
+ "num_heads": [
36
+ 4,
37
+ 8,
38
+ 16,
39
+ 32
40
+ ],
41
+ "num_layers": 4,
42
+ "out_features": [
43
+ "stage4"
44
+ ],
45
+ "out_indices": [
46
+ 4
47
+ ],
48
+ "padding": "max_length",
49
+ "patch_size": 4,
50
+ "path_norm": true,
51
+ "problem_type": "single_label_classification",
52
+ "qkv_bias": true,
53
+ "stage_names": [
54
+ "stem",
55
+ "stage1",
56
+ "stage2",
57
+ "stage3",
58
+ "stage4"
59
+ ],
60
+ "torch_dtype": "float32",
61
+ "transformers_version": "4.39.3",
62
+ "use_absolute_embeddings": false,
63
+ "window_size": 7
64
+ }
checkpoint-130/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86a9c215382ea7372ff3d6cc70c3c6268b24b661c627fbfe8ef12ac3abd2a13a
3
+ size 347498816
checkpoint-130/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d7a56e1fa8ec2cfa10deda0b0bfd4d77ee75bf9fd7acee7ec54c35e82a6c0b6
3
+ size 694317645
checkpoint-130/preprocessor_config.json ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_valid_processor_keys": [
3
+ "images",
4
+ "do_resize",
5
+ "size",
6
+ "resample",
7
+ "do_rescale",
8
+ "rescale_factor",
9
+ "do_normalize",
10
+ "image_mean",
11
+ "image_std",
12
+ "return_tensors",
13
+ "data_format",
14
+ "input_data_format"
15
+ ],
16
+ "do_normalize": true,
17
+ "do_rescale": true,
18
+ "do_resize": true,
19
+ "image_mean": [
20
+ 0.485,
21
+ 0.456,
22
+ 0.406
23
+ ],
24
+ "image_processor_type": "ViTImageProcessor",
25
+ "image_std": [
26
+ 0.229,
27
+ 0.224,
28
+ 0.225
29
+ ],
30
+ "resample": 3,
31
+ "rescale_factor": 0.00392156862745098,
32
+ "size": {
33
+ "height": 224,
34
+ "width": 224
35
+ }
36
+ }
checkpoint-130/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e6dd85f936f43466f39788b2b1110fbce547a1701a01c573f3d90de30d587fe
3
+ size 14244
checkpoint-130/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d057eafda7411f97093737064830a708851e776787dd239516be91eca678ee5d
3
+ size 1064
checkpoint-130/trainer_state.json ADDED
@@ -0,0 +1,1048 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.004848334938287735,
3
+ "best_model_checkpoint": "ai_vs_real_image_detection/checkpoint-130",
4
+ "epoch": 2.6,
5
+ "eval_steps": 10,
6
+ "global_step": 130,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.02,
13
+ "grad_norm": 52.89345169067383,
14
+ "learning_rate": 5e-06,
15
+ "loss": 2.2763,
16
+ "step": 1
17
+ },
18
+ {
19
+ "epoch": 0.04,
20
+ "grad_norm": 62.66461944580078,
21
+ "learning_rate": 1e-05,
22
+ "loss": 1.8254,
23
+ "step": 2
24
+ },
25
+ {
26
+ "epoch": 0.06,
27
+ "grad_norm": 66.1015625,
28
+ "learning_rate": 1.5e-05,
29
+ "loss": 1.8906,
30
+ "step": 3
31
+ },
32
+ {
33
+ "epoch": 0.08,
34
+ "grad_norm": 56.700233459472656,
35
+ "learning_rate": 2e-05,
36
+ "loss": 1.3529,
37
+ "step": 4
38
+ },
39
+ {
40
+ "epoch": 0.1,
41
+ "grad_norm": 66.10917663574219,
42
+ "learning_rate": 2.5e-05,
43
+ "loss": 1.4672,
44
+ "step": 5
45
+ },
46
+ {
47
+ "epoch": 0.12,
48
+ "grad_norm": 52.68284225463867,
49
+ "learning_rate": 3e-05,
50
+ "loss": 1.0578,
51
+ "step": 6
52
+ },
53
+ {
54
+ "epoch": 0.14,
55
+ "grad_norm": 42.4580078125,
56
+ "learning_rate": 3.5e-05,
57
+ "loss": 1.1221,
58
+ "step": 7
59
+ },
60
+ {
61
+ "epoch": 0.16,
62
+ "grad_norm": 64.87982940673828,
63
+ "learning_rate": 4e-05,
64
+ "loss": 0.9313,
65
+ "step": 8
66
+ },
67
+ {
68
+ "epoch": 0.18,
69
+ "grad_norm": 56.227081298828125,
70
+ "learning_rate": 4.5e-05,
71
+ "loss": 0.644,
72
+ "step": 9
73
+ },
74
+ {
75
+ "epoch": 0.2,
76
+ "grad_norm": 13.719300270080566,
77
+ "learning_rate": 5e-05,
78
+ "loss": 0.4305,
79
+ "step": 10
80
+ },
81
+ {
82
+ "epoch": 0.2,
83
+ "eval_accuracy": 0.9144736842105263,
84
+ "eval_loss": 0.19980163872241974,
85
+ "eval_runtime": 6.726,
86
+ "eval_samples_per_second": 45.198,
87
+ "eval_steps_per_second": 1.189,
88
+ "step": 10
89
+ },
90
+ {
91
+ "epoch": 0.22,
92
+ "grad_norm": 16.044822692871094,
93
+ "learning_rate": 4.973684210526316e-05,
94
+ "loss": 0.3892,
95
+ "step": 11
96
+ },
97
+ {
98
+ "epoch": 0.24,
99
+ "grad_norm": 15.53343391418457,
100
+ "learning_rate": 4.9473684210526315e-05,
101
+ "loss": 0.3683,
102
+ "step": 12
103
+ },
104
+ {
105
+ "epoch": 0.26,
106
+ "grad_norm": 13.435264587402344,
107
+ "learning_rate": 4.921052631578947e-05,
108
+ "loss": 0.3594,
109
+ "step": 13
110
+ },
111
+ {
112
+ "epoch": 0.28,
113
+ "grad_norm": 17.45833969116211,
114
+ "learning_rate": 4.8947368421052635e-05,
115
+ "loss": 0.5304,
116
+ "step": 14
117
+ },
118
+ {
119
+ "epoch": 0.3,
120
+ "grad_norm": 14.26673412322998,
121
+ "learning_rate": 4.868421052631579e-05,
122
+ "loss": 0.4804,
123
+ "step": 15
124
+ },
125
+ {
126
+ "epoch": 0.32,
127
+ "grad_norm": 9.26322078704834,
128
+ "learning_rate": 4.842105263157895e-05,
129
+ "loss": 0.3633,
130
+ "step": 16
131
+ },
132
+ {
133
+ "epoch": 0.34,
134
+ "grad_norm": 8.355168342590332,
135
+ "learning_rate": 4.8157894736842105e-05,
136
+ "loss": 0.4711,
137
+ "step": 17
138
+ },
139
+ {
140
+ "epoch": 0.36,
141
+ "grad_norm": 8.733236312866211,
142
+ "learning_rate": 4.789473684210526e-05,
143
+ "loss": 0.3954,
144
+ "step": 18
145
+ },
146
+ {
147
+ "epoch": 0.38,
148
+ "grad_norm": 5.385647296905518,
149
+ "learning_rate": 4.7631578947368424e-05,
150
+ "loss": 0.1943,
151
+ "step": 19
152
+ },
153
+ {
154
+ "epoch": 0.4,
155
+ "grad_norm": 9.892962455749512,
156
+ "learning_rate": 4.736842105263158e-05,
157
+ "loss": 0.3209,
158
+ "step": 20
159
+ },
160
+ {
161
+ "epoch": 0.4,
162
+ "eval_accuracy": 0.8881578947368421,
163
+ "eval_loss": 0.24087880551815033,
164
+ "eval_runtime": 6.8213,
165
+ "eval_samples_per_second": 44.566,
166
+ "eval_steps_per_second": 1.173,
167
+ "step": 20
168
+ },
169
+ {
170
+ "epoch": 0.42,
171
+ "grad_norm": 5.46142578125,
172
+ "learning_rate": 4.7105263157894744e-05,
173
+ "loss": 0.1505,
174
+ "step": 21
175
+ },
176
+ {
177
+ "epoch": 0.44,
178
+ "grad_norm": 9.863460540771484,
179
+ "learning_rate": 4.68421052631579e-05,
180
+ "loss": 0.2481,
181
+ "step": 22
182
+ },
183
+ {
184
+ "epoch": 0.46,
185
+ "grad_norm": 7.595032691955566,
186
+ "learning_rate": 4.657894736842106e-05,
187
+ "loss": 0.2001,
188
+ "step": 23
189
+ },
190
+ {
191
+ "epoch": 0.48,
192
+ "grad_norm": 17.375865936279297,
193
+ "learning_rate": 4.6315789473684214e-05,
194
+ "loss": 0.336,
195
+ "step": 24
196
+ },
197
+ {
198
+ "epoch": 0.5,
199
+ "grad_norm": 5.597734451293945,
200
+ "learning_rate": 4.605263157894737e-05,
201
+ "loss": 0.1395,
202
+ "step": 25
203
+ },
204
+ {
205
+ "epoch": 0.52,
206
+ "grad_norm": 7.56685209274292,
207
+ "learning_rate": 4.5789473684210527e-05,
208
+ "loss": 0.1739,
209
+ "step": 26
210
+ },
211
+ {
212
+ "epoch": 0.54,
213
+ "grad_norm": 13.769929885864258,
214
+ "learning_rate": 4.552631578947369e-05,
215
+ "loss": 0.1959,
216
+ "step": 27
217
+ },
218
+ {
219
+ "epoch": 0.56,
220
+ "grad_norm": 10.677892684936523,
221
+ "learning_rate": 4.5263157894736846e-05,
222
+ "loss": 0.2015,
223
+ "step": 28
224
+ },
225
+ {
226
+ "epoch": 0.58,
227
+ "grad_norm": 3.645374059677124,
228
+ "learning_rate": 4.5e-05,
229
+ "loss": 0.0576,
230
+ "step": 29
231
+ },
232
+ {
233
+ "epoch": 0.6,
234
+ "grad_norm": 12.538244247436523,
235
+ "learning_rate": 4.473684210526316e-05,
236
+ "loss": 0.324,
237
+ "step": 30
238
+ },
239
+ {
240
+ "epoch": 0.6,
241
+ "eval_accuracy": 0.9802631578947368,
242
+ "eval_loss": 0.056003469973802567,
243
+ "eval_runtime": 6.4198,
244
+ "eval_samples_per_second": 47.353,
245
+ "eval_steps_per_second": 1.246,
246
+ "step": 30
247
+ },
248
+ {
249
+ "epoch": 0.62,
250
+ "grad_norm": 8.700516700744629,
251
+ "learning_rate": 4.4473684210526316e-05,
252
+ "loss": 0.1199,
253
+ "step": 31
254
+ },
255
+ {
256
+ "epoch": 0.64,
257
+ "grad_norm": 6.926602840423584,
258
+ "learning_rate": 4.421052631578947e-05,
259
+ "loss": 0.1471,
260
+ "step": 32
261
+ },
262
+ {
263
+ "epoch": 0.66,
264
+ "grad_norm": 8.461554527282715,
265
+ "learning_rate": 4.394736842105263e-05,
266
+ "loss": 0.0649,
267
+ "step": 33
268
+ },
269
+ {
270
+ "epoch": 0.68,
271
+ "grad_norm": 7.129871845245361,
272
+ "learning_rate": 4.368421052631579e-05,
273
+ "loss": 0.1167,
274
+ "step": 34
275
+ },
276
+ {
277
+ "epoch": 0.7,
278
+ "grad_norm": 20.945388793945312,
279
+ "learning_rate": 4.342105263157895e-05,
280
+ "loss": 0.3279,
281
+ "step": 35
282
+ },
283
+ {
284
+ "epoch": 0.72,
285
+ "grad_norm": 4.466580390930176,
286
+ "learning_rate": 4.3157894736842105e-05,
287
+ "loss": 0.0517,
288
+ "step": 36
289
+ },
290
+ {
291
+ "epoch": 0.74,
292
+ "grad_norm": 9.97375202178955,
293
+ "learning_rate": 4.289473684210527e-05,
294
+ "loss": 0.1241,
295
+ "step": 37
296
+ },
297
+ {
298
+ "epoch": 0.76,
299
+ "grad_norm": 6.459863662719727,
300
+ "learning_rate": 4.2631578947368425e-05,
301
+ "loss": 0.0859,
302
+ "step": 38
303
+ },
304
+ {
305
+ "epoch": 0.78,
306
+ "grad_norm": 11.704880714416504,
307
+ "learning_rate": 4.236842105263158e-05,
308
+ "loss": 0.1252,
309
+ "step": 39
310
+ },
311
+ {
312
+ "epoch": 0.8,
313
+ "grad_norm": 7.90261697769165,
314
+ "learning_rate": 4.210526315789474e-05,
315
+ "loss": 0.0541,
316
+ "step": 40
317
+ },
318
+ {
319
+ "epoch": 0.8,
320
+ "eval_accuracy": 0.9671052631578947,
321
+ "eval_loss": 0.06965293735265732,
322
+ "eval_runtime": 6.6281,
323
+ "eval_samples_per_second": 45.865,
324
+ "eval_steps_per_second": 1.207,
325
+ "step": 40
326
+ },
327
+ {
328
+ "epoch": 0.82,
329
+ "grad_norm": 2.8429932594299316,
330
+ "learning_rate": 4.18421052631579e-05,
331
+ "loss": 0.0325,
332
+ "step": 41
333
+ },
334
+ {
335
+ "epoch": 0.84,
336
+ "grad_norm": 10.533605575561523,
337
+ "learning_rate": 4.157894736842106e-05,
338
+ "loss": 0.1836,
339
+ "step": 42
340
+ },
341
+ {
342
+ "epoch": 0.86,
343
+ "grad_norm": 12.08105754852295,
344
+ "learning_rate": 4.1315789473684214e-05,
345
+ "loss": 0.224,
346
+ "step": 43
347
+ },
348
+ {
349
+ "epoch": 0.88,
350
+ "grad_norm": 9.006488800048828,
351
+ "learning_rate": 4.105263157894737e-05,
352
+ "loss": 0.2139,
353
+ "step": 44
354
+ },
355
+ {
356
+ "epoch": 0.9,
357
+ "grad_norm": 7.3875956535339355,
358
+ "learning_rate": 4.078947368421053e-05,
359
+ "loss": 0.088,
360
+ "step": 45
361
+ },
362
+ {
363
+ "epoch": 0.92,
364
+ "grad_norm": 13.019746780395508,
365
+ "learning_rate": 4.0526315789473684e-05,
366
+ "loss": 0.2363,
367
+ "step": 46
368
+ },
369
+ {
370
+ "epoch": 0.94,
371
+ "grad_norm": 37.95082473754883,
372
+ "learning_rate": 4.026315789473684e-05,
373
+ "loss": 0.3728,
374
+ "step": 47
375
+ },
376
+ {
377
+ "epoch": 0.96,
378
+ "grad_norm": 13.99156665802002,
379
+ "learning_rate": 4e-05,
380
+ "loss": 0.1376,
381
+ "step": 48
382
+ },
383
+ {
384
+ "epoch": 0.98,
385
+ "grad_norm": 5.063250541687012,
386
+ "learning_rate": 3.973684210526316e-05,
387
+ "loss": 0.0832,
388
+ "step": 49
389
+ },
390
+ {
391
+ "epoch": 1.0,
392
+ "grad_norm": 13.046541213989258,
393
+ "learning_rate": 3.9473684210526316e-05,
394
+ "loss": 0.1096,
395
+ "step": 50
396
+ },
397
+ {
398
+ "epoch": 1.0,
399
+ "eval_accuracy": 0.9572368421052632,
400
+ "eval_loss": 0.12372467666864395,
401
+ "eval_runtime": 6.8765,
402
+ "eval_samples_per_second": 44.209,
403
+ "eval_steps_per_second": 1.163,
404
+ "step": 50
405
+ },
406
+ {
407
+ "epoch": 1.02,
408
+ "grad_norm": 4.865447998046875,
409
+ "learning_rate": 3.921052631578947e-05,
410
+ "loss": 0.0366,
411
+ "step": 51
412
+ },
413
+ {
414
+ "epoch": 1.04,
415
+ "grad_norm": 12.430005073547363,
416
+ "learning_rate": 3.894736842105263e-05,
417
+ "loss": 0.1301,
418
+ "step": 52
419
+ },
420
+ {
421
+ "epoch": 1.06,
422
+ "grad_norm": 2.1291539669036865,
423
+ "learning_rate": 3.868421052631579e-05,
424
+ "loss": 0.0168,
425
+ "step": 53
426
+ },
427
+ {
428
+ "epoch": 1.08,
429
+ "grad_norm": 1.6922448873519897,
430
+ "learning_rate": 3.842105263157895e-05,
431
+ "loss": 0.0074,
432
+ "step": 54
433
+ },
434
+ {
435
+ "epoch": 1.1,
436
+ "grad_norm": 5.851077556610107,
437
+ "learning_rate": 3.815789473684211e-05,
438
+ "loss": 0.0216,
439
+ "step": 55
440
+ },
441
+ {
442
+ "epoch": 1.12,
443
+ "grad_norm": 21.35593605041504,
444
+ "learning_rate": 3.789473684210527e-05,
445
+ "loss": 0.1171,
446
+ "step": 56
447
+ },
448
+ {
449
+ "epoch": 1.14,
450
+ "grad_norm": 9.791412353515625,
451
+ "learning_rate": 3.7631578947368425e-05,
452
+ "loss": 0.1688,
453
+ "step": 57
454
+ },
455
+ {
456
+ "epoch": 1.16,
457
+ "grad_norm": 3.2542812824249268,
458
+ "learning_rate": 3.736842105263158e-05,
459
+ "loss": 0.0236,
460
+ "step": 58
461
+ },
462
+ {
463
+ "epoch": 1.18,
464
+ "grad_norm": 9.557928085327148,
465
+ "learning_rate": 3.710526315789474e-05,
466
+ "loss": 0.1782,
467
+ "step": 59
468
+ },
469
+ {
470
+ "epoch": 1.2,
471
+ "grad_norm": 12.514122009277344,
472
+ "learning_rate": 3.6842105263157895e-05,
473
+ "loss": 0.0905,
474
+ "step": 60
475
+ },
476
+ {
477
+ "epoch": 1.2,
478
+ "eval_accuracy": 0.9967105263157895,
479
+ "eval_loss": 0.027645185589790344,
480
+ "eval_runtime": 7.1325,
481
+ "eval_samples_per_second": 42.622,
482
+ "eval_steps_per_second": 1.122,
483
+ "step": 60
484
+ },
485
+ {
486
+ "epoch": 1.22,
487
+ "grad_norm": 4.434255599975586,
488
+ "learning_rate": 3.657894736842106e-05,
489
+ "loss": 0.0276,
490
+ "step": 61
491
+ },
492
+ {
493
+ "epoch": 1.24,
494
+ "grad_norm": 6.304673671722412,
495
+ "learning_rate": 3.6315789473684214e-05,
496
+ "loss": 0.0508,
497
+ "step": 62
498
+ },
499
+ {
500
+ "epoch": 1.26,
501
+ "grad_norm": 3.844200372695923,
502
+ "learning_rate": 3.605263157894737e-05,
503
+ "loss": 0.0199,
504
+ "step": 63
505
+ },
506
+ {
507
+ "epoch": 1.28,
508
+ "grad_norm": 0.8185235261917114,
509
+ "learning_rate": 3.578947368421053e-05,
510
+ "loss": 0.0069,
511
+ "step": 64
512
+ },
513
+ {
514
+ "epoch": 1.3,
515
+ "grad_norm": 6.909903049468994,
516
+ "learning_rate": 3.5526315789473684e-05,
517
+ "loss": 0.0597,
518
+ "step": 65
519
+ },
520
+ {
521
+ "epoch": 1.32,
522
+ "grad_norm": 3.5652379989624023,
523
+ "learning_rate": 3.526315789473684e-05,
524
+ "loss": 0.0141,
525
+ "step": 66
526
+ },
527
+ {
528
+ "epoch": 1.34,
529
+ "grad_norm": 11.72857666015625,
530
+ "learning_rate": 3.5e-05,
531
+ "loss": 0.1865,
532
+ "step": 67
533
+ },
534
+ {
535
+ "epoch": 1.36,
536
+ "grad_norm": 10.541884422302246,
537
+ "learning_rate": 3.473684210526316e-05,
538
+ "loss": 0.0606,
539
+ "step": 68
540
+ },
541
+ {
542
+ "epoch": 1.38,
543
+ "grad_norm": 5.210526943206787,
544
+ "learning_rate": 3.447368421052632e-05,
545
+ "loss": 0.0276,
546
+ "step": 69
547
+ },
548
+ {
549
+ "epoch": 1.4,
550
+ "grad_norm": 1.0411546230316162,
551
+ "learning_rate": 3.421052631578947e-05,
552
+ "loss": 0.009,
553
+ "step": 70
554
+ },
555
+ {
556
+ "epoch": 1.4,
557
+ "eval_accuracy": 0.9901315789473685,
558
+ "eval_loss": 0.020390018820762634,
559
+ "eval_runtime": 7.2971,
560
+ "eval_samples_per_second": 41.66,
561
+ "eval_steps_per_second": 1.096,
562
+ "step": 70
563
+ },
564
+ {
565
+ "epoch": 1.42,
566
+ "grad_norm": 0.7153753638267517,
567
+ "learning_rate": 3.3947368421052636e-05,
568
+ "loss": 0.0049,
569
+ "step": 71
570
+ },
571
+ {
572
+ "epoch": 1.44,
573
+ "grad_norm": 4.828935623168945,
574
+ "learning_rate": 3.368421052631579e-05,
575
+ "loss": 0.0297,
576
+ "step": 72
577
+ },
578
+ {
579
+ "epoch": 1.46,
580
+ "grad_norm": 9.103313446044922,
581
+ "learning_rate": 3.342105263157895e-05,
582
+ "loss": 0.071,
583
+ "step": 73
584
+ },
585
+ {
586
+ "epoch": 1.48,
587
+ "grad_norm": 7.635372161865234,
588
+ "learning_rate": 3.3157894736842106e-05,
589
+ "loss": 0.0363,
590
+ "step": 74
591
+ },
592
+ {
593
+ "epoch": 1.5,
594
+ "grad_norm": 8.57288646697998,
595
+ "learning_rate": 3.289473684210527e-05,
596
+ "loss": 0.0566,
597
+ "step": 75
598
+ },
599
+ {
600
+ "epoch": 1.52,
601
+ "grad_norm": 8.89769172668457,
602
+ "learning_rate": 3.2631578947368426e-05,
603
+ "loss": 0.0213,
604
+ "step": 76
605
+ },
606
+ {
607
+ "epoch": 1.54,
608
+ "grad_norm": 2.804738998413086,
609
+ "learning_rate": 3.236842105263158e-05,
610
+ "loss": 0.0138,
611
+ "step": 77
612
+ },
613
+ {
614
+ "epoch": 1.56,
615
+ "grad_norm": 4.841202259063721,
616
+ "learning_rate": 3.210526315789474e-05,
617
+ "loss": 0.0181,
618
+ "step": 78
619
+ },
620
+ {
621
+ "epoch": 1.58,
622
+ "grad_norm": 7.109033584594727,
623
+ "learning_rate": 3.1842105263157895e-05,
624
+ "loss": 0.0332,
625
+ "step": 79
626
+ },
627
+ {
628
+ "epoch": 1.6,
629
+ "grad_norm": 13.540718078613281,
630
+ "learning_rate": 3.157894736842105e-05,
631
+ "loss": 0.1667,
632
+ "step": 80
633
+ },
634
+ {
635
+ "epoch": 1.6,
636
+ "eval_accuracy": 0.9703947368421053,
637
+ "eval_loss": 0.10031529515981674,
638
+ "eval_runtime": 6.7491,
639
+ "eval_samples_per_second": 45.043,
640
+ "eval_steps_per_second": 1.185,
641
+ "step": 80
642
+ },
643
+ {
644
+ "epoch": 1.62,
645
+ "grad_norm": 30.279451370239258,
646
+ "learning_rate": 3.131578947368421e-05,
647
+ "loss": 0.1139,
648
+ "step": 81
649
+ },
650
+ {
651
+ "epoch": 1.64,
652
+ "grad_norm": 7.229616165161133,
653
+ "learning_rate": 3.105263157894737e-05,
654
+ "loss": 0.0284,
655
+ "step": 82
656
+ },
657
+ {
658
+ "epoch": 1.66,
659
+ "grad_norm": 0.327736496925354,
660
+ "learning_rate": 3.078947368421053e-05,
661
+ "loss": 0.0032,
662
+ "step": 83
663
+ },
664
+ {
665
+ "epoch": 1.68,
666
+ "grad_norm": 0.15309709310531616,
667
+ "learning_rate": 3.0526315789473684e-05,
668
+ "loss": 0.0011,
669
+ "step": 84
670
+ },
671
+ {
672
+ "epoch": 1.7,
673
+ "grad_norm": 13.175533294677734,
674
+ "learning_rate": 3.0263157894736844e-05,
675
+ "loss": 0.039,
676
+ "step": 85
677
+ },
678
+ {
679
+ "epoch": 1.72,
680
+ "grad_norm": 24.578662872314453,
681
+ "learning_rate": 3e-05,
682
+ "loss": 0.1834,
683
+ "step": 86
684
+ },
685
+ {
686
+ "epoch": 1.74,
687
+ "grad_norm": 6.690978527069092,
688
+ "learning_rate": 2.9736842105263157e-05,
689
+ "loss": 0.0214,
690
+ "step": 87
691
+ },
692
+ {
693
+ "epoch": 1.76,
694
+ "grad_norm": 21.097963333129883,
695
+ "learning_rate": 2.9473684210526314e-05,
696
+ "loss": 0.1569,
697
+ "step": 88
698
+ },
699
+ {
700
+ "epoch": 1.78,
701
+ "grad_norm": 7.984706401824951,
702
+ "learning_rate": 2.9210526315789477e-05,
703
+ "loss": 0.0326,
704
+ "step": 89
705
+ },
706
+ {
707
+ "epoch": 1.8,
708
+ "grad_norm": 11.008134841918945,
709
+ "learning_rate": 2.8947368421052634e-05,
710
+ "loss": 0.0602,
711
+ "step": 90
712
+ },
713
+ {
714
+ "epoch": 1.8,
715
+ "eval_accuracy": 0.9868421052631579,
716
+ "eval_loss": 0.052807241678237915,
717
+ "eval_runtime": 7.1111,
718
+ "eval_samples_per_second": 42.75,
719
+ "eval_steps_per_second": 1.125,
720
+ "step": 90
721
+ },
722
+ {
723
+ "epoch": 1.82,
724
+ "grad_norm": 21.149032592773438,
725
+ "learning_rate": 2.868421052631579e-05,
726
+ "loss": 0.0265,
727
+ "step": 91
728
+ },
729
+ {
730
+ "epoch": 1.84,
731
+ "grad_norm": 3.4383814334869385,
732
+ "learning_rate": 2.842105263157895e-05,
733
+ "loss": 0.0059,
734
+ "step": 92
735
+ },
736
+ {
737
+ "epoch": 1.86,
738
+ "grad_norm": 6.291805267333984,
739
+ "learning_rate": 2.8157894736842106e-05,
740
+ "loss": 0.0177,
741
+ "step": 93
742
+ },
743
+ {
744
+ "epoch": 1.88,
745
+ "grad_norm": 1.5197041034698486,
746
+ "learning_rate": 2.7894736842105263e-05,
747
+ "loss": 0.0065,
748
+ "step": 94
749
+ },
750
+ {
751
+ "epoch": 1.9,
752
+ "grad_norm": 0.47119140625,
753
+ "learning_rate": 2.7631578947368426e-05,
754
+ "loss": 0.0018,
755
+ "step": 95
756
+ },
757
+ {
758
+ "epoch": 1.92,
759
+ "grad_norm": 7.311519145965576,
760
+ "learning_rate": 2.7368421052631583e-05,
761
+ "loss": 0.0235,
762
+ "step": 96
763
+ },
764
+ {
765
+ "epoch": 1.94,
766
+ "grad_norm": 53.3145866394043,
767
+ "learning_rate": 2.710526315789474e-05,
768
+ "loss": 0.1186,
769
+ "step": 97
770
+ },
771
+ {
772
+ "epoch": 1.96,
773
+ "grad_norm": 13.420350074768066,
774
+ "learning_rate": 2.6842105263157896e-05,
775
+ "loss": 0.2332,
776
+ "step": 98
777
+ },
778
+ {
779
+ "epoch": 1.98,
780
+ "grad_norm": 13.3897123336792,
781
+ "learning_rate": 2.6578947368421052e-05,
782
+ "loss": 0.0759,
783
+ "step": 99
784
+ },
785
+ {
786
+ "epoch": 2.0,
787
+ "grad_norm": 1.8416231870651245,
788
+ "learning_rate": 2.6315789473684212e-05,
789
+ "loss": 0.0076,
790
+ "step": 100
791
+ },
792
+ {
793
+ "epoch": 2.0,
794
+ "eval_accuracy": 0.993421052631579,
795
+ "eval_loss": 0.027568014338612556,
796
+ "eval_runtime": 7.0992,
797
+ "eval_samples_per_second": 42.822,
798
+ "eval_steps_per_second": 1.127,
799
+ "step": 100
800
+ },
801
+ {
802
+ "epoch": 2.02,
803
+ "grad_norm": 6.973387241363525,
804
+ "learning_rate": 2.605263157894737e-05,
805
+ "loss": 0.0255,
806
+ "step": 101
807
+ },
808
+ {
809
+ "epoch": 2.04,
810
+ "grad_norm": 0.33660009503364563,
811
+ "learning_rate": 2.578947368421053e-05,
812
+ "loss": 0.0012,
813
+ "step": 102
814
+ },
815
+ {
816
+ "epoch": 2.06,
817
+ "grad_norm": 11.269298553466797,
818
+ "learning_rate": 2.5526315789473688e-05,
819
+ "loss": 0.1097,
820
+ "step": 103
821
+ },
822
+ {
823
+ "epoch": 2.08,
824
+ "grad_norm": 7.602676868438721,
825
+ "learning_rate": 2.5263157894736845e-05,
826
+ "loss": 0.034,
827
+ "step": 104
828
+ },
829
+ {
830
+ "epoch": 2.1,
831
+ "grad_norm": 7.900285720825195,
832
+ "learning_rate": 2.5e-05,
833
+ "loss": 0.0399,
834
+ "step": 105
835
+ },
836
+ {
837
+ "epoch": 2.12,
838
+ "grad_norm": 4.07220983505249,
839
+ "learning_rate": 2.4736842105263158e-05,
840
+ "loss": 0.0055,
841
+ "step": 106
842
+ },
843
+ {
844
+ "epoch": 2.14,
845
+ "grad_norm": 0.09085848927497864,
846
+ "learning_rate": 2.4473684210526318e-05,
847
+ "loss": 0.0005,
848
+ "step": 107
849
+ },
850
+ {
851
+ "epoch": 2.16,
852
+ "grad_norm": 0.5666660666465759,
853
+ "learning_rate": 2.4210526315789474e-05,
854
+ "loss": 0.0029,
855
+ "step": 108
856
+ },
857
+ {
858
+ "epoch": 2.18,
859
+ "grad_norm": 12.90062427520752,
860
+ "learning_rate": 2.394736842105263e-05,
861
+ "loss": 0.051,
862
+ "step": 109
863
+ },
864
+ {
865
+ "epoch": 2.2,
866
+ "grad_norm": 0.24952377378940582,
867
+ "learning_rate": 2.368421052631579e-05,
868
+ "loss": 0.0007,
869
+ "step": 110
870
+ },
871
+ {
872
+ "epoch": 2.2,
873
+ "eval_accuracy": 0.9835526315789473,
874
+ "eval_loss": 0.05723079293966293,
875
+ "eval_runtime": 7.2757,
876
+ "eval_samples_per_second": 41.783,
877
+ "eval_steps_per_second": 1.1,
878
+ "step": 110
879
+ },
880
+ {
881
+ "epoch": 2.22,
882
+ "grad_norm": 2.654731512069702,
883
+ "learning_rate": 2.342105263157895e-05,
884
+ "loss": 0.0036,
885
+ "step": 111
886
+ },
887
+ {
888
+ "epoch": 2.24,
889
+ "grad_norm": 0.5114130973815918,
890
+ "learning_rate": 2.3157894736842107e-05,
891
+ "loss": 0.0012,
892
+ "step": 112
893
+ },
894
+ {
895
+ "epoch": 2.26,
896
+ "grad_norm": 9.55749797821045,
897
+ "learning_rate": 2.2894736842105263e-05,
898
+ "loss": 0.0195,
899
+ "step": 113
900
+ },
901
+ {
902
+ "epoch": 2.28,
903
+ "grad_norm": 11.102432250976562,
904
+ "learning_rate": 2.2631578947368423e-05,
905
+ "loss": 0.0204,
906
+ "step": 114
907
+ },
908
+ {
909
+ "epoch": 2.3,
910
+ "grad_norm": 2.684544324874878,
911
+ "learning_rate": 2.236842105263158e-05,
912
+ "loss": 0.0047,
913
+ "step": 115
914
+ },
915
+ {
916
+ "epoch": 2.32,
917
+ "grad_norm": 0.22236469388008118,
918
+ "learning_rate": 2.2105263157894736e-05,
919
+ "loss": 0.0008,
920
+ "step": 116
921
+ },
922
+ {
923
+ "epoch": 2.34,
924
+ "grad_norm": 8.926434516906738,
925
+ "learning_rate": 2.1842105263157896e-05,
926
+ "loss": 0.0223,
927
+ "step": 117
928
+ },
929
+ {
930
+ "epoch": 2.36,
931
+ "grad_norm": 3.3315141201019287,
932
+ "learning_rate": 2.1578947368421053e-05,
933
+ "loss": 0.0117,
934
+ "step": 118
935
+ },
936
+ {
937
+ "epoch": 2.38,
938
+ "grad_norm": 10.430353164672852,
939
+ "learning_rate": 2.1315789473684212e-05,
940
+ "loss": 0.0303,
941
+ "step": 119
942
+ },
943
+ {
944
+ "epoch": 2.4,
945
+ "grad_norm": 0.15297500789165497,
946
+ "learning_rate": 2.105263157894737e-05,
947
+ "loss": 0.0008,
948
+ "step": 120
949
+ },
950
+ {
951
+ "epoch": 2.4,
952
+ "eval_accuracy": 0.9967105263157895,
953
+ "eval_loss": 0.009093067608773708,
954
+ "eval_runtime": 6.8131,
955
+ "eval_samples_per_second": 44.62,
956
+ "eval_steps_per_second": 1.174,
957
+ "step": 120
958
+ },
959
+ {
960
+ "epoch": 2.42,
961
+ "grad_norm": 0.6949332356452942,
962
+ "learning_rate": 2.078947368421053e-05,
963
+ "loss": 0.0018,
964
+ "step": 121
965
+ },
966
+ {
967
+ "epoch": 2.44,
968
+ "grad_norm": 0.10584941506385803,
969
+ "learning_rate": 2.0526315789473685e-05,
970
+ "loss": 0.0006,
971
+ "step": 122
972
+ },
973
+ {
974
+ "epoch": 2.46,
975
+ "grad_norm": 2.782750368118286,
976
+ "learning_rate": 2.0263157894736842e-05,
977
+ "loss": 0.0064,
978
+ "step": 123
979
+ },
980
+ {
981
+ "epoch": 2.48,
982
+ "grad_norm": 0.1843283325433731,
983
+ "learning_rate": 2e-05,
984
+ "loss": 0.0007,
985
+ "step": 124
986
+ },
987
+ {
988
+ "epoch": 2.5,
989
+ "grad_norm": 19.077423095703125,
990
+ "learning_rate": 1.9736842105263158e-05,
991
+ "loss": 0.1095,
992
+ "step": 125
993
+ },
994
+ {
995
+ "epoch": 2.52,
996
+ "grad_norm": 9.631857872009277,
997
+ "learning_rate": 1.9473684210526315e-05,
998
+ "loss": 0.0286,
999
+ "step": 126
1000
+ },
1001
+ {
1002
+ "epoch": 2.54,
1003
+ "grad_norm": 2.46711802482605,
1004
+ "learning_rate": 1.9210526315789474e-05,
1005
+ "loss": 0.0042,
1006
+ "step": 127
1007
+ },
1008
+ {
1009
+ "epoch": 2.56,
1010
+ "grad_norm": 0.9471967220306396,
1011
+ "learning_rate": 1.8947368421052634e-05,
1012
+ "loss": 0.0038,
1013
+ "step": 128
1014
+ },
1015
+ {
1016
+ "epoch": 2.58,
1017
+ "grad_norm": 0.1509537547826767,
1018
+ "learning_rate": 1.868421052631579e-05,
1019
+ "loss": 0.0009,
1020
+ "step": 129
1021
+ },
1022
+ {
1023
+ "epoch": 2.6,
1024
+ "grad_norm": 16.95519256591797,
1025
+ "learning_rate": 1.8421052631578947e-05,
1026
+ "loss": 0.1277,
1027
+ "step": 130
1028
+ },
1029
+ {
1030
+ "epoch": 2.6,
1031
+ "eval_accuracy": 0.9967105263157895,
1032
+ "eval_loss": 0.004848334938287735,
1033
+ "eval_runtime": 7.1393,
1034
+ "eval_samples_per_second": 42.581,
1035
+ "eval_steps_per_second": 1.121,
1036
+ "step": 130
1037
+ }
1038
+ ],
1039
+ "logging_steps": 1,
1040
+ "max_steps": 200,
1041
+ "num_input_tokens_seen": 0,
1042
+ "num_train_epochs": 4,
1043
+ "save_steps": 10,
1044
+ "total_flos": 4.0410637592938906e+17,
1045
+ "train_batch_size": 40,
1046
+ "trial_name": null,
1047
+ "trial_params": null
1048
+ }
checkpoint-130/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d12326b817ccf86422554696b6e15d756b4174e67e313ecfbe14aadf3bb73244
3
+ size 4856
checkpoint-190/config.json ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "Organika/sdxl-detector",
3
+ "_num_labels": 2,
4
+ "architectures": [
5
+ "SwinForImageClassification"
6
+ ],
7
+ "attention_probs_dropout_prob": 0.0,
8
+ "depths": [
9
+ 2,
10
+ 2,
11
+ 18,
12
+ 2
13
+ ],
14
+ "drop_path_rate": 0.1,
15
+ "embed_dim": 128,
16
+ "encoder_stride": 32,
17
+ "hidden_act": "gelu",
18
+ "hidden_dropout_prob": 0.0,
19
+ "hidden_size": 1024,
20
+ "id2label": {
21
+ "0": "artificial",
22
+ "1": "human"
23
+ },
24
+ "image_size": 224,
25
+ "initializer_range": 0.02,
26
+ "label2id": {
27
+ "artificial": 0,
28
+ "human": 1
29
+ },
30
+ "layer_norm_eps": 1e-05,
31
+ "max_length": 128,
32
+ "mlp_ratio": 4.0,
33
+ "model_type": "swin",
34
+ "num_channels": 3,
35
+ "num_heads": [
36
+ 4,
37
+ 8,
38
+ 16,
39
+ 32
40
+ ],
41
+ "num_layers": 4,
42
+ "out_features": [
43
+ "stage4"
44
+ ],
45
+ "out_indices": [
46
+ 4
47
+ ],
48
+ "padding": "max_length",
49
+ "patch_size": 4,
50
+ "path_norm": true,
51
+ "problem_type": "single_label_classification",
52
+ "qkv_bias": true,
53
+ "stage_names": [
54
+ "stem",
55
+ "stage1",
56
+ "stage2",
57
+ "stage3",
58
+ "stage4"
59
+ ],
60
+ "torch_dtype": "float32",
61
+ "transformers_version": "4.39.3",
62
+ "use_absolute_embeddings": false,
63
+ "window_size": 7
64
+ }
checkpoint-190/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a362f01847b6f823cfdf2f60859ea678b90242148dc1eb2218a5bf6ee5fa54bd
3
+ size 347498816
checkpoint-190/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a961a977a85ba1ef4943ab2cc748ffd5de8ef9677eb7c5f0b3c752dd55f1986
3
+ size 694317645
checkpoint-190/preprocessor_config.json ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_valid_processor_keys": [
3
+ "images",
4
+ "do_resize",
5
+ "size",
6
+ "resample",
7
+ "do_rescale",
8
+ "rescale_factor",
9
+ "do_normalize",
10
+ "image_mean",
11
+ "image_std",
12
+ "return_tensors",
13
+ "data_format",
14
+ "input_data_format"
15
+ ],
16
+ "do_normalize": true,
17
+ "do_rescale": true,
18
+ "do_resize": true,
19
+ "image_mean": [
20
+ 0.485,
21
+ 0.456,
22
+ 0.406
23
+ ],
24
+ "image_processor_type": "ViTImageProcessor",
25
+ "image_std": [
26
+ 0.229,
27
+ 0.224,
28
+ 0.225
29
+ ],
30
+ "resample": 3,
31
+ "rescale_factor": 0.00392156862745098,
32
+ "size": {
33
+ "height": 224,
34
+ "width": 224
35
+ }
36
+ }
checkpoint-190/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2b872ea464d68185e024295eb3a72fc1615395a5617c66d16dbb8cbf3226ee7
3
+ size 14244
checkpoint-190/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:359e75abc7a98a86bc32539a93eabc5596c39ca4b881df61b9957f610c6eecb0
3
+ size 1064
checkpoint-190/trainer_state.json ADDED
@@ -0,0 +1,1522 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.004848334938287735,
3
+ "best_model_checkpoint": "ai_vs_real_image_detection/checkpoint-130",
4
+ "epoch": 3.8,
5
+ "eval_steps": 10,
6
+ "global_step": 190,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.02,
13
+ "grad_norm": 52.89345169067383,
14
+ "learning_rate": 5e-06,
15
+ "loss": 2.2763,
16
+ "step": 1
17
+ },
18
+ {
19
+ "epoch": 0.04,
20
+ "grad_norm": 62.66461944580078,
21
+ "learning_rate": 1e-05,
22
+ "loss": 1.8254,
23
+ "step": 2
24
+ },
25
+ {
26
+ "epoch": 0.06,
27
+ "grad_norm": 66.1015625,
28
+ "learning_rate": 1.5e-05,
29
+ "loss": 1.8906,
30
+ "step": 3
31
+ },
32
+ {
33
+ "epoch": 0.08,
34
+ "grad_norm": 56.700233459472656,
35
+ "learning_rate": 2e-05,
36
+ "loss": 1.3529,
37
+ "step": 4
38
+ },
39
+ {
40
+ "epoch": 0.1,
41
+ "grad_norm": 66.10917663574219,
42
+ "learning_rate": 2.5e-05,
43
+ "loss": 1.4672,
44
+ "step": 5
45
+ },
46
+ {
47
+ "epoch": 0.12,
48
+ "grad_norm": 52.68284225463867,
49
+ "learning_rate": 3e-05,
50
+ "loss": 1.0578,
51
+ "step": 6
52
+ },
53
+ {
54
+ "epoch": 0.14,
55
+ "grad_norm": 42.4580078125,
56
+ "learning_rate": 3.5e-05,
57
+ "loss": 1.1221,
58
+ "step": 7
59
+ },
60
+ {
61
+ "epoch": 0.16,
62
+ "grad_norm": 64.87982940673828,
63
+ "learning_rate": 4e-05,
64
+ "loss": 0.9313,
65
+ "step": 8
66
+ },
67
+ {
68
+ "epoch": 0.18,
69
+ "grad_norm": 56.227081298828125,
70
+ "learning_rate": 4.5e-05,
71
+ "loss": 0.644,
72
+ "step": 9
73
+ },
74
+ {
75
+ "epoch": 0.2,
76
+ "grad_norm": 13.719300270080566,
77
+ "learning_rate": 5e-05,
78
+ "loss": 0.4305,
79
+ "step": 10
80
+ },
81
+ {
82
+ "epoch": 0.2,
83
+ "eval_accuracy": 0.9144736842105263,
84
+ "eval_loss": 0.19980163872241974,
85
+ "eval_runtime": 6.726,
86
+ "eval_samples_per_second": 45.198,
87
+ "eval_steps_per_second": 1.189,
88
+ "step": 10
89
+ },
90
+ {
91
+ "epoch": 0.22,
92
+ "grad_norm": 16.044822692871094,
93
+ "learning_rate": 4.973684210526316e-05,
94
+ "loss": 0.3892,
95
+ "step": 11
96
+ },
97
+ {
98
+ "epoch": 0.24,
99
+ "grad_norm": 15.53343391418457,
100
+ "learning_rate": 4.9473684210526315e-05,
101
+ "loss": 0.3683,
102
+ "step": 12
103
+ },
104
+ {
105
+ "epoch": 0.26,
106
+ "grad_norm": 13.435264587402344,
107
+ "learning_rate": 4.921052631578947e-05,
108
+ "loss": 0.3594,
109
+ "step": 13
110
+ },
111
+ {
112
+ "epoch": 0.28,
113
+ "grad_norm": 17.45833969116211,
114
+ "learning_rate": 4.8947368421052635e-05,
115
+ "loss": 0.5304,
116
+ "step": 14
117
+ },
118
+ {
119
+ "epoch": 0.3,
120
+ "grad_norm": 14.26673412322998,
121
+ "learning_rate": 4.868421052631579e-05,
122
+ "loss": 0.4804,
123
+ "step": 15
124
+ },
125
+ {
126
+ "epoch": 0.32,
127
+ "grad_norm": 9.26322078704834,
128
+ "learning_rate": 4.842105263157895e-05,
129
+ "loss": 0.3633,
130
+ "step": 16
131
+ },
132
+ {
133
+ "epoch": 0.34,
134
+ "grad_norm": 8.355168342590332,
135
+ "learning_rate": 4.8157894736842105e-05,
136
+ "loss": 0.4711,
137
+ "step": 17
138
+ },
139
+ {
140
+ "epoch": 0.36,
141
+ "grad_norm": 8.733236312866211,
142
+ "learning_rate": 4.789473684210526e-05,
143
+ "loss": 0.3954,
144
+ "step": 18
145
+ },
146
+ {
147
+ "epoch": 0.38,
148
+ "grad_norm": 5.385647296905518,
149
+ "learning_rate": 4.7631578947368424e-05,
150
+ "loss": 0.1943,
151
+ "step": 19
152
+ },
153
+ {
154
+ "epoch": 0.4,
155
+ "grad_norm": 9.892962455749512,
156
+ "learning_rate": 4.736842105263158e-05,
157
+ "loss": 0.3209,
158
+ "step": 20
159
+ },
160
+ {
161
+ "epoch": 0.4,
162
+ "eval_accuracy": 0.8881578947368421,
163
+ "eval_loss": 0.24087880551815033,
164
+ "eval_runtime": 6.8213,
165
+ "eval_samples_per_second": 44.566,
166
+ "eval_steps_per_second": 1.173,
167
+ "step": 20
168
+ },
169
+ {
170
+ "epoch": 0.42,
171
+ "grad_norm": 5.46142578125,
172
+ "learning_rate": 4.7105263157894744e-05,
173
+ "loss": 0.1505,
174
+ "step": 21
175
+ },
176
+ {
177
+ "epoch": 0.44,
178
+ "grad_norm": 9.863460540771484,
179
+ "learning_rate": 4.68421052631579e-05,
180
+ "loss": 0.2481,
181
+ "step": 22
182
+ },
183
+ {
184
+ "epoch": 0.46,
185
+ "grad_norm": 7.595032691955566,
186
+ "learning_rate": 4.657894736842106e-05,
187
+ "loss": 0.2001,
188
+ "step": 23
189
+ },
190
+ {
191
+ "epoch": 0.48,
192
+ "grad_norm": 17.375865936279297,
193
+ "learning_rate": 4.6315789473684214e-05,
194
+ "loss": 0.336,
195
+ "step": 24
196
+ },
197
+ {
198
+ "epoch": 0.5,
199
+ "grad_norm": 5.597734451293945,
200
+ "learning_rate": 4.605263157894737e-05,
201
+ "loss": 0.1395,
202
+ "step": 25
203
+ },
204
+ {
205
+ "epoch": 0.52,
206
+ "grad_norm": 7.56685209274292,
207
+ "learning_rate": 4.5789473684210527e-05,
208
+ "loss": 0.1739,
209
+ "step": 26
210
+ },
211
+ {
212
+ "epoch": 0.54,
213
+ "grad_norm": 13.769929885864258,
214
+ "learning_rate": 4.552631578947369e-05,
215
+ "loss": 0.1959,
216
+ "step": 27
217
+ },
218
+ {
219
+ "epoch": 0.56,
220
+ "grad_norm": 10.677892684936523,
221
+ "learning_rate": 4.5263157894736846e-05,
222
+ "loss": 0.2015,
223
+ "step": 28
224
+ },
225
+ {
226
+ "epoch": 0.58,
227
+ "grad_norm": 3.645374059677124,
228
+ "learning_rate": 4.5e-05,
229
+ "loss": 0.0576,
230
+ "step": 29
231
+ },
232
+ {
233
+ "epoch": 0.6,
234
+ "grad_norm": 12.538244247436523,
235
+ "learning_rate": 4.473684210526316e-05,
236
+ "loss": 0.324,
237
+ "step": 30
238
+ },
239
+ {
240
+ "epoch": 0.6,
241
+ "eval_accuracy": 0.9802631578947368,
242
+ "eval_loss": 0.056003469973802567,
243
+ "eval_runtime": 6.4198,
244
+ "eval_samples_per_second": 47.353,
245
+ "eval_steps_per_second": 1.246,
246
+ "step": 30
247
+ },
248
+ {
249
+ "epoch": 0.62,
250
+ "grad_norm": 8.700516700744629,
251
+ "learning_rate": 4.4473684210526316e-05,
252
+ "loss": 0.1199,
253
+ "step": 31
254
+ },
255
+ {
256
+ "epoch": 0.64,
257
+ "grad_norm": 6.926602840423584,
258
+ "learning_rate": 4.421052631578947e-05,
259
+ "loss": 0.1471,
260
+ "step": 32
261
+ },
262
+ {
263
+ "epoch": 0.66,
264
+ "grad_norm": 8.461554527282715,
265
+ "learning_rate": 4.394736842105263e-05,
266
+ "loss": 0.0649,
267
+ "step": 33
268
+ },
269
+ {
270
+ "epoch": 0.68,
271
+ "grad_norm": 7.129871845245361,
272
+ "learning_rate": 4.368421052631579e-05,
273
+ "loss": 0.1167,
274
+ "step": 34
275
+ },
276
+ {
277
+ "epoch": 0.7,
278
+ "grad_norm": 20.945388793945312,
279
+ "learning_rate": 4.342105263157895e-05,
280
+ "loss": 0.3279,
281
+ "step": 35
282
+ },
283
+ {
284
+ "epoch": 0.72,
285
+ "grad_norm": 4.466580390930176,
286
+ "learning_rate": 4.3157894736842105e-05,
287
+ "loss": 0.0517,
288
+ "step": 36
289
+ },
290
+ {
291
+ "epoch": 0.74,
292
+ "grad_norm": 9.97375202178955,
293
+ "learning_rate": 4.289473684210527e-05,
294
+ "loss": 0.1241,
295
+ "step": 37
296
+ },
297
+ {
298
+ "epoch": 0.76,
299
+ "grad_norm": 6.459863662719727,
300
+ "learning_rate": 4.2631578947368425e-05,
301
+ "loss": 0.0859,
302
+ "step": 38
303
+ },
304
+ {
305
+ "epoch": 0.78,
306
+ "grad_norm": 11.704880714416504,
307
+ "learning_rate": 4.236842105263158e-05,
308
+ "loss": 0.1252,
309
+ "step": 39
310
+ },
311
+ {
312
+ "epoch": 0.8,
313
+ "grad_norm": 7.90261697769165,
314
+ "learning_rate": 4.210526315789474e-05,
315
+ "loss": 0.0541,
316
+ "step": 40
317
+ },
318
+ {
319
+ "epoch": 0.8,
320
+ "eval_accuracy": 0.9671052631578947,
321
+ "eval_loss": 0.06965293735265732,
322
+ "eval_runtime": 6.6281,
323
+ "eval_samples_per_second": 45.865,
324
+ "eval_steps_per_second": 1.207,
325
+ "step": 40
326
+ },
327
+ {
328
+ "epoch": 0.82,
329
+ "grad_norm": 2.8429932594299316,
330
+ "learning_rate": 4.18421052631579e-05,
331
+ "loss": 0.0325,
332
+ "step": 41
333
+ },
334
+ {
335
+ "epoch": 0.84,
336
+ "grad_norm": 10.533605575561523,
337
+ "learning_rate": 4.157894736842106e-05,
338
+ "loss": 0.1836,
339
+ "step": 42
340
+ },
341
+ {
342
+ "epoch": 0.86,
343
+ "grad_norm": 12.08105754852295,
344
+ "learning_rate": 4.1315789473684214e-05,
345
+ "loss": 0.224,
346
+ "step": 43
347
+ },
348
+ {
349
+ "epoch": 0.88,
350
+ "grad_norm": 9.006488800048828,
351
+ "learning_rate": 4.105263157894737e-05,
352
+ "loss": 0.2139,
353
+ "step": 44
354
+ },
355
+ {
356
+ "epoch": 0.9,
357
+ "grad_norm": 7.3875956535339355,
358
+ "learning_rate": 4.078947368421053e-05,
359
+ "loss": 0.088,
360
+ "step": 45
361
+ },
362
+ {
363
+ "epoch": 0.92,
364
+ "grad_norm": 13.019746780395508,
365
+ "learning_rate": 4.0526315789473684e-05,
366
+ "loss": 0.2363,
367
+ "step": 46
368
+ },
369
+ {
370
+ "epoch": 0.94,
371
+ "grad_norm": 37.95082473754883,
372
+ "learning_rate": 4.026315789473684e-05,
373
+ "loss": 0.3728,
374
+ "step": 47
375
+ },
376
+ {
377
+ "epoch": 0.96,
378
+ "grad_norm": 13.99156665802002,
379
+ "learning_rate": 4e-05,
380
+ "loss": 0.1376,
381
+ "step": 48
382
+ },
383
+ {
384
+ "epoch": 0.98,
385
+ "grad_norm": 5.063250541687012,
386
+ "learning_rate": 3.973684210526316e-05,
387
+ "loss": 0.0832,
388
+ "step": 49
389
+ },
390
+ {
391
+ "epoch": 1.0,
392
+ "grad_norm": 13.046541213989258,
393
+ "learning_rate": 3.9473684210526316e-05,
394
+ "loss": 0.1096,
395
+ "step": 50
396
+ },
397
+ {
398
+ "epoch": 1.0,
399
+ "eval_accuracy": 0.9572368421052632,
400
+ "eval_loss": 0.12372467666864395,
401
+ "eval_runtime": 6.8765,
402
+ "eval_samples_per_second": 44.209,
403
+ "eval_steps_per_second": 1.163,
404
+ "step": 50
405
+ },
406
+ {
407
+ "epoch": 1.02,
408
+ "grad_norm": 4.865447998046875,
409
+ "learning_rate": 3.921052631578947e-05,
410
+ "loss": 0.0366,
411
+ "step": 51
412
+ },
413
+ {
414
+ "epoch": 1.04,
415
+ "grad_norm": 12.430005073547363,
416
+ "learning_rate": 3.894736842105263e-05,
417
+ "loss": 0.1301,
418
+ "step": 52
419
+ },
420
+ {
421
+ "epoch": 1.06,
422
+ "grad_norm": 2.1291539669036865,
423
+ "learning_rate": 3.868421052631579e-05,
424
+ "loss": 0.0168,
425
+ "step": 53
426
+ },
427
+ {
428
+ "epoch": 1.08,
429
+ "grad_norm": 1.6922448873519897,
430
+ "learning_rate": 3.842105263157895e-05,
431
+ "loss": 0.0074,
432
+ "step": 54
433
+ },
434
+ {
435
+ "epoch": 1.1,
436
+ "grad_norm": 5.851077556610107,
437
+ "learning_rate": 3.815789473684211e-05,
438
+ "loss": 0.0216,
439
+ "step": 55
440
+ },
441
+ {
442
+ "epoch": 1.12,
443
+ "grad_norm": 21.35593605041504,
444
+ "learning_rate": 3.789473684210527e-05,
445
+ "loss": 0.1171,
446
+ "step": 56
447
+ },
448
+ {
449
+ "epoch": 1.14,
450
+ "grad_norm": 9.791412353515625,
451
+ "learning_rate": 3.7631578947368425e-05,
452
+ "loss": 0.1688,
453
+ "step": 57
454
+ },
455
+ {
456
+ "epoch": 1.16,
457
+ "grad_norm": 3.2542812824249268,
458
+ "learning_rate": 3.736842105263158e-05,
459
+ "loss": 0.0236,
460
+ "step": 58
461
+ },
462
+ {
463
+ "epoch": 1.18,
464
+ "grad_norm": 9.557928085327148,
465
+ "learning_rate": 3.710526315789474e-05,
466
+ "loss": 0.1782,
467
+ "step": 59
468
+ },
469
+ {
470
+ "epoch": 1.2,
471
+ "grad_norm": 12.514122009277344,
472
+ "learning_rate": 3.6842105263157895e-05,
473
+ "loss": 0.0905,
474
+ "step": 60
475
+ },
476
+ {
477
+ "epoch": 1.2,
478
+ "eval_accuracy": 0.9967105263157895,
479
+ "eval_loss": 0.027645185589790344,
480
+ "eval_runtime": 7.1325,
481
+ "eval_samples_per_second": 42.622,
482
+ "eval_steps_per_second": 1.122,
483
+ "step": 60
484
+ },
485
+ {
486
+ "epoch": 1.22,
487
+ "grad_norm": 4.434255599975586,
488
+ "learning_rate": 3.657894736842106e-05,
489
+ "loss": 0.0276,
490
+ "step": 61
491
+ },
492
+ {
493
+ "epoch": 1.24,
494
+ "grad_norm": 6.304673671722412,
495
+ "learning_rate": 3.6315789473684214e-05,
496
+ "loss": 0.0508,
497
+ "step": 62
498
+ },
499
+ {
500
+ "epoch": 1.26,
501
+ "grad_norm": 3.844200372695923,
502
+ "learning_rate": 3.605263157894737e-05,
503
+ "loss": 0.0199,
504
+ "step": 63
505
+ },
506
+ {
507
+ "epoch": 1.28,
508
+ "grad_norm": 0.8185235261917114,
509
+ "learning_rate": 3.578947368421053e-05,
510
+ "loss": 0.0069,
511
+ "step": 64
512
+ },
513
+ {
514
+ "epoch": 1.3,
515
+ "grad_norm": 6.909903049468994,
516
+ "learning_rate": 3.5526315789473684e-05,
517
+ "loss": 0.0597,
518
+ "step": 65
519
+ },
520
+ {
521
+ "epoch": 1.32,
522
+ "grad_norm": 3.5652379989624023,
523
+ "learning_rate": 3.526315789473684e-05,
524
+ "loss": 0.0141,
525
+ "step": 66
526
+ },
527
+ {
528
+ "epoch": 1.34,
529
+ "grad_norm": 11.72857666015625,
530
+ "learning_rate": 3.5e-05,
531
+ "loss": 0.1865,
532
+ "step": 67
533
+ },
534
+ {
535
+ "epoch": 1.36,
536
+ "grad_norm": 10.541884422302246,
537
+ "learning_rate": 3.473684210526316e-05,
538
+ "loss": 0.0606,
539
+ "step": 68
540
+ },
541
+ {
542
+ "epoch": 1.38,
543
+ "grad_norm": 5.210526943206787,
544
+ "learning_rate": 3.447368421052632e-05,
545
+ "loss": 0.0276,
546
+ "step": 69
547
+ },
548
+ {
549
+ "epoch": 1.4,
550
+ "grad_norm": 1.0411546230316162,
551
+ "learning_rate": 3.421052631578947e-05,
552
+ "loss": 0.009,
553
+ "step": 70
554
+ },
555
+ {
556
+ "epoch": 1.4,
557
+ "eval_accuracy": 0.9901315789473685,
558
+ "eval_loss": 0.020390018820762634,
559
+ "eval_runtime": 7.2971,
560
+ "eval_samples_per_second": 41.66,
561
+ "eval_steps_per_second": 1.096,
562
+ "step": 70
563
+ },
564
+ {
565
+ "epoch": 1.42,
566
+ "grad_norm": 0.7153753638267517,
567
+ "learning_rate": 3.3947368421052636e-05,
568
+ "loss": 0.0049,
569
+ "step": 71
570
+ },
571
+ {
572
+ "epoch": 1.44,
573
+ "grad_norm": 4.828935623168945,
574
+ "learning_rate": 3.368421052631579e-05,
575
+ "loss": 0.0297,
576
+ "step": 72
577
+ },
578
+ {
579
+ "epoch": 1.46,
580
+ "grad_norm": 9.103313446044922,
581
+ "learning_rate": 3.342105263157895e-05,
582
+ "loss": 0.071,
583
+ "step": 73
584
+ },
585
+ {
586
+ "epoch": 1.48,
587
+ "grad_norm": 7.635372161865234,
588
+ "learning_rate": 3.3157894736842106e-05,
589
+ "loss": 0.0363,
590
+ "step": 74
591
+ },
592
+ {
593
+ "epoch": 1.5,
594
+ "grad_norm": 8.57288646697998,
595
+ "learning_rate": 3.289473684210527e-05,
596
+ "loss": 0.0566,
597
+ "step": 75
598
+ },
599
+ {
600
+ "epoch": 1.52,
601
+ "grad_norm": 8.89769172668457,
602
+ "learning_rate": 3.2631578947368426e-05,
603
+ "loss": 0.0213,
604
+ "step": 76
605
+ },
606
+ {
607
+ "epoch": 1.54,
608
+ "grad_norm": 2.804738998413086,
609
+ "learning_rate": 3.236842105263158e-05,
610
+ "loss": 0.0138,
611
+ "step": 77
612
+ },
613
+ {
614
+ "epoch": 1.56,
615
+ "grad_norm": 4.841202259063721,
616
+ "learning_rate": 3.210526315789474e-05,
617
+ "loss": 0.0181,
618
+ "step": 78
619
+ },
620
+ {
621
+ "epoch": 1.58,
622
+ "grad_norm": 7.109033584594727,
623
+ "learning_rate": 3.1842105263157895e-05,
624
+ "loss": 0.0332,
625
+ "step": 79
626
+ },
627
+ {
628
+ "epoch": 1.6,
629
+ "grad_norm": 13.540718078613281,
630
+ "learning_rate": 3.157894736842105e-05,
631
+ "loss": 0.1667,
632
+ "step": 80
633
+ },
634
+ {
635
+ "epoch": 1.6,
636
+ "eval_accuracy": 0.9703947368421053,
637
+ "eval_loss": 0.10031529515981674,
638
+ "eval_runtime": 6.7491,
639
+ "eval_samples_per_second": 45.043,
640
+ "eval_steps_per_second": 1.185,
641
+ "step": 80
642
+ },
643
+ {
644
+ "epoch": 1.62,
645
+ "grad_norm": 30.279451370239258,
646
+ "learning_rate": 3.131578947368421e-05,
647
+ "loss": 0.1139,
648
+ "step": 81
649
+ },
650
+ {
651
+ "epoch": 1.64,
652
+ "grad_norm": 7.229616165161133,
653
+ "learning_rate": 3.105263157894737e-05,
654
+ "loss": 0.0284,
655
+ "step": 82
656
+ },
657
+ {
658
+ "epoch": 1.66,
659
+ "grad_norm": 0.327736496925354,
660
+ "learning_rate": 3.078947368421053e-05,
661
+ "loss": 0.0032,
662
+ "step": 83
663
+ },
664
+ {
665
+ "epoch": 1.68,
666
+ "grad_norm": 0.15309709310531616,
667
+ "learning_rate": 3.0526315789473684e-05,
668
+ "loss": 0.0011,
669
+ "step": 84
670
+ },
671
+ {
672
+ "epoch": 1.7,
673
+ "grad_norm": 13.175533294677734,
674
+ "learning_rate": 3.0263157894736844e-05,
675
+ "loss": 0.039,
676
+ "step": 85
677
+ },
678
+ {
679
+ "epoch": 1.72,
680
+ "grad_norm": 24.578662872314453,
681
+ "learning_rate": 3e-05,
682
+ "loss": 0.1834,
683
+ "step": 86
684
+ },
685
+ {
686
+ "epoch": 1.74,
687
+ "grad_norm": 6.690978527069092,
688
+ "learning_rate": 2.9736842105263157e-05,
689
+ "loss": 0.0214,
690
+ "step": 87
691
+ },
692
+ {
693
+ "epoch": 1.76,
694
+ "grad_norm": 21.097963333129883,
695
+ "learning_rate": 2.9473684210526314e-05,
696
+ "loss": 0.1569,
697
+ "step": 88
698
+ },
699
+ {
700
+ "epoch": 1.78,
701
+ "grad_norm": 7.984706401824951,
702
+ "learning_rate": 2.9210526315789477e-05,
703
+ "loss": 0.0326,
704
+ "step": 89
705
+ },
706
+ {
707
+ "epoch": 1.8,
708
+ "grad_norm": 11.008134841918945,
709
+ "learning_rate": 2.8947368421052634e-05,
710
+ "loss": 0.0602,
711
+ "step": 90
712
+ },
713
+ {
714
+ "epoch": 1.8,
715
+ "eval_accuracy": 0.9868421052631579,
716
+ "eval_loss": 0.052807241678237915,
717
+ "eval_runtime": 7.1111,
718
+ "eval_samples_per_second": 42.75,
719
+ "eval_steps_per_second": 1.125,
720
+ "step": 90
721
+ },
722
+ {
723
+ "epoch": 1.82,
724
+ "grad_norm": 21.149032592773438,
725
+ "learning_rate": 2.868421052631579e-05,
726
+ "loss": 0.0265,
727
+ "step": 91
728
+ },
729
+ {
730
+ "epoch": 1.84,
731
+ "grad_norm": 3.4383814334869385,
732
+ "learning_rate": 2.842105263157895e-05,
733
+ "loss": 0.0059,
734
+ "step": 92
735
+ },
736
+ {
737
+ "epoch": 1.86,
738
+ "grad_norm": 6.291805267333984,
739
+ "learning_rate": 2.8157894736842106e-05,
740
+ "loss": 0.0177,
741
+ "step": 93
742
+ },
743
+ {
744
+ "epoch": 1.88,
745
+ "grad_norm": 1.5197041034698486,
746
+ "learning_rate": 2.7894736842105263e-05,
747
+ "loss": 0.0065,
748
+ "step": 94
749
+ },
750
+ {
751
+ "epoch": 1.9,
752
+ "grad_norm": 0.47119140625,
753
+ "learning_rate": 2.7631578947368426e-05,
754
+ "loss": 0.0018,
755
+ "step": 95
756
+ },
757
+ {
758
+ "epoch": 1.92,
759
+ "grad_norm": 7.311519145965576,
760
+ "learning_rate": 2.7368421052631583e-05,
761
+ "loss": 0.0235,
762
+ "step": 96
763
+ },
764
+ {
765
+ "epoch": 1.94,
766
+ "grad_norm": 53.3145866394043,
767
+ "learning_rate": 2.710526315789474e-05,
768
+ "loss": 0.1186,
769
+ "step": 97
770
+ },
771
+ {
772
+ "epoch": 1.96,
773
+ "grad_norm": 13.420350074768066,
774
+ "learning_rate": 2.6842105263157896e-05,
775
+ "loss": 0.2332,
776
+ "step": 98
777
+ },
778
+ {
779
+ "epoch": 1.98,
780
+ "grad_norm": 13.3897123336792,
781
+ "learning_rate": 2.6578947368421052e-05,
782
+ "loss": 0.0759,
783
+ "step": 99
784
+ },
785
+ {
786
+ "epoch": 2.0,
787
+ "grad_norm": 1.8416231870651245,
788
+ "learning_rate": 2.6315789473684212e-05,
789
+ "loss": 0.0076,
790
+ "step": 100
791
+ },
792
+ {
793
+ "epoch": 2.0,
794
+ "eval_accuracy": 0.993421052631579,
795
+ "eval_loss": 0.027568014338612556,
796
+ "eval_runtime": 7.0992,
797
+ "eval_samples_per_second": 42.822,
798
+ "eval_steps_per_second": 1.127,
799
+ "step": 100
800
+ },
801
+ {
802
+ "epoch": 2.02,
803
+ "grad_norm": 6.973387241363525,
804
+ "learning_rate": 2.605263157894737e-05,
805
+ "loss": 0.0255,
806
+ "step": 101
807
+ },
808
+ {
809
+ "epoch": 2.04,
810
+ "grad_norm": 0.33660009503364563,
811
+ "learning_rate": 2.578947368421053e-05,
812
+ "loss": 0.0012,
813
+ "step": 102
814
+ },
815
+ {
816
+ "epoch": 2.06,
817
+ "grad_norm": 11.269298553466797,
818
+ "learning_rate": 2.5526315789473688e-05,
819
+ "loss": 0.1097,
820
+ "step": 103
821
+ },
822
+ {
823
+ "epoch": 2.08,
824
+ "grad_norm": 7.602676868438721,
825
+ "learning_rate": 2.5263157894736845e-05,
826
+ "loss": 0.034,
827
+ "step": 104
828
+ },
829
+ {
830
+ "epoch": 2.1,
831
+ "grad_norm": 7.900285720825195,
832
+ "learning_rate": 2.5e-05,
833
+ "loss": 0.0399,
834
+ "step": 105
835
+ },
836
+ {
837
+ "epoch": 2.12,
838
+ "grad_norm": 4.07220983505249,
839
+ "learning_rate": 2.4736842105263158e-05,
840
+ "loss": 0.0055,
841
+ "step": 106
842
+ },
843
+ {
844
+ "epoch": 2.14,
845
+ "grad_norm": 0.09085848927497864,
846
+ "learning_rate": 2.4473684210526318e-05,
847
+ "loss": 0.0005,
848
+ "step": 107
849
+ },
850
+ {
851
+ "epoch": 2.16,
852
+ "grad_norm": 0.5666660666465759,
853
+ "learning_rate": 2.4210526315789474e-05,
854
+ "loss": 0.0029,
855
+ "step": 108
856
+ },
857
+ {
858
+ "epoch": 2.18,
859
+ "grad_norm": 12.90062427520752,
860
+ "learning_rate": 2.394736842105263e-05,
861
+ "loss": 0.051,
862
+ "step": 109
863
+ },
864
+ {
865
+ "epoch": 2.2,
866
+ "grad_norm": 0.24952377378940582,
867
+ "learning_rate": 2.368421052631579e-05,
868
+ "loss": 0.0007,
869
+ "step": 110
870
+ },
871
+ {
872
+ "epoch": 2.2,
873
+ "eval_accuracy": 0.9835526315789473,
874
+ "eval_loss": 0.05723079293966293,
875
+ "eval_runtime": 7.2757,
876
+ "eval_samples_per_second": 41.783,
877
+ "eval_steps_per_second": 1.1,
878
+ "step": 110
879
+ },
880
+ {
881
+ "epoch": 2.22,
882
+ "grad_norm": 2.654731512069702,
883
+ "learning_rate": 2.342105263157895e-05,
884
+ "loss": 0.0036,
885
+ "step": 111
886
+ },
887
+ {
888
+ "epoch": 2.24,
889
+ "grad_norm": 0.5114130973815918,
890
+ "learning_rate": 2.3157894736842107e-05,
891
+ "loss": 0.0012,
892
+ "step": 112
893
+ },
894
+ {
895
+ "epoch": 2.26,
896
+ "grad_norm": 9.55749797821045,
897
+ "learning_rate": 2.2894736842105263e-05,
898
+ "loss": 0.0195,
899
+ "step": 113
900
+ },
901
+ {
902
+ "epoch": 2.28,
903
+ "grad_norm": 11.102432250976562,
904
+ "learning_rate": 2.2631578947368423e-05,
905
+ "loss": 0.0204,
906
+ "step": 114
907
+ },
908
+ {
909
+ "epoch": 2.3,
910
+ "grad_norm": 2.684544324874878,
911
+ "learning_rate": 2.236842105263158e-05,
912
+ "loss": 0.0047,
913
+ "step": 115
914
+ },
915
+ {
916
+ "epoch": 2.32,
917
+ "grad_norm": 0.22236469388008118,
918
+ "learning_rate": 2.2105263157894736e-05,
919
+ "loss": 0.0008,
920
+ "step": 116
921
+ },
922
+ {
923
+ "epoch": 2.34,
924
+ "grad_norm": 8.926434516906738,
925
+ "learning_rate": 2.1842105263157896e-05,
926
+ "loss": 0.0223,
927
+ "step": 117
928
+ },
929
+ {
930
+ "epoch": 2.36,
931
+ "grad_norm": 3.3315141201019287,
932
+ "learning_rate": 2.1578947368421053e-05,
933
+ "loss": 0.0117,
934
+ "step": 118
935
+ },
936
+ {
937
+ "epoch": 2.38,
938
+ "grad_norm": 10.430353164672852,
939
+ "learning_rate": 2.1315789473684212e-05,
940
+ "loss": 0.0303,
941
+ "step": 119
942
+ },
943
+ {
944
+ "epoch": 2.4,
945
+ "grad_norm": 0.15297500789165497,
946
+ "learning_rate": 2.105263157894737e-05,
947
+ "loss": 0.0008,
948
+ "step": 120
949
+ },
950
+ {
951
+ "epoch": 2.4,
952
+ "eval_accuracy": 0.9967105263157895,
953
+ "eval_loss": 0.009093067608773708,
954
+ "eval_runtime": 6.8131,
955
+ "eval_samples_per_second": 44.62,
956
+ "eval_steps_per_second": 1.174,
957
+ "step": 120
958
+ },
959
+ {
960
+ "epoch": 2.42,
961
+ "grad_norm": 0.6949332356452942,
962
+ "learning_rate": 2.078947368421053e-05,
963
+ "loss": 0.0018,
964
+ "step": 121
965
+ },
966
+ {
967
+ "epoch": 2.44,
968
+ "grad_norm": 0.10584941506385803,
969
+ "learning_rate": 2.0526315789473685e-05,
970
+ "loss": 0.0006,
971
+ "step": 122
972
+ },
973
+ {
974
+ "epoch": 2.46,
975
+ "grad_norm": 2.782750368118286,
976
+ "learning_rate": 2.0263157894736842e-05,
977
+ "loss": 0.0064,
978
+ "step": 123
979
+ },
980
+ {
981
+ "epoch": 2.48,
982
+ "grad_norm": 0.1843283325433731,
983
+ "learning_rate": 2e-05,
984
+ "loss": 0.0007,
985
+ "step": 124
986
+ },
987
+ {
988
+ "epoch": 2.5,
989
+ "grad_norm": 19.077423095703125,
990
+ "learning_rate": 1.9736842105263158e-05,
991
+ "loss": 0.1095,
992
+ "step": 125
993
+ },
994
+ {
995
+ "epoch": 2.52,
996
+ "grad_norm": 9.631857872009277,
997
+ "learning_rate": 1.9473684210526315e-05,
998
+ "loss": 0.0286,
999
+ "step": 126
1000
+ },
1001
+ {
1002
+ "epoch": 2.54,
1003
+ "grad_norm": 2.46711802482605,
1004
+ "learning_rate": 1.9210526315789474e-05,
1005
+ "loss": 0.0042,
1006
+ "step": 127
1007
+ },
1008
+ {
1009
+ "epoch": 2.56,
1010
+ "grad_norm": 0.9471967220306396,
1011
+ "learning_rate": 1.8947368421052634e-05,
1012
+ "loss": 0.0038,
1013
+ "step": 128
1014
+ },
1015
+ {
1016
+ "epoch": 2.58,
1017
+ "grad_norm": 0.1509537547826767,
1018
+ "learning_rate": 1.868421052631579e-05,
1019
+ "loss": 0.0009,
1020
+ "step": 129
1021
+ },
1022
+ {
1023
+ "epoch": 2.6,
1024
+ "grad_norm": 16.95519256591797,
1025
+ "learning_rate": 1.8421052631578947e-05,
1026
+ "loss": 0.1277,
1027
+ "step": 130
1028
+ },
1029
+ {
1030
+ "epoch": 2.6,
1031
+ "eval_accuracy": 0.9967105263157895,
1032
+ "eval_loss": 0.004848334938287735,
1033
+ "eval_runtime": 7.1393,
1034
+ "eval_samples_per_second": 42.581,
1035
+ "eval_steps_per_second": 1.121,
1036
+ "step": 130
1037
+ },
1038
+ {
1039
+ "epoch": 2.62,
1040
+ "grad_norm": 0.3852033317089081,
1041
+ "learning_rate": 1.8157894736842107e-05,
1042
+ "loss": 0.0009,
1043
+ "step": 131
1044
+ },
1045
+ {
1046
+ "epoch": 2.64,
1047
+ "grad_norm": 0.23005267977714539,
1048
+ "learning_rate": 1.7894736842105264e-05,
1049
+ "loss": 0.0006,
1050
+ "step": 132
1051
+ },
1052
+ {
1053
+ "epoch": 2.66,
1054
+ "grad_norm": 2.9750654697418213,
1055
+ "learning_rate": 1.763157894736842e-05,
1056
+ "loss": 0.0096,
1057
+ "step": 133
1058
+ },
1059
+ {
1060
+ "epoch": 2.68,
1061
+ "grad_norm": 6.330101490020752,
1062
+ "learning_rate": 1.736842105263158e-05,
1063
+ "loss": 0.0224,
1064
+ "step": 134
1065
+ },
1066
+ {
1067
+ "epoch": 2.7,
1068
+ "grad_norm": 0.2537049949169159,
1069
+ "learning_rate": 1.7105263157894737e-05,
1070
+ "loss": 0.001,
1071
+ "step": 135
1072
+ },
1073
+ {
1074
+ "epoch": 2.72,
1075
+ "grad_norm": 0.49432969093322754,
1076
+ "learning_rate": 1.6842105263157896e-05,
1077
+ "loss": 0.002,
1078
+ "step": 136
1079
+ },
1080
+ {
1081
+ "epoch": 2.74,
1082
+ "grad_norm": 0.878919243812561,
1083
+ "learning_rate": 1.6578947368421053e-05,
1084
+ "loss": 0.0022,
1085
+ "step": 137
1086
+ },
1087
+ {
1088
+ "epoch": 2.76,
1089
+ "grad_norm": 0.4538193941116333,
1090
+ "learning_rate": 1.6315789473684213e-05,
1091
+ "loss": 0.0016,
1092
+ "step": 138
1093
+ },
1094
+ {
1095
+ "epoch": 2.78,
1096
+ "grad_norm": 13.346075057983398,
1097
+ "learning_rate": 1.605263157894737e-05,
1098
+ "loss": 0.0384,
1099
+ "step": 139
1100
+ },
1101
+ {
1102
+ "epoch": 2.8,
1103
+ "grad_norm": 0.42253851890563965,
1104
+ "learning_rate": 1.5789473684210526e-05,
1105
+ "loss": 0.0007,
1106
+ "step": 140
1107
+ },
1108
+ {
1109
+ "epoch": 2.8,
1110
+ "eval_accuracy": 0.9736842105263158,
1111
+ "eval_loss": 0.08703595399856567,
1112
+ "eval_runtime": 7.1737,
1113
+ "eval_samples_per_second": 42.377,
1114
+ "eval_steps_per_second": 1.115,
1115
+ "step": 140
1116
+ },
1117
+ {
1118
+ "epoch": 2.82,
1119
+ "grad_norm": 8.999384880065918,
1120
+ "learning_rate": 1.5526315789473686e-05,
1121
+ "loss": 0.041,
1122
+ "step": 141
1123
+ },
1124
+ {
1125
+ "epoch": 2.84,
1126
+ "grad_norm": 0.34943702816963196,
1127
+ "learning_rate": 1.5263157894736842e-05,
1128
+ "loss": 0.0008,
1129
+ "step": 142
1130
+ },
1131
+ {
1132
+ "epoch": 2.86,
1133
+ "grad_norm": 41.58428192138672,
1134
+ "learning_rate": 1.5e-05,
1135
+ "loss": 0.0352,
1136
+ "step": 143
1137
+ },
1138
+ {
1139
+ "epoch": 2.88,
1140
+ "grad_norm": 1.358328104019165,
1141
+ "learning_rate": 1.4736842105263157e-05,
1142
+ "loss": 0.004,
1143
+ "step": 144
1144
+ },
1145
+ {
1146
+ "epoch": 2.9,
1147
+ "grad_norm": 26.084732055664062,
1148
+ "learning_rate": 1.4473684210526317e-05,
1149
+ "loss": 0.1456,
1150
+ "step": 145
1151
+ },
1152
+ {
1153
+ "epoch": 2.92,
1154
+ "grad_norm": 4.426596164703369,
1155
+ "learning_rate": 1.4210526315789475e-05,
1156
+ "loss": 0.0056,
1157
+ "step": 146
1158
+ },
1159
+ {
1160
+ "epoch": 2.94,
1161
+ "grad_norm": 7.729344367980957,
1162
+ "learning_rate": 1.3947368421052631e-05,
1163
+ "loss": 0.0536,
1164
+ "step": 147
1165
+ },
1166
+ {
1167
+ "epoch": 2.96,
1168
+ "grad_norm": 0.49353259801864624,
1169
+ "learning_rate": 1.3684210526315791e-05,
1170
+ "loss": 0.0035,
1171
+ "step": 148
1172
+ },
1173
+ {
1174
+ "epoch": 2.98,
1175
+ "grad_norm": 15.301690101623535,
1176
+ "learning_rate": 1.3421052631578948e-05,
1177
+ "loss": 0.0138,
1178
+ "step": 149
1179
+ },
1180
+ {
1181
+ "epoch": 3.0,
1182
+ "grad_norm": 0.520106852054596,
1183
+ "learning_rate": 1.3157894736842106e-05,
1184
+ "loss": 0.0008,
1185
+ "step": 150
1186
+ },
1187
+ {
1188
+ "epoch": 3.0,
1189
+ "eval_accuracy": 0.993421052631579,
1190
+ "eval_loss": 0.005907059647142887,
1191
+ "eval_runtime": 7.2044,
1192
+ "eval_samples_per_second": 42.197,
1193
+ "eval_steps_per_second": 1.11,
1194
+ "step": 150
1195
+ },
1196
+ {
1197
+ "epoch": 3.02,
1198
+ "grad_norm": 0.7136516571044922,
1199
+ "learning_rate": 1.2894736842105264e-05,
1200
+ "loss": 0.0028,
1201
+ "step": 151
1202
+ },
1203
+ {
1204
+ "epoch": 3.04,
1205
+ "grad_norm": 0.28995925188064575,
1206
+ "learning_rate": 1.2631578947368422e-05,
1207
+ "loss": 0.0006,
1208
+ "step": 152
1209
+ },
1210
+ {
1211
+ "epoch": 3.06,
1212
+ "grad_norm": 9.869847297668457,
1213
+ "learning_rate": 1.2368421052631579e-05,
1214
+ "loss": 0.0342,
1215
+ "step": 153
1216
+ },
1217
+ {
1218
+ "epoch": 3.08,
1219
+ "grad_norm": 0.20523004233837128,
1220
+ "learning_rate": 1.2105263157894737e-05,
1221
+ "loss": 0.0005,
1222
+ "step": 154
1223
+ },
1224
+ {
1225
+ "epoch": 3.1,
1226
+ "grad_norm": 8.038060188293457,
1227
+ "learning_rate": 1.1842105263157895e-05,
1228
+ "loss": 0.0178,
1229
+ "step": 155
1230
+ },
1231
+ {
1232
+ "epoch": 3.12,
1233
+ "grad_norm": 1.2011505365371704,
1234
+ "learning_rate": 1.1578947368421053e-05,
1235
+ "loss": 0.002,
1236
+ "step": 156
1237
+ },
1238
+ {
1239
+ "epoch": 3.14,
1240
+ "grad_norm": 1.0488544702529907,
1241
+ "learning_rate": 1.1315789473684212e-05,
1242
+ "loss": 0.0025,
1243
+ "step": 157
1244
+ },
1245
+ {
1246
+ "epoch": 3.16,
1247
+ "grad_norm": 0.24755439162254333,
1248
+ "learning_rate": 1.1052631578947368e-05,
1249
+ "loss": 0.0005,
1250
+ "step": 158
1251
+ },
1252
+ {
1253
+ "epoch": 3.18,
1254
+ "grad_norm": 0.37720346450805664,
1255
+ "learning_rate": 1.0789473684210526e-05,
1256
+ "loss": 0.0007,
1257
+ "step": 159
1258
+ },
1259
+ {
1260
+ "epoch": 3.2,
1261
+ "grad_norm": 0.054420772939920425,
1262
+ "learning_rate": 1.0526315789473684e-05,
1263
+ "loss": 0.0003,
1264
+ "step": 160
1265
+ },
1266
+ {
1267
+ "epoch": 3.2,
1268
+ "eval_accuracy": 0.9967105263157895,
1269
+ "eval_loss": 0.012113516218960285,
1270
+ "eval_runtime": 7.4779,
1271
+ "eval_samples_per_second": 40.653,
1272
+ "eval_steps_per_second": 1.07,
1273
+ "step": 160
1274
+ },
1275
+ {
1276
+ "epoch": 3.22,
1277
+ "grad_norm": 3.170163631439209,
1278
+ "learning_rate": 1.0263157894736843e-05,
1279
+ "loss": 0.0048,
1280
+ "step": 161
1281
+ },
1282
+ {
1283
+ "epoch": 3.24,
1284
+ "grad_norm": 9.659165382385254,
1285
+ "learning_rate": 1e-05,
1286
+ "loss": 0.006,
1287
+ "step": 162
1288
+ },
1289
+ {
1290
+ "epoch": 3.26,
1291
+ "grad_norm": 0.07884739339351654,
1292
+ "learning_rate": 9.736842105263157e-06,
1293
+ "loss": 0.0003,
1294
+ "step": 163
1295
+ },
1296
+ {
1297
+ "epoch": 3.28,
1298
+ "grad_norm": 1.1162643432617188,
1299
+ "learning_rate": 9.473684210526317e-06,
1300
+ "loss": 0.0019,
1301
+ "step": 164
1302
+ },
1303
+ {
1304
+ "epoch": 3.3,
1305
+ "grad_norm": 0.3206580877304077,
1306
+ "learning_rate": 9.210526315789474e-06,
1307
+ "loss": 0.001,
1308
+ "step": 165
1309
+ },
1310
+ {
1311
+ "epoch": 3.32,
1312
+ "grad_norm": 15.26516056060791,
1313
+ "learning_rate": 8.947368421052632e-06,
1314
+ "loss": 0.0467,
1315
+ "step": 166
1316
+ },
1317
+ {
1318
+ "epoch": 3.34,
1319
+ "grad_norm": 0.6477659940719604,
1320
+ "learning_rate": 8.68421052631579e-06,
1321
+ "loss": 0.002,
1322
+ "step": 167
1323
+ },
1324
+ {
1325
+ "epoch": 3.36,
1326
+ "grad_norm": 16.165756225585938,
1327
+ "learning_rate": 8.421052631578948e-06,
1328
+ "loss": 0.0486,
1329
+ "step": 168
1330
+ },
1331
+ {
1332
+ "epoch": 3.38,
1333
+ "grad_norm": 0.3942013382911682,
1334
+ "learning_rate": 8.157894736842106e-06,
1335
+ "loss": 0.001,
1336
+ "step": 169
1337
+ },
1338
+ {
1339
+ "epoch": 3.4,
1340
+ "grad_norm": 0.2746890187263489,
1341
+ "learning_rate": 7.894736842105263e-06,
1342
+ "loss": 0.0008,
1343
+ "step": 170
1344
+ },
1345
+ {
1346
+ "epoch": 3.4,
1347
+ "eval_accuracy": 0.993421052631579,
1348
+ "eval_loss": 0.02530418336391449,
1349
+ "eval_runtime": 6.8199,
1350
+ "eval_samples_per_second": 44.575,
1351
+ "eval_steps_per_second": 1.173,
1352
+ "step": 170
1353
+ },
1354
+ {
1355
+ "epoch": 3.42,
1356
+ "grad_norm": 0.1466667354106903,
1357
+ "learning_rate": 7.631578947368421e-06,
1358
+ "loss": 0.0006,
1359
+ "step": 171
1360
+ },
1361
+ {
1362
+ "epoch": 3.44,
1363
+ "grad_norm": 0.35398414731025696,
1364
+ "learning_rate": 7.3684210526315784e-06,
1365
+ "loss": 0.0008,
1366
+ "step": 172
1367
+ },
1368
+ {
1369
+ "epoch": 3.46,
1370
+ "grad_norm": 0.03358185663819313,
1371
+ "learning_rate": 7.1052631578947375e-06,
1372
+ "loss": 0.0001,
1373
+ "step": 173
1374
+ },
1375
+ {
1376
+ "epoch": 3.48,
1377
+ "grad_norm": 0.04026187211275101,
1378
+ "learning_rate": 6.842105263157896e-06,
1379
+ "loss": 0.0002,
1380
+ "step": 174
1381
+ },
1382
+ {
1383
+ "epoch": 3.5,
1384
+ "grad_norm": 0.10377287119626999,
1385
+ "learning_rate": 6.578947368421053e-06,
1386
+ "loss": 0.0008,
1387
+ "step": 175
1388
+ },
1389
+ {
1390
+ "epoch": 3.52,
1391
+ "grad_norm": 3.1232450008392334,
1392
+ "learning_rate": 6.315789473684211e-06,
1393
+ "loss": 0.0062,
1394
+ "step": 176
1395
+ },
1396
+ {
1397
+ "epoch": 3.54,
1398
+ "grad_norm": 0.0659070760011673,
1399
+ "learning_rate": 6.0526315789473685e-06,
1400
+ "loss": 0.0002,
1401
+ "step": 177
1402
+ },
1403
+ {
1404
+ "epoch": 3.56,
1405
+ "grad_norm": 0.4386169910430908,
1406
+ "learning_rate": 5.789473684210527e-06,
1407
+ "loss": 0.0007,
1408
+ "step": 178
1409
+ },
1410
+ {
1411
+ "epoch": 3.58,
1412
+ "grad_norm": 0.09403068572282791,
1413
+ "learning_rate": 5.526315789473684e-06,
1414
+ "loss": 0.0001,
1415
+ "step": 179
1416
+ },
1417
+ {
1418
+ "epoch": 3.6,
1419
+ "grad_norm": 0.011226998642086983,
1420
+ "learning_rate": 5.263157894736842e-06,
1421
+ "loss": 0.0,
1422
+ "step": 180
1423
+ },
1424
+ {
1425
+ "epoch": 3.6,
1426
+ "eval_accuracy": 0.9967105263157895,
1427
+ "eval_loss": 0.012991190887987614,
1428
+ "eval_runtime": 7.2339,
1429
+ "eval_samples_per_second": 42.024,
1430
+ "eval_steps_per_second": 1.106,
1431
+ "step": 180
1432
+ },
1433
+ {
1434
+ "epoch": 3.62,
1435
+ "grad_norm": 0.3960583209991455,
1436
+ "learning_rate": 5e-06,
1437
+ "loss": 0.0013,
1438
+ "step": 181
1439
+ },
1440
+ {
1441
+ "epoch": 3.64,
1442
+ "grad_norm": 0.028123315423727036,
1443
+ "learning_rate": 4.736842105263159e-06,
1444
+ "loss": 0.0001,
1445
+ "step": 182
1446
+ },
1447
+ {
1448
+ "epoch": 3.66,
1449
+ "grad_norm": 3.7100679874420166,
1450
+ "learning_rate": 4.473684210526316e-06,
1451
+ "loss": 0.0062,
1452
+ "step": 183
1453
+ },
1454
+ {
1455
+ "epoch": 3.68,
1456
+ "grad_norm": 0.027926042675971985,
1457
+ "learning_rate": 4.210526315789474e-06,
1458
+ "loss": 0.0001,
1459
+ "step": 184
1460
+ },
1461
+ {
1462
+ "epoch": 3.7,
1463
+ "grad_norm": 1.727767825126648,
1464
+ "learning_rate": 3.9473684210526315e-06,
1465
+ "loss": 0.0022,
1466
+ "step": 185
1467
+ },
1468
+ {
1469
+ "epoch": 3.72,
1470
+ "grad_norm": 1.4793553352355957,
1471
+ "learning_rate": 3.6842105263157892e-06,
1472
+ "loss": 0.0011,
1473
+ "step": 186
1474
+ },
1475
+ {
1476
+ "epoch": 3.74,
1477
+ "grad_norm": 2.847693681716919,
1478
+ "learning_rate": 3.421052631578948e-06,
1479
+ "loss": 0.004,
1480
+ "step": 187
1481
+ },
1482
+ {
1483
+ "epoch": 3.76,
1484
+ "grad_norm": 0.017291821539402008,
1485
+ "learning_rate": 3.1578947368421056e-06,
1486
+ "loss": 0.0001,
1487
+ "step": 188
1488
+ },
1489
+ {
1490
+ "epoch": 3.78,
1491
+ "grad_norm": 0.6957990527153015,
1492
+ "learning_rate": 2.8947368421052634e-06,
1493
+ "loss": 0.001,
1494
+ "step": 189
1495
+ },
1496
+ {
1497
+ "epoch": 3.8,
1498
+ "grad_norm": 0.032589249312877655,
1499
+ "learning_rate": 2.631578947368421e-06,
1500
+ "loss": 0.0001,
1501
+ "step": 190
1502
+ },
1503
+ {
1504
+ "epoch": 3.8,
1505
+ "eval_accuracy": 0.993421052631579,
1506
+ "eval_loss": 0.009236541576683521,
1507
+ "eval_runtime": 6.8219,
1508
+ "eval_samples_per_second": 44.563,
1509
+ "eval_steps_per_second": 1.173,
1510
+ "step": 190
1511
+ }
1512
+ ],
1513
+ "logging_steps": 1,
1514
+ "max_steps": 200,
1515
+ "num_input_tokens_seen": 0,
1516
+ "num_train_epochs": 4,
1517
+ "save_steps": 10,
1518
+ "total_flos": 5.904904527684772e+17,
1519
+ "train_batch_size": 40,
1520
+ "trial_name": null,
1521
+ "trial_params": null
1522
+ }
checkpoint-190/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d12326b817ccf86422554696b6e15d756b4174e67e313ecfbe14aadf3bb73244
3
+ size 4856
checkpoint-200/config.json ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "Organika/sdxl-detector",
3
+ "_num_labels": 2,
4
+ "architectures": [
5
+ "SwinForImageClassification"
6
+ ],
7
+ "attention_probs_dropout_prob": 0.0,
8
+ "depths": [
9
+ 2,
10
+ 2,
11
+ 18,
12
+ 2
13
+ ],
14
+ "drop_path_rate": 0.1,
15
+ "embed_dim": 128,
16
+ "encoder_stride": 32,
17
+ "hidden_act": "gelu",
18
+ "hidden_dropout_prob": 0.0,
19
+ "hidden_size": 1024,
20
+ "id2label": {
21
+ "0": "artificial",
22
+ "1": "human"
23
+ },
24
+ "image_size": 224,
25
+ "initializer_range": 0.02,
26
+ "label2id": {
27
+ "artificial": 0,
28
+ "human": 1
29
+ },
30
+ "layer_norm_eps": 1e-05,
31
+ "max_length": 128,
32
+ "mlp_ratio": 4.0,
33
+ "model_type": "swin",
34
+ "num_channels": 3,
35
+ "num_heads": [
36
+ 4,
37
+ 8,
38
+ 16,
39
+ 32
40
+ ],
41
+ "num_layers": 4,
42
+ "out_features": [
43
+ "stage4"
44
+ ],
45
+ "out_indices": [
46
+ 4
47
+ ],
48
+ "padding": "max_length",
49
+ "patch_size": 4,
50
+ "path_norm": true,
51
+ "problem_type": "single_label_classification",
52
+ "qkv_bias": true,
53
+ "stage_names": [
54
+ "stem",
55
+ "stage1",
56
+ "stage2",
57
+ "stage3",
58
+ "stage4"
59
+ ],
60
+ "torch_dtype": "float32",
61
+ "transformers_version": "4.39.3",
62
+ "use_absolute_embeddings": false,
63
+ "window_size": 7
64
+ }
checkpoint-200/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d1722542eb8997bf4925c0254af0bf6c6ac2c9b3499879ce5aaa0b4d3f48f83
3
+ size 347498816
checkpoint-200/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4182967f6667786b57d0f91b8f1268d3194964249208ad46de82ad989737fc71
3
+ size 694317645
checkpoint-200/preprocessor_config.json ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_valid_processor_keys": [
3
+ "images",
4
+ "do_resize",
5
+ "size",
6
+ "resample",
7
+ "do_rescale",
8
+ "rescale_factor",
9
+ "do_normalize",
10
+ "image_mean",
11
+ "image_std",
12
+ "return_tensors",
13
+ "data_format",
14
+ "input_data_format"
15
+ ],
16
+ "do_normalize": true,
17
+ "do_rescale": true,
18
+ "do_resize": true,
19
+ "image_mean": [
20
+ 0.485,
21
+ 0.456,
22
+ 0.406
23
+ ],
24
+ "image_processor_type": "ViTImageProcessor",
25
+ "image_std": [
26
+ 0.229,
27
+ 0.224,
28
+ 0.225
29
+ ],
30
+ "resample": 3,
31
+ "rescale_factor": 0.00392156862745098,
32
+ "size": {
33
+ "height": 224,
34
+ "width": 224
35
+ }
36
+ }
checkpoint-200/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2bf14f56667191904a767265aae5cce753dc5f2ae8bc6452ff41d6d9ac99fc0
3
+ size 14244
checkpoint-200/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a27b052646dcb561cbd68156c30bd466ce59bda64cf3c8eba9c3c1113af9827c
3
+ size 1064
checkpoint-200/trainer_state.json ADDED
@@ -0,0 +1,1601 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.004848334938287735,
3
+ "best_model_checkpoint": "ai_vs_real_image_detection/checkpoint-130",
4
+ "epoch": 4.0,
5
+ "eval_steps": 10,
6
+ "global_step": 200,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.02,
13
+ "grad_norm": 52.89345169067383,
14
+ "learning_rate": 5e-06,
15
+ "loss": 2.2763,
16
+ "step": 1
17
+ },
18
+ {
19
+ "epoch": 0.04,
20
+ "grad_norm": 62.66461944580078,
21
+ "learning_rate": 1e-05,
22
+ "loss": 1.8254,
23
+ "step": 2
24
+ },
25
+ {
26
+ "epoch": 0.06,
27
+ "grad_norm": 66.1015625,
28
+ "learning_rate": 1.5e-05,
29
+ "loss": 1.8906,
30
+ "step": 3
31
+ },
32
+ {
33
+ "epoch": 0.08,
34
+ "grad_norm": 56.700233459472656,
35
+ "learning_rate": 2e-05,
36
+ "loss": 1.3529,
37
+ "step": 4
38
+ },
39
+ {
40
+ "epoch": 0.1,
41
+ "grad_norm": 66.10917663574219,
42
+ "learning_rate": 2.5e-05,
43
+ "loss": 1.4672,
44
+ "step": 5
45
+ },
46
+ {
47
+ "epoch": 0.12,
48
+ "grad_norm": 52.68284225463867,
49
+ "learning_rate": 3e-05,
50
+ "loss": 1.0578,
51
+ "step": 6
52
+ },
53
+ {
54
+ "epoch": 0.14,
55
+ "grad_norm": 42.4580078125,
56
+ "learning_rate": 3.5e-05,
57
+ "loss": 1.1221,
58
+ "step": 7
59
+ },
60
+ {
61
+ "epoch": 0.16,
62
+ "grad_norm": 64.87982940673828,
63
+ "learning_rate": 4e-05,
64
+ "loss": 0.9313,
65
+ "step": 8
66
+ },
67
+ {
68
+ "epoch": 0.18,
69
+ "grad_norm": 56.227081298828125,
70
+ "learning_rate": 4.5e-05,
71
+ "loss": 0.644,
72
+ "step": 9
73
+ },
74
+ {
75
+ "epoch": 0.2,
76
+ "grad_norm": 13.719300270080566,
77
+ "learning_rate": 5e-05,
78
+ "loss": 0.4305,
79
+ "step": 10
80
+ },
81
+ {
82
+ "epoch": 0.2,
83
+ "eval_accuracy": 0.9144736842105263,
84
+ "eval_loss": 0.19980163872241974,
85
+ "eval_runtime": 6.726,
86
+ "eval_samples_per_second": 45.198,
87
+ "eval_steps_per_second": 1.189,
88
+ "step": 10
89
+ },
90
+ {
91
+ "epoch": 0.22,
92
+ "grad_norm": 16.044822692871094,
93
+ "learning_rate": 4.973684210526316e-05,
94
+ "loss": 0.3892,
95
+ "step": 11
96
+ },
97
+ {
98
+ "epoch": 0.24,
99
+ "grad_norm": 15.53343391418457,
100
+ "learning_rate": 4.9473684210526315e-05,
101
+ "loss": 0.3683,
102
+ "step": 12
103
+ },
104
+ {
105
+ "epoch": 0.26,
106
+ "grad_norm": 13.435264587402344,
107
+ "learning_rate": 4.921052631578947e-05,
108
+ "loss": 0.3594,
109
+ "step": 13
110
+ },
111
+ {
112
+ "epoch": 0.28,
113
+ "grad_norm": 17.45833969116211,
114
+ "learning_rate": 4.8947368421052635e-05,
115
+ "loss": 0.5304,
116
+ "step": 14
117
+ },
118
+ {
119
+ "epoch": 0.3,
120
+ "grad_norm": 14.26673412322998,
121
+ "learning_rate": 4.868421052631579e-05,
122
+ "loss": 0.4804,
123
+ "step": 15
124
+ },
125
+ {
126
+ "epoch": 0.32,
127
+ "grad_norm": 9.26322078704834,
128
+ "learning_rate": 4.842105263157895e-05,
129
+ "loss": 0.3633,
130
+ "step": 16
131
+ },
132
+ {
133
+ "epoch": 0.34,
134
+ "grad_norm": 8.355168342590332,
135
+ "learning_rate": 4.8157894736842105e-05,
136
+ "loss": 0.4711,
137
+ "step": 17
138
+ },
139
+ {
140
+ "epoch": 0.36,
141
+ "grad_norm": 8.733236312866211,
142
+ "learning_rate": 4.789473684210526e-05,
143
+ "loss": 0.3954,
144
+ "step": 18
145
+ },
146
+ {
147
+ "epoch": 0.38,
148
+ "grad_norm": 5.385647296905518,
149
+ "learning_rate": 4.7631578947368424e-05,
150
+ "loss": 0.1943,
151
+ "step": 19
152
+ },
153
+ {
154
+ "epoch": 0.4,
155
+ "grad_norm": 9.892962455749512,
156
+ "learning_rate": 4.736842105263158e-05,
157
+ "loss": 0.3209,
158
+ "step": 20
159
+ },
160
+ {
161
+ "epoch": 0.4,
162
+ "eval_accuracy": 0.8881578947368421,
163
+ "eval_loss": 0.24087880551815033,
164
+ "eval_runtime": 6.8213,
165
+ "eval_samples_per_second": 44.566,
166
+ "eval_steps_per_second": 1.173,
167
+ "step": 20
168
+ },
169
+ {
170
+ "epoch": 0.42,
171
+ "grad_norm": 5.46142578125,
172
+ "learning_rate": 4.7105263157894744e-05,
173
+ "loss": 0.1505,
174
+ "step": 21
175
+ },
176
+ {
177
+ "epoch": 0.44,
178
+ "grad_norm": 9.863460540771484,
179
+ "learning_rate": 4.68421052631579e-05,
180
+ "loss": 0.2481,
181
+ "step": 22
182
+ },
183
+ {
184
+ "epoch": 0.46,
185
+ "grad_norm": 7.595032691955566,
186
+ "learning_rate": 4.657894736842106e-05,
187
+ "loss": 0.2001,
188
+ "step": 23
189
+ },
190
+ {
191
+ "epoch": 0.48,
192
+ "grad_norm": 17.375865936279297,
193
+ "learning_rate": 4.6315789473684214e-05,
194
+ "loss": 0.336,
195
+ "step": 24
196
+ },
197
+ {
198
+ "epoch": 0.5,
199
+ "grad_norm": 5.597734451293945,
200
+ "learning_rate": 4.605263157894737e-05,
201
+ "loss": 0.1395,
202
+ "step": 25
203
+ },
204
+ {
205
+ "epoch": 0.52,
206
+ "grad_norm": 7.56685209274292,
207
+ "learning_rate": 4.5789473684210527e-05,
208
+ "loss": 0.1739,
209
+ "step": 26
210
+ },
211
+ {
212
+ "epoch": 0.54,
213
+ "grad_norm": 13.769929885864258,
214
+ "learning_rate": 4.552631578947369e-05,
215
+ "loss": 0.1959,
216
+ "step": 27
217
+ },
218
+ {
219
+ "epoch": 0.56,
220
+ "grad_norm": 10.677892684936523,
221
+ "learning_rate": 4.5263157894736846e-05,
222
+ "loss": 0.2015,
223
+ "step": 28
224
+ },
225
+ {
226
+ "epoch": 0.58,
227
+ "grad_norm": 3.645374059677124,
228
+ "learning_rate": 4.5e-05,
229
+ "loss": 0.0576,
230
+ "step": 29
231
+ },
232
+ {
233
+ "epoch": 0.6,
234
+ "grad_norm": 12.538244247436523,
235
+ "learning_rate": 4.473684210526316e-05,
236
+ "loss": 0.324,
237
+ "step": 30
238
+ },
239
+ {
240
+ "epoch": 0.6,
241
+ "eval_accuracy": 0.9802631578947368,
242
+ "eval_loss": 0.056003469973802567,
243
+ "eval_runtime": 6.4198,
244
+ "eval_samples_per_second": 47.353,
245
+ "eval_steps_per_second": 1.246,
246
+ "step": 30
247
+ },
248
+ {
249
+ "epoch": 0.62,
250
+ "grad_norm": 8.700516700744629,
251
+ "learning_rate": 4.4473684210526316e-05,
252
+ "loss": 0.1199,
253
+ "step": 31
254
+ },
255
+ {
256
+ "epoch": 0.64,
257
+ "grad_norm": 6.926602840423584,
258
+ "learning_rate": 4.421052631578947e-05,
259
+ "loss": 0.1471,
260
+ "step": 32
261
+ },
262
+ {
263
+ "epoch": 0.66,
264
+ "grad_norm": 8.461554527282715,
265
+ "learning_rate": 4.394736842105263e-05,
266
+ "loss": 0.0649,
267
+ "step": 33
268
+ },
269
+ {
270
+ "epoch": 0.68,
271
+ "grad_norm": 7.129871845245361,
272
+ "learning_rate": 4.368421052631579e-05,
273
+ "loss": 0.1167,
274
+ "step": 34
275
+ },
276
+ {
277
+ "epoch": 0.7,
278
+ "grad_norm": 20.945388793945312,
279
+ "learning_rate": 4.342105263157895e-05,
280
+ "loss": 0.3279,
281
+ "step": 35
282
+ },
283
+ {
284
+ "epoch": 0.72,
285
+ "grad_norm": 4.466580390930176,
286
+ "learning_rate": 4.3157894736842105e-05,
287
+ "loss": 0.0517,
288
+ "step": 36
289
+ },
290
+ {
291
+ "epoch": 0.74,
292
+ "grad_norm": 9.97375202178955,
293
+ "learning_rate": 4.289473684210527e-05,
294
+ "loss": 0.1241,
295
+ "step": 37
296
+ },
297
+ {
298
+ "epoch": 0.76,
299
+ "grad_norm": 6.459863662719727,
300
+ "learning_rate": 4.2631578947368425e-05,
301
+ "loss": 0.0859,
302
+ "step": 38
303
+ },
304
+ {
305
+ "epoch": 0.78,
306
+ "grad_norm": 11.704880714416504,
307
+ "learning_rate": 4.236842105263158e-05,
308
+ "loss": 0.1252,
309
+ "step": 39
310
+ },
311
+ {
312
+ "epoch": 0.8,
313
+ "grad_norm": 7.90261697769165,
314
+ "learning_rate": 4.210526315789474e-05,
315
+ "loss": 0.0541,
316
+ "step": 40
317
+ },
318
+ {
319
+ "epoch": 0.8,
320
+ "eval_accuracy": 0.9671052631578947,
321
+ "eval_loss": 0.06965293735265732,
322
+ "eval_runtime": 6.6281,
323
+ "eval_samples_per_second": 45.865,
324
+ "eval_steps_per_second": 1.207,
325
+ "step": 40
326
+ },
327
+ {
328
+ "epoch": 0.82,
329
+ "grad_norm": 2.8429932594299316,
330
+ "learning_rate": 4.18421052631579e-05,
331
+ "loss": 0.0325,
332
+ "step": 41
333
+ },
334
+ {
335
+ "epoch": 0.84,
336
+ "grad_norm": 10.533605575561523,
337
+ "learning_rate": 4.157894736842106e-05,
338
+ "loss": 0.1836,
339
+ "step": 42
340
+ },
341
+ {
342
+ "epoch": 0.86,
343
+ "grad_norm": 12.08105754852295,
344
+ "learning_rate": 4.1315789473684214e-05,
345
+ "loss": 0.224,
346
+ "step": 43
347
+ },
348
+ {
349
+ "epoch": 0.88,
350
+ "grad_norm": 9.006488800048828,
351
+ "learning_rate": 4.105263157894737e-05,
352
+ "loss": 0.2139,
353
+ "step": 44
354
+ },
355
+ {
356
+ "epoch": 0.9,
357
+ "grad_norm": 7.3875956535339355,
358
+ "learning_rate": 4.078947368421053e-05,
359
+ "loss": 0.088,
360
+ "step": 45
361
+ },
362
+ {
363
+ "epoch": 0.92,
364
+ "grad_norm": 13.019746780395508,
365
+ "learning_rate": 4.0526315789473684e-05,
366
+ "loss": 0.2363,
367
+ "step": 46
368
+ },
369
+ {
370
+ "epoch": 0.94,
371
+ "grad_norm": 37.95082473754883,
372
+ "learning_rate": 4.026315789473684e-05,
373
+ "loss": 0.3728,
374
+ "step": 47
375
+ },
376
+ {
377
+ "epoch": 0.96,
378
+ "grad_norm": 13.99156665802002,
379
+ "learning_rate": 4e-05,
380
+ "loss": 0.1376,
381
+ "step": 48
382
+ },
383
+ {
384
+ "epoch": 0.98,
385
+ "grad_norm": 5.063250541687012,
386
+ "learning_rate": 3.973684210526316e-05,
387
+ "loss": 0.0832,
388
+ "step": 49
389
+ },
390
+ {
391
+ "epoch": 1.0,
392
+ "grad_norm": 13.046541213989258,
393
+ "learning_rate": 3.9473684210526316e-05,
394
+ "loss": 0.1096,
395
+ "step": 50
396
+ },
397
+ {
398
+ "epoch": 1.0,
399
+ "eval_accuracy": 0.9572368421052632,
400
+ "eval_loss": 0.12372467666864395,
401
+ "eval_runtime": 6.8765,
402
+ "eval_samples_per_second": 44.209,
403
+ "eval_steps_per_second": 1.163,
404
+ "step": 50
405
+ },
406
+ {
407
+ "epoch": 1.02,
408
+ "grad_norm": 4.865447998046875,
409
+ "learning_rate": 3.921052631578947e-05,
410
+ "loss": 0.0366,
411
+ "step": 51
412
+ },
413
+ {
414
+ "epoch": 1.04,
415
+ "grad_norm": 12.430005073547363,
416
+ "learning_rate": 3.894736842105263e-05,
417
+ "loss": 0.1301,
418
+ "step": 52
419
+ },
420
+ {
421
+ "epoch": 1.06,
422
+ "grad_norm": 2.1291539669036865,
423
+ "learning_rate": 3.868421052631579e-05,
424
+ "loss": 0.0168,
425
+ "step": 53
426
+ },
427
+ {
428
+ "epoch": 1.08,
429
+ "grad_norm": 1.6922448873519897,
430
+ "learning_rate": 3.842105263157895e-05,
431
+ "loss": 0.0074,
432
+ "step": 54
433
+ },
434
+ {
435
+ "epoch": 1.1,
436
+ "grad_norm": 5.851077556610107,
437
+ "learning_rate": 3.815789473684211e-05,
438
+ "loss": 0.0216,
439
+ "step": 55
440
+ },
441
+ {
442
+ "epoch": 1.12,
443
+ "grad_norm": 21.35593605041504,
444
+ "learning_rate": 3.789473684210527e-05,
445
+ "loss": 0.1171,
446
+ "step": 56
447
+ },
448
+ {
449
+ "epoch": 1.14,
450
+ "grad_norm": 9.791412353515625,
451
+ "learning_rate": 3.7631578947368425e-05,
452
+ "loss": 0.1688,
453
+ "step": 57
454
+ },
455
+ {
456
+ "epoch": 1.16,
457
+ "grad_norm": 3.2542812824249268,
458
+ "learning_rate": 3.736842105263158e-05,
459
+ "loss": 0.0236,
460
+ "step": 58
461
+ },
462
+ {
463
+ "epoch": 1.18,
464
+ "grad_norm": 9.557928085327148,
465
+ "learning_rate": 3.710526315789474e-05,
466
+ "loss": 0.1782,
467
+ "step": 59
468
+ },
469
+ {
470
+ "epoch": 1.2,
471
+ "grad_norm": 12.514122009277344,
472
+ "learning_rate": 3.6842105263157895e-05,
473
+ "loss": 0.0905,
474
+ "step": 60
475
+ },
476
+ {
477
+ "epoch": 1.2,
478
+ "eval_accuracy": 0.9967105263157895,
479
+ "eval_loss": 0.027645185589790344,
480
+ "eval_runtime": 7.1325,
481
+ "eval_samples_per_second": 42.622,
482
+ "eval_steps_per_second": 1.122,
483
+ "step": 60
484
+ },
485
+ {
486
+ "epoch": 1.22,
487
+ "grad_norm": 4.434255599975586,
488
+ "learning_rate": 3.657894736842106e-05,
489
+ "loss": 0.0276,
490
+ "step": 61
491
+ },
492
+ {
493
+ "epoch": 1.24,
494
+ "grad_norm": 6.304673671722412,
495
+ "learning_rate": 3.6315789473684214e-05,
496
+ "loss": 0.0508,
497
+ "step": 62
498
+ },
499
+ {
500
+ "epoch": 1.26,
501
+ "grad_norm": 3.844200372695923,
502
+ "learning_rate": 3.605263157894737e-05,
503
+ "loss": 0.0199,
504
+ "step": 63
505
+ },
506
+ {
507
+ "epoch": 1.28,
508
+ "grad_norm": 0.8185235261917114,
509
+ "learning_rate": 3.578947368421053e-05,
510
+ "loss": 0.0069,
511
+ "step": 64
512
+ },
513
+ {
514
+ "epoch": 1.3,
515
+ "grad_norm": 6.909903049468994,
516
+ "learning_rate": 3.5526315789473684e-05,
517
+ "loss": 0.0597,
518
+ "step": 65
519
+ },
520
+ {
521
+ "epoch": 1.32,
522
+ "grad_norm": 3.5652379989624023,
523
+ "learning_rate": 3.526315789473684e-05,
524
+ "loss": 0.0141,
525
+ "step": 66
526
+ },
527
+ {
528
+ "epoch": 1.34,
529
+ "grad_norm": 11.72857666015625,
530
+ "learning_rate": 3.5e-05,
531
+ "loss": 0.1865,
532
+ "step": 67
533
+ },
534
+ {
535
+ "epoch": 1.36,
536
+ "grad_norm": 10.541884422302246,
537
+ "learning_rate": 3.473684210526316e-05,
538
+ "loss": 0.0606,
539
+ "step": 68
540
+ },
541
+ {
542
+ "epoch": 1.38,
543
+ "grad_norm": 5.210526943206787,
544
+ "learning_rate": 3.447368421052632e-05,
545
+ "loss": 0.0276,
546
+ "step": 69
547
+ },
548
+ {
549
+ "epoch": 1.4,
550
+ "grad_norm": 1.0411546230316162,
551
+ "learning_rate": 3.421052631578947e-05,
552
+ "loss": 0.009,
553
+ "step": 70
554
+ },
555
+ {
556
+ "epoch": 1.4,
557
+ "eval_accuracy": 0.9901315789473685,
558
+ "eval_loss": 0.020390018820762634,
559
+ "eval_runtime": 7.2971,
560
+ "eval_samples_per_second": 41.66,
561
+ "eval_steps_per_second": 1.096,
562
+ "step": 70
563
+ },
564
+ {
565
+ "epoch": 1.42,
566
+ "grad_norm": 0.7153753638267517,
567
+ "learning_rate": 3.3947368421052636e-05,
568
+ "loss": 0.0049,
569
+ "step": 71
570
+ },
571
+ {
572
+ "epoch": 1.44,
573
+ "grad_norm": 4.828935623168945,
574
+ "learning_rate": 3.368421052631579e-05,
575
+ "loss": 0.0297,
576
+ "step": 72
577
+ },
578
+ {
579
+ "epoch": 1.46,
580
+ "grad_norm": 9.103313446044922,
581
+ "learning_rate": 3.342105263157895e-05,
582
+ "loss": 0.071,
583
+ "step": 73
584
+ },
585
+ {
586
+ "epoch": 1.48,
587
+ "grad_norm": 7.635372161865234,
588
+ "learning_rate": 3.3157894736842106e-05,
589
+ "loss": 0.0363,
590
+ "step": 74
591
+ },
592
+ {
593
+ "epoch": 1.5,
594
+ "grad_norm": 8.57288646697998,
595
+ "learning_rate": 3.289473684210527e-05,
596
+ "loss": 0.0566,
597
+ "step": 75
598
+ },
599
+ {
600
+ "epoch": 1.52,
601
+ "grad_norm": 8.89769172668457,
602
+ "learning_rate": 3.2631578947368426e-05,
603
+ "loss": 0.0213,
604
+ "step": 76
605
+ },
606
+ {
607
+ "epoch": 1.54,
608
+ "grad_norm": 2.804738998413086,
609
+ "learning_rate": 3.236842105263158e-05,
610
+ "loss": 0.0138,
611
+ "step": 77
612
+ },
613
+ {
614
+ "epoch": 1.56,
615
+ "grad_norm": 4.841202259063721,
616
+ "learning_rate": 3.210526315789474e-05,
617
+ "loss": 0.0181,
618
+ "step": 78
619
+ },
620
+ {
621
+ "epoch": 1.58,
622
+ "grad_norm": 7.109033584594727,
623
+ "learning_rate": 3.1842105263157895e-05,
624
+ "loss": 0.0332,
625
+ "step": 79
626
+ },
627
+ {
628
+ "epoch": 1.6,
629
+ "grad_norm": 13.540718078613281,
630
+ "learning_rate": 3.157894736842105e-05,
631
+ "loss": 0.1667,
632
+ "step": 80
633
+ },
634
+ {
635
+ "epoch": 1.6,
636
+ "eval_accuracy": 0.9703947368421053,
637
+ "eval_loss": 0.10031529515981674,
638
+ "eval_runtime": 6.7491,
639
+ "eval_samples_per_second": 45.043,
640
+ "eval_steps_per_second": 1.185,
641
+ "step": 80
642
+ },
643
+ {
644
+ "epoch": 1.62,
645
+ "grad_norm": 30.279451370239258,
646
+ "learning_rate": 3.131578947368421e-05,
647
+ "loss": 0.1139,
648
+ "step": 81
649
+ },
650
+ {
651
+ "epoch": 1.64,
652
+ "grad_norm": 7.229616165161133,
653
+ "learning_rate": 3.105263157894737e-05,
654
+ "loss": 0.0284,
655
+ "step": 82
656
+ },
657
+ {
658
+ "epoch": 1.66,
659
+ "grad_norm": 0.327736496925354,
660
+ "learning_rate": 3.078947368421053e-05,
661
+ "loss": 0.0032,
662
+ "step": 83
663
+ },
664
+ {
665
+ "epoch": 1.68,
666
+ "grad_norm": 0.15309709310531616,
667
+ "learning_rate": 3.0526315789473684e-05,
668
+ "loss": 0.0011,
669
+ "step": 84
670
+ },
671
+ {
672
+ "epoch": 1.7,
673
+ "grad_norm": 13.175533294677734,
674
+ "learning_rate": 3.0263157894736844e-05,
675
+ "loss": 0.039,
676
+ "step": 85
677
+ },
678
+ {
679
+ "epoch": 1.72,
680
+ "grad_norm": 24.578662872314453,
681
+ "learning_rate": 3e-05,
682
+ "loss": 0.1834,
683
+ "step": 86
684
+ },
685
+ {
686
+ "epoch": 1.74,
687
+ "grad_norm": 6.690978527069092,
688
+ "learning_rate": 2.9736842105263157e-05,
689
+ "loss": 0.0214,
690
+ "step": 87
691
+ },
692
+ {
693
+ "epoch": 1.76,
694
+ "grad_norm": 21.097963333129883,
695
+ "learning_rate": 2.9473684210526314e-05,
696
+ "loss": 0.1569,
697
+ "step": 88
698
+ },
699
+ {
700
+ "epoch": 1.78,
701
+ "grad_norm": 7.984706401824951,
702
+ "learning_rate": 2.9210526315789477e-05,
703
+ "loss": 0.0326,
704
+ "step": 89
705
+ },
706
+ {
707
+ "epoch": 1.8,
708
+ "grad_norm": 11.008134841918945,
709
+ "learning_rate": 2.8947368421052634e-05,
710
+ "loss": 0.0602,
711
+ "step": 90
712
+ },
713
+ {
714
+ "epoch": 1.8,
715
+ "eval_accuracy": 0.9868421052631579,
716
+ "eval_loss": 0.052807241678237915,
717
+ "eval_runtime": 7.1111,
718
+ "eval_samples_per_second": 42.75,
719
+ "eval_steps_per_second": 1.125,
720
+ "step": 90
721
+ },
722
+ {
723
+ "epoch": 1.82,
724
+ "grad_norm": 21.149032592773438,
725
+ "learning_rate": 2.868421052631579e-05,
726
+ "loss": 0.0265,
727
+ "step": 91
728
+ },
729
+ {
730
+ "epoch": 1.84,
731
+ "grad_norm": 3.4383814334869385,
732
+ "learning_rate": 2.842105263157895e-05,
733
+ "loss": 0.0059,
734
+ "step": 92
735
+ },
736
+ {
737
+ "epoch": 1.86,
738
+ "grad_norm": 6.291805267333984,
739
+ "learning_rate": 2.8157894736842106e-05,
740
+ "loss": 0.0177,
741
+ "step": 93
742
+ },
743
+ {
744
+ "epoch": 1.88,
745
+ "grad_norm": 1.5197041034698486,
746
+ "learning_rate": 2.7894736842105263e-05,
747
+ "loss": 0.0065,
748
+ "step": 94
749
+ },
750
+ {
751
+ "epoch": 1.9,
752
+ "grad_norm": 0.47119140625,
753
+ "learning_rate": 2.7631578947368426e-05,
754
+ "loss": 0.0018,
755
+ "step": 95
756
+ },
757
+ {
758
+ "epoch": 1.92,
759
+ "grad_norm": 7.311519145965576,
760
+ "learning_rate": 2.7368421052631583e-05,
761
+ "loss": 0.0235,
762
+ "step": 96
763
+ },
764
+ {
765
+ "epoch": 1.94,
766
+ "grad_norm": 53.3145866394043,
767
+ "learning_rate": 2.710526315789474e-05,
768
+ "loss": 0.1186,
769
+ "step": 97
770
+ },
771
+ {
772
+ "epoch": 1.96,
773
+ "grad_norm": 13.420350074768066,
774
+ "learning_rate": 2.6842105263157896e-05,
775
+ "loss": 0.2332,
776
+ "step": 98
777
+ },
778
+ {
779
+ "epoch": 1.98,
780
+ "grad_norm": 13.3897123336792,
781
+ "learning_rate": 2.6578947368421052e-05,
782
+ "loss": 0.0759,
783
+ "step": 99
784
+ },
785
+ {
786
+ "epoch": 2.0,
787
+ "grad_norm": 1.8416231870651245,
788
+ "learning_rate": 2.6315789473684212e-05,
789
+ "loss": 0.0076,
790
+ "step": 100
791
+ },
792
+ {
793
+ "epoch": 2.0,
794
+ "eval_accuracy": 0.993421052631579,
795
+ "eval_loss": 0.027568014338612556,
796
+ "eval_runtime": 7.0992,
797
+ "eval_samples_per_second": 42.822,
798
+ "eval_steps_per_second": 1.127,
799
+ "step": 100
800
+ },
801
+ {
802
+ "epoch": 2.02,
803
+ "grad_norm": 6.973387241363525,
804
+ "learning_rate": 2.605263157894737e-05,
805
+ "loss": 0.0255,
806
+ "step": 101
807
+ },
808
+ {
809
+ "epoch": 2.04,
810
+ "grad_norm": 0.33660009503364563,
811
+ "learning_rate": 2.578947368421053e-05,
812
+ "loss": 0.0012,
813
+ "step": 102
814
+ },
815
+ {
816
+ "epoch": 2.06,
817
+ "grad_norm": 11.269298553466797,
818
+ "learning_rate": 2.5526315789473688e-05,
819
+ "loss": 0.1097,
820
+ "step": 103
821
+ },
822
+ {
823
+ "epoch": 2.08,
824
+ "grad_norm": 7.602676868438721,
825
+ "learning_rate": 2.5263157894736845e-05,
826
+ "loss": 0.034,
827
+ "step": 104
828
+ },
829
+ {
830
+ "epoch": 2.1,
831
+ "grad_norm": 7.900285720825195,
832
+ "learning_rate": 2.5e-05,
833
+ "loss": 0.0399,
834
+ "step": 105
835
+ },
836
+ {
837
+ "epoch": 2.12,
838
+ "grad_norm": 4.07220983505249,
839
+ "learning_rate": 2.4736842105263158e-05,
840
+ "loss": 0.0055,
841
+ "step": 106
842
+ },
843
+ {
844
+ "epoch": 2.14,
845
+ "grad_norm": 0.09085848927497864,
846
+ "learning_rate": 2.4473684210526318e-05,
847
+ "loss": 0.0005,
848
+ "step": 107
849
+ },
850
+ {
851
+ "epoch": 2.16,
852
+ "grad_norm": 0.5666660666465759,
853
+ "learning_rate": 2.4210526315789474e-05,
854
+ "loss": 0.0029,
855
+ "step": 108
856
+ },
857
+ {
858
+ "epoch": 2.18,
859
+ "grad_norm": 12.90062427520752,
860
+ "learning_rate": 2.394736842105263e-05,
861
+ "loss": 0.051,
862
+ "step": 109
863
+ },
864
+ {
865
+ "epoch": 2.2,
866
+ "grad_norm": 0.24952377378940582,
867
+ "learning_rate": 2.368421052631579e-05,
868
+ "loss": 0.0007,
869
+ "step": 110
870
+ },
871
+ {
872
+ "epoch": 2.2,
873
+ "eval_accuracy": 0.9835526315789473,
874
+ "eval_loss": 0.05723079293966293,
875
+ "eval_runtime": 7.2757,
876
+ "eval_samples_per_second": 41.783,
877
+ "eval_steps_per_second": 1.1,
878
+ "step": 110
879
+ },
880
+ {
881
+ "epoch": 2.22,
882
+ "grad_norm": 2.654731512069702,
883
+ "learning_rate": 2.342105263157895e-05,
884
+ "loss": 0.0036,
885
+ "step": 111
886
+ },
887
+ {
888
+ "epoch": 2.24,
889
+ "grad_norm": 0.5114130973815918,
890
+ "learning_rate": 2.3157894736842107e-05,
891
+ "loss": 0.0012,
892
+ "step": 112
893
+ },
894
+ {
895
+ "epoch": 2.26,
896
+ "grad_norm": 9.55749797821045,
897
+ "learning_rate": 2.2894736842105263e-05,
898
+ "loss": 0.0195,
899
+ "step": 113
900
+ },
901
+ {
902
+ "epoch": 2.28,
903
+ "grad_norm": 11.102432250976562,
904
+ "learning_rate": 2.2631578947368423e-05,
905
+ "loss": 0.0204,
906
+ "step": 114
907
+ },
908
+ {
909
+ "epoch": 2.3,
910
+ "grad_norm": 2.684544324874878,
911
+ "learning_rate": 2.236842105263158e-05,
912
+ "loss": 0.0047,
913
+ "step": 115
914
+ },
915
+ {
916
+ "epoch": 2.32,
917
+ "grad_norm": 0.22236469388008118,
918
+ "learning_rate": 2.2105263157894736e-05,
919
+ "loss": 0.0008,
920
+ "step": 116
921
+ },
922
+ {
923
+ "epoch": 2.34,
924
+ "grad_norm": 8.926434516906738,
925
+ "learning_rate": 2.1842105263157896e-05,
926
+ "loss": 0.0223,
927
+ "step": 117
928
+ },
929
+ {
930
+ "epoch": 2.36,
931
+ "grad_norm": 3.3315141201019287,
932
+ "learning_rate": 2.1578947368421053e-05,
933
+ "loss": 0.0117,
934
+ "step": 118
935
+ },
936
+ {
937
+ "epoch": 2.38,
938
+ "grad_norm": 10.430353164672852,
939
+ "learning_rate": 2.1315789473684212e-05,
940
+ "loss": 0.0303,
941
+ "step": 119
942
+ },
943
+ {
944
+ "epoch": 2.4,
945
+ "grad_norm": 0.15297500789165497,
946
+ "learning_rate": 2.105263157894737e-05,
947
+ "loss": 0.0008,
948
+ "step": 120
949
+ },
950
+ {
951
+ "epoch": 2.4,
952
+ "eval_accuracy": 0.9967105263157895,
953
+ "eval_loss": 0.009093067608773708,
954
+ "eval_runtime": 6.8131,
955
+ "eval_samples_per_second": 44.62,
956
+ "eval_steps_per_second": 1.174,
957
+ "step": 120
958
+ },
959
+ {
960
+ "epoch": 2.42,
961
+ "grad_norm": 0.6949332356452942,
962
+ "learning_rate": 2.078947368421053e-05,
963
+ "loss": 0.0018,
964
+ "step": 121
965
+ },
966
+ {
967
+ "epoch": 2.44,
968
+ "grad_norm": 0.10584941506385803,
969
+ "learning_rate": 2.0526315789473685e-05,
970
+ "loss": 0.0006,
971
+ "step": 122
972
+ },
973
+ {
974
+ "epoch": 2.46,
975
+ "grad_norm": 2.782750368118286,
976
+ "learning_rate": 2.0263157894736842e-05,
977
+ "loss": 0.0064,
978
+ "step": 123
979
+ },
980
+ {
981
+ "epoch": 2.48,
982
+ "grad_norm": 0.1843283325433731,
983
+ "learning_rate": 2e-05,
984
+ "loss": 0.0007,
985
+ "step": 124
986
+ },
987
+ {
988
+ "epoch": 2.5,
989
+ "grad_norm": 19.077423095703125,
990
+ "learning_rate": 1.9736842105263158e-05,
991
+ "loss": 0.1095,
992
+ "step": 125
993
+ },
994
+ {
995
+ "epoch": 2.52,
996
+ "grad_norm": 9.631857872009277,
997
+ "learning_rate": 1.9473684210526315e-05,
998
+ "loss": 0.0286,
999
+ "step": 126
1000
+ },
1001
+ {
1002
+ "epoch": 2.54,
1003
+ "grad_norm": 2.46711802482605,
1004
+ "learning_rate": 1.9210526315789474e-05,
1005
+ "loss": 0.0042,
1006
+ "step": 127
1007
+ },
1008
+ {
1009
+ "epoch": 2.56,
1010
+ "grad_norm": 0.9471967220306396,
1011
+ "learning_rate": 1.8947368421052634e-05,
1012
+ "loss": 0.0038,
1013
+ "step": 128
1014
+ },
1015
+ {
1016
+ "epoch": 2.58,
1017
+ "grad_norm": 0.1509537547826767,
1018
+ "learning_rate": 1.868421052631579e-05,
1019
+ "loss": 0.0009,
1020
+ "step": 129
1021
+ },
1022
+ {
1023
+ "epoch": 2.6,
1024
+ "grad_norm": 16.95519256591797,
1025
+ "learning_rate": 1.8421052631578947e-05,
1026
+ "loss": 0.1277,
1027
+ "step": 130
1028
+ },
1029
+ {
1030
+ "epoch": 2.6,
1031
+ "eval_accuracy": 0.9967105263157895,
1032
+ "eval_loss": 0.004848334938287735,
1033
+ "eval_runtime": 7.1393,
1034
+ "eval_samples_per_second": 42.581,
1035
+ "eval_steps_per_second": 1.121,
1036
+ "step": 130
1037
+ },
1038
+ {
1039
+ "epoch": 2.62,
1040
+ "grad_norm": 0.3852033317089081,
1041
+ "learning_rate": 1.8157894736842107e-05,
1042
+ "loss": 0.0009,
1043
+ "step": 131
1044
+ },
1045
+ {
1046
+ "epoch": 2.64,
1047
+ "grad_norm": 0.23005267977714539,
1048
+ "learning_rate": 1.7894736842105264e-05,
1049
+ "loss": 0.0006,
1050
+ "step": 132
1051
+ },
1052
+ {
1053
+ "epoch": 2.66,
1054
+ "grad_norm": 2.9750654697418213,
1055
+ "learning_rate": 1.763157894736842e-05,
1056
+ "loss": 0.0096,
1057
+ "step": 133
1058
+ },
1059
+ {
1060
+ "epoch": 2.68,
1061
+ "grad_norm": 6.330101490020752,
1062
+ "learning_rate": 1.736842105263158e-05,
1063
+ "loss": 0.0224,
1064
+ "step": 134
1065
+ },
1066
+ {
1067
+ "epoch": 2.7,
1068
+ "grad_norm": 0.2537049949169159,
1069
+ "learning_rate": 1.7105263157894737e-05,
1070
+ "loss": 0.001,
1071
+ "step": 135
1072
+ },
1073
+ {
1074
+ "epoch": 2.72,
1075
+ "grad_norm": 0.49432969093322754,
1076
+ "learning_rate": 1.6842105263157896e-05,
1077
+ "loss": 0.002,
1078
+ "step": 136
1079
+ },
1080
+ {
1081
+ "epoch": 2.74,
1082
+ "grad_norm": 0.878919243812561,
1083
+ "learning_rate": 1.6578947368421053e-05,
1084
+ "loss": 0.0022,
1085
+ "step": 137
1086
+ },
1087
+ {
1088
+ "epoch": 2.76,
1089
+ "grad_norm": 0.4538193941116333,
1090
+ "learning_rate": 1.6315789473684213e-05,
1091
+ "loss": 0.0016,
1092
+ "step": 138
1093
+ },
1094
+ {
1095
+ "epoch": 2.78,
1096
+ "grad_norm": 13.346075057983398,
1097
+ "learning_rate": 1.605263157894737e-05,
1098
+ "loss": 0.0384,
1099
+ "step": 139
1100
+ },
1101
+ {
1102
+ "epoch": 2.8,
1103
+ "grad_norm": 0.42253851890563965,
1104
+ "learning_rate": 1.5789473684210526e-05,
1105
+ "loss": 0.0007,
1106
+ "step": 140
1107
+ },
1108
+ {
1109
+ "epoch": 2.8,
1110
+ "eval_accuracy": 0.9736842105263158,
1111
+ "eval_loss": 0.08703595399856567,
1112
+ "eval_runtime": 7.1737,
1113
+ "eval_samples_per_second": 42.377,
1114
+ "eval_steps_per_second": 1.115,
1115
+ "step": 140
1116
+ },
1117
+ {
1118
+ "epoch": 2.82,
1119
+ "grad_norm": 8.999384880065918,
1120
+ "learning_rate": 1.5526315789473686e-05,
1121
+ "loss": 0.041,
1122
+ "step": 141
1123
+ },
1124
+ {
1125
+ "epoch": 2.84,
1126
+ "grad_norm": 0.34943702816963196,
1127
+ "learning_rate": 1.5263157894736842e-05,
1128
+ "loss": 0.0008,
1129
+ "step": 142
1130
+ },
1131
+ {
1132
+ "epoch": 2.86,
1133
+ "grad_norm": 41.58428192138672,
1134
+ "learning_rate": 1.5e-05,
1135
+ "loss": 0.0352,
1136
+ "step": 143
1137
+ },
1138
+ {
1139
+ "epoch": 2.88,
1140
+ "grad_norm": 1.358328104019165,
1141
+ "learning_rate": 1.4736842105263157e-05,
1142
+ "loss": 0.004,
1143
+ "step": 144
1144
+ },
1145
+ {
1146
+ "epoch": 2.9,
1147
+ "grad_norm": 26.084732055664062,
1148
+ "learning_rate": 1.4473684210526317e-05,
1149
+ "loss": 0.1456,
1150
+ "step": 145
1151
+ },
1152
+ {
1153
+ "epoch": 2.92,
1154
+ "grad_norm": 4.426596164703369,
1155
+ "learning_rate": 1.4210526315789475e-05,
1156
+ "loss": 0.0056,
1157
+ "step": 146
1158
+ },
1159
+ {
1160
+ "epoch": 2.94,
1161
+ "grad_norm": 7.729344367980957,
1162
+ "learning_rate": 1.3947368421052631e-05,
1163
+ "loss": 0.0536,
1164
+ "step": 147
1165
+ },
1166
+ {
1167
+ "epoch": 2.96,
1168
+ "grad_norm": 0.49353259801864624,
1169
+ "learning_rate": 1.3684210526315791e-05,
1170
+ "loss": 0.0035,
1171
+ "step": 148
1172
+ },
1173
+ {
1174
+ "epoch": 2.98,
1175
+ "grad_norm": 15.301690101623535,
1176
+ "learning_rate": 1.3421052631578948e-05,
1177
+ "loss": 0.0138,
1178
+ "step": 149
1179
+ },
1180
+ {
1181
+ "epoch": 3.0,
1182
+ "grad_norm": 0.520106852054596,
1183
+ "learning_rate": 1.3157894736842106e-05,
1184
+ "loss": 0.0008,
1185
+ "step": 150
1186
+ },
1187
+ {
1188
+ "epoch": 3.0,
1189
+ "eval_accuracy": 0.993421052631579,
1190
+ "eval_loss": 0.005907059647142887,
1191
+ "eval_runtime": 7.2044,
1192
+ "eval_samples_per_second": 42.197,
1193
+ "eval_steps_per_second": 1.11,
1194
+ "step": 150
1195
+ },
1196
+ {
1197
+ "epoch": 3.02,
1198
+ "grad_norm": 0.7136516571044922,
1199
+ "learning_rate": 1.2894736842105264e-05,
1200
+ "loss": 0.0028,
1201
+ "step": 151
1202
+ },
1203
+ {
1204
+ "epoch": 3.04,
1205
+ "grad_norm": 0.28995925188064575,
1206
+ "learning_rate": 1.2631578947368422e-05,
1207
+ "loss": 0.0006,
1208
+ "step": 152
1209
+ },
1210
+ {
1211
+ "epoch": 3.06,
1212
+ "grad_norm": 9.869847297668457,
1213
+ "learning_rate": 1.2368421052631579e-05,
1214
+ "loss": 0.0342,
1215
+ "step": 153
1216
+ },
1217
+ {
1218
+ "epoch": 3.08,
1219
+ "grad_norm": 0.20523004233837128,
1220
+ "learning_rate": 1.2105263157894737e-05,
1221
+ "loss": 0.0005,
1222
+ "step": 154
1223
+ },
1224
+ {
1225
+ "epoch": 3.1,
1226
+ "grad_norm": 8.038060188293457,
1227
+ "learning_rate": 1.1842105263157895e-05,
1228
+ "loss": 0.0178,
1229
+ "step": 155
1230
+ },
1231
+ {
1232
+ "epoch": 3.12,
1233
+ "grad_norm": 1.2011505365371704,
1234
+ "learning_rate": 1.1578947368421053e-05,
1235
+ "loss": 0.002,
1236
+ "step": 156
1237
+ },
1238
+ {
1239
+ "epoch": 3.14,
1240
+ "grad_norm": 1.0488544702529907,
1241
+ "learning_rate": 1.1315789473684212e-05,
1242
+ "loss": 0.0025,
1243
+ "step": 157
1244
+ },
1245
+ {
1246
+ "epoch": 3.16,
1247
+ "grad_norm": 0.24755439162254333,
1248
+ "learning_rate": 1.1052631578947368e-05,
1249
+ "loss": 0.0005,
1250
+ "step": 158
1251
+ },
1252
+ {
1253
+ "epoch": 3.18,
1254
+ "grad_norm": 0.37720346450805664,
1255
+ "learning_rate": 1.0789473684210526e-05,
1256
+ "loss": 0.0007,
1257
+ "step": 159
1258
+ },
1259
+ {
1260
+ "epoch": 3.2,
1261
+ "grad_norm": 0.054420772939920425,
1262
+ "learning_rate": 1.0526315789473684e-05,
1263
+ "loss": 0.0003,
1264
+ "step": 160
1265
+ },
1266
+ {
1267
+ "epoch": 3.2,
1268
+ "eval_accuracy": 0.9967105263157895,
1269
+ "eval_loss": 0.012113516218960285,
1270
+ "eval_runtime": 7.4779,
1271
+ "eval_samples_per_second": 40.653,
1272
+ "eval_steps_per_second": 1.07,
1273
+ "step": 160
1274
+ },
1275
+ {
1276
+ "epoch": 3.22,
1277
+ "grad_norm": 3.170163631439209,
1278
+ "learning_rate": 1.0263157894736843e-05,
1279
+ "loss": 0.0048,
1280
+ "step": 161
1281
+ },
1282
+ {
1283
+ "epoch": 3.24,
1284
+ "grad_norm": 9.659165382385254,
1285
+ "learning_rate": 1e-05,
1286
+ "loss": 0.006,
1287
+ "step": 162
1288
+ },
1289
+ {
1290
+ "epoch": 3.26,
1291
+ "grad_norm": 0.07884739339351654,
1292
+ "learning_rate": 9.736842105263157e-06,
1293
+ "loss": 0.0003,
1294
+ "step": 163
1295
+ },
1296
+ {
1297
+ "epoch": 3.28,
1298
+ "grad_norm": 1.1162643432617188,
1299
+ "learning_rate": 9.473684210526317e-06,
1300
+ "loss": 0.0019,
1301
+ "step": 164
1302
+ },
1303
+ {
1304
+ "epoch": 3.3,
1305
+ "grad_norm": 0.3206580877304077,
1306
+ "learning_rate": 9.210526315789474e-06,
1307
+ "loss": 0.001,
1308
+ "step": 165
1309
+ },
1310
+ {
1311
+ "epoch": 3.32,
1312
+ "grad_norm": 15.26516056060791,
1313
+ "learning_rate": 8.947368421052632e-06,
1314
+ "loss": 0.0467,
1315
+ "step": 166
1316
+ },
1317
+ {
1318
+ "epoch": 3.34,
1319
+ "grad_norm": 0.6477659940719604,
1320
+ "learning_rate": 8.68421052631579e-06,
1321
+ "loss": 0.002,
1322
+ "step": 167
1323
+ },
1324
+ {
1325
+ "epoch": 3.36,
1326
+ "grad_norm": 16.165756225585938,
1327
+ "learning_rate": 8.421052631578948e-06,
1328
+ "loss": 0.0486,
1329
+ "step": 168
1330
+ },
1331
+ {
1332
+ "epoch": 3.38,
1333
+ "grad_norm": 0.3942013382911682,
1334
+ "learning_rate": 8.157894736842106e-06,
1335
+ "loss": 0.001,
1336
+ "step": 169
1337
+ },
1338
+ {
1339
+ "epoch": 3.4,
1340
+ "grad_norm": 0.2746890187263489,
1341
+ "learning_rate": 7.894736842105263e-06,
1342
+ "loss": 0.0008,
1343
+ "step": 170
1344
+ },
1345
+ {
1346
+ "epoch": 3.4,
1347
+ "eval_accuracy": 0.993421052631579,
1348
+ "eval_loss": 0.02530418336391449,
1349
+ "eval_runtime": 6.8199,
1350
+ "eval_samples_per_second": 44.575,
1351
+ "eval_steps_per_second": 1.173,
1352
+ "step": 170
1353
+ },
1354
+ {
1355
+ "epoch": 3.42,
1356
+ "grad_norm": 0.1466667354106903,
1357
+ "learning_rate": 7.631578947368421e-06,
1358
+ "loss": 0.0006,
1359
+ "step": 171
1360
+ },
1361
+ {
1362
+ "epoch": 3.44,
1363
+ "grad_norm": 0.35398414731025696,
1364
+ "learning_rate": 7.3684210526315784e-06,
1365
+ "loss": 0.0008,
1366
+ "step": 172
1367
+ },
1368
+ {
1369
+ "epoch": 3.46,
1370
+ "grad_norm": 0.03358185663819313,
1371
+ "learning_rate": 7.1052631578947375e-06,
1372
+ "loss": 0.0001,
1373
+ "step": 173
1374
+ },
1375
+ {
1376
+ "epoch": 3.48,
1377
+ "grad_norm": 0.04026187211275101,
1378
+ "learning_rate": 6.842105263157896e-06,
1379
+ "loss": 0.0002,
1380
+ "step": 174
1381
+ },
1382
+ {
1383
+ "epoch": 3.5,
1384
+ "grad_norm": 0.10377287119626999,
1385
+ "learning_rate": 6.578947368421053e-06,
1386
+ "loss": 0.0008,
1387
+ "step": 175
1388
+ },
1389
+ {
1390
+ "epoch": 3.52,
1391
+ "grad_norm": 3.1232450008392334,
1392
+ "learning_rate": 6.315789473684211e-06,
1393
+ "loss": 0.0062,
1394
+ "step": 176
1395
+ },
1396
+ {
1397
+ "epoch": 3.54,
1398
+ "grad_norm": 0.0659070760011673,
1399
+ "learning_rate": 6.0526315789473685e-06,
1400
+ "loss": 0.0002,
1401
+ "step": 177
1402
+ },
1403
+ {
1404
+ "epoch": 3.56,
1405
+ "grad_norm": 0.4386169910430908,
1406
+ "learning_rate": 5.789473684210527e-06,
1407
+ "loss": 0.0007,
1408
+ "step": 178
1409
+ },
1410
+ {
1411
+ "epoch": 3.58,
1412
+ "grad_norm": 0.09403068572282791,
1413
+ "learning_rate": 5.526315789473684e-06,
1414
+ "loss": 0.0001,
1415
+ "step": 179
1416
+ },
1417
+ {
1418
+ "epoch": 3.6,
1419
+ "grad_norm": 0.011226998642086983,
1420
+ "learning_rate": 5.263157894736842e-06,
1421
+ "loss": 0.0,
1422
+ "step": 180
1423
+ },
1424
+ {
1425
+ "epoch": 3.6,
1426
+ "eval_accuracy": 0.9967105263157895,
1427
+ "eval_loss": 0.012991190887987614,
1428
+ "eval_runtime": 7.2339,
1429
+ "eval_samples_per_second": 42.024,
1430
+ "eval_steps_per_second": 1.106,
1431
+ "step": 180
1432
+ },
1433
+ {
1434
+ "epoch": 3.62,
1435
+ "grad_norm": 0.3960583209991455,
1436
+ "learning_rate": 5e-06,
1437
+ "loss": 0.0013,
1438
+ "step": 181
1439
+ },
1440
+ {
1441
+ "epoch": 3.64,
1442
+ "grad_norm": 0.028123315423727036,
1443
+ "learning_rate": 4.736842105263159e-06,
1444
+ "loss": 0.0001,
1445
+ "step": 182
1446
+ },
1447
+ {
1448
+ "epoch": 3.66,
1449
+ "grad_norm": 3.7100679874420166,
1450
+ "learning_rate": 4.473684210526316e-06,
1451
+ "loss": 0.0062,
1452
+ "step": 183
1453
+ },
1454
+ {
1455
+ "epoch": 3.68,
1456
+ "grad_norm": 0.027926042675971985,
1457
+ "learning_rate": 4.210526315789474e-06,
1458
+ "loss": 0.0001,
1459
+ "step": 184
1460
+ },
1461
+ {
1462
+ "epoch": 3.7,
1463
+ "grad_norm": 1.727767825126648,
1464
+ "learning_rate": 3.9473684210526315e-06,
1465
+ "loss": 0.0022,
1466
+ "step": 185
1467
+ },
1468
+ {
1469
+ "epoch": 3.72,
1470
+ "grad_norm": 1.4793553352355957,
1471
+ "learning_rate": 3.6842105263157892e-06,
1472
+ "loss": 0.0011,
1473
+ "step": 186
1474
+ },
1475
+ {
1476
+ "epoch": 3.74,
1477
+ "grad_norm": 2.847693681716919,
1478
+ "learning_rate": 3.421052631578948e-06,
1479
+ "loss": 0.004,
1480
+ "step": 187
1481
+ },
1482
+ {
1483
+ "epoch": 3.76,
1484
+ "grad_norm": 0.017291821539402008,
1485
+ "learning_rate": 3.1578947368421056e-06,
1486
+ "loss": 0.0001,
1487
+ "step": 188
1488
+ },
1489
+ {
1490
+ "epoch": 3.78,
1491
+ "grad_norm": 0.6957990527153015,
1492
+ "learning_rate": 2.8947368421052634e-06,
1493
+ "loss": 0.001,
1494
+ "step": 189
1495
+ },
1496
+ {
1497
+ "epoch": 3.8,
1498
+ "grad_norm": 0.032589249312877655,
1499
+ "learning_rate": 2.631578947368421e-06,
1500
+ "loss": 0.0001,
1501
+ "step": 190
1502
+ },
1503
+ {
1504
+ "epoch": 3.8,
1505
+ "eval_accuracy": 0.993421052631579,
1506
+ "eval_loss": 0.009236541576683521,
1507
+ "eval_runtime": 6.8219,
1508
+ "eval_samples_per_second": 44.563,
1509
+ "eval_steps_per_second": 1.173,
1510
+ "step": 190
1511
+ },
1512
+ {
1513
+ "epoch": 3.82,
1514
+ "grad_norm": 0.5736590027809143,
1515
+ "learning_rate": 2.3684210526315793e-06,
1516
+ "loss": 0.0013,
1517
+ "step": 191
1518
+ },
1519
+ {
1520
+ "epoch": 3.84,
1521
+ "grad_norm": 0.4599183201789856,
1522
+ "learning_rate": 2.105263157894737e-06,
1523
+ "loss": 0.0016,
1524
+ "step": 192
1525
+ },
1526
+ {
1527
+ "epoch": 3.86,
1528
+ "grad_norm": 0.0121466601267457,
1529
+ "learning_rate": 1.8421052631578946e-06,
1530
+ "loss": 0.0,
1531
+ "step": 193
1532
+ },
1533
+ {
1534
+ "epoch": 3.88,
1535
+ "grad_norm": 2.155550479888916,
1536
+ "learning_rate": 1.5789473684210528e-06,
1537
+ "loss": 0.0048,
1538
+ "step": 194
1539
+ },
1540
+ {
1541
+ "epoch": 3.9,
1542
+ "grad_norm": 0.3076814115047455,
1543
+ "learning_rate": 1.3157894736842106e-06,
1544
+ "loss": 0.0006,
1545
+ "step": 195
1546
+ },
1547
+ {
1548
+ "epoch": 3.92,
1549
+ "grad_norm": 0.05223652720451355,
1550
+ "learning_rate": 1.0526315789473685e-06,
1551
+ "loss": 0.0002,
1552
+ "step": 196
1553
+ },
1554
+ {
1555
+ "epoch": 3.94,
1556
+ "grad_norm": 0.10159199684858322,
1557
+ "learning_rate": 7.894736842105264e-07,
1558
+ "loss": 0.0001,
1559
+ "step": 197
1560
+ },
1561
+ {
1562
+ "epoch": 3.96,
1563
+ "grad_norm": 7.128465175628662,
1564
+ "learning_rate": 5.263157894736843e-07,
1565
+ "loss": 0.0243,
1566
+ "step": 198
1567
+ },
1568
+ {
1569
+ "epoch": 3.98,
1570
+ "grad_norm": 0.21553558111190796,
1571
+ "learning_rate": 2.6315789473684213e-07,
1572
+ "loss": 0.0007,
1573
+ "step": 199
1574
+ },
1575
+ {
1576
+ "epoch": 4.0,
1577
+ "grad_norm": 0.11059165745973587,
1578
+ "learning_rate": 0.0,
1579
+ "loss": 0.0002,
1580
+ "step": 200
1581
+ },
1582
+ {
1583
+ "epoch": 4.0,
1584
+ "eval_accuracy": 0.9967105263157895,
1585
+ "eval_loss": 0.011121152900159359,
1586
+ "eval_runtime": 7.2058,
1587
+ "eval_samples_per_second": 42.188,
1588
+ "eval_steps_per_second": 1.11,
1589
+ "step": 200
1590
+ }
1591
+ ],
1592
+ "logging_steps": 1,
1593
+ "max_steps": 200,
1594
+ "num_input_tokens_seen": 0,
1595
+ "num_train_epochs": 4,
1596
+ "save_steps": 10,
1597
+ "total_flos": 6.201834183515013e+17,
1598
+ "train_batch_size": 40,
1599
+ "trial_name": null,
1600
+ "trial_params": null
1601
+ }
checkpoint-200/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d12326b817ccf86422554696b6e15d756b4174e67e313ecfbe14aadf3bb73244
3
+ size 4856
config.json ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "Organika/sdxl-detector",
3
+ "_num_labels": 2,
4
+ "architectures": [
5
+ "SwinForImageClassification"
6
+ ],
7
+ "attention_probs_dropout_prob": 0.0,
8
+ "depths": [
9
+ 2,
10
+ 2,
11
+ 18,
12
+ 2
13
+ ],
14
+ "drop_path_rate": 0.1,
15
+ "embed_dim": 128,
16
+ "encoder_stride": 32,
17
+ "hidden_act": "gelu",
18
+ "hidden_dropout_prob": 0.0,
19
+ "hidden_size": 1024,
20
+ "id2label": {
21
+ "0": "artificial",
22
+ "1": "human"
23
+ },
24
+ "image_size": 224,
25
+ "initializer_range": 0.02,
26
+ "label2id": {
27
+ "artificial": 0,
28
+ "human": 1
29
+ },
30
+ "layer_norm_eps": 1e-05,
31
+ "max_length": 128,
32
+ "mlp_ratio": 4.0,
33
+ "model_type": "swin",
34
+ "num_channels": 3,
35
+ "num_heads": [
36
+ 4,
37
+ 8,
38
+ 16,
39
+ 32
40
+ ],
41
+ "num_layers": 4,
42
+ "out_features": [
43
+ "stage4"
44
+ ],
45
+ "out_indices": [
46
+ 4
47
+ ],
48
+ "padding": "max_length",
49
+ "patch_size": 4,
50
+ "path_norm": true,
51
+ "problem_type": "single_label_classification",
52
+ "qkv_bias": true,
53
+ "stage_names": [
54
+ "stem",
55
+ "stage1",
56
+ "stage2",
57
+ "stage3",
58
+ "stage4"
59
+ ],
60
+ "torch_dtype": "float32",
61
+ "transformers_version": "4.39.3",
62
+ "use_absolute_embeddings": false,
63
+ "window_size": 7
64
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86a9c215382ea7372ff3d6cc70c3c6268b24b661c627fbfe8ef12ac3abd2a13a
3
+ size 347498816
preprocessor_config.json ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_valid_processor_keys": [
3
+ "images",
4
+ "do_resize",
5
+ "size",
6
+ "resample",
7
+ "do_rescale",
8
+ "rescale_factor",
9
+ "do_normalize",
10
+ "image_mean",
11
+ "image_std",
12
+ "return_tensors",
13
+ "data_format",
14
+ "input_data_format"
15
+ ],
16
+ "do_normalize": true,
17
+ "do_rescale": true,
18
+ "do_resize": true,
19
+ "image_mean": [
20
+ 0.485,
21
+ 0.456,
22
+ 0.406
23
+ ],
24
+ "image_processor_type": "ViTImageProcessor",
25
+ "image_std": [
26
+ 0.229,
27
+ 0.224,
28
+ 0.225
29
+ ],
30
+ "resample": 3,
31
+ "rescale_factor": 0.00392156862745098,
32
+ "size": {
33
+ "height": 224,
34
+ "width": 224
35
+ }
36
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d12326b817ccf86422554696b6e15d756b4174e67e313ecfbe14aadf3bb73244
3
+ size 4856