besimray committed
Commit 58961bb
1 Parent(s): aac17e3

Training in progress, step 40, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:e02bf26d5b2401ec7dc326297f3ee2388f15d11930d5efb6984ae8f6428a10f9
+ oid sha256:9258c3bf97aa3a5549055538a78ff5ecdd5a0381ae8cb44fed53bdf82e82eb7a
  size 22573704
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:da94be3ae0a64e853b92443c0a2c39df2e4402a3604ed63a25872c61f1cc51db
+ oid sha256:ff4e7a95fff2b96fc767f188fcd9147729a5edc6157305cc2de75ce14a9af34f
  size 11710970
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:27827d7d71d66eac185d181a061a3fc686c05fceae71aefa31bdc9f272ad8dc6
+ oid sha256:8128afb4afe4aecc26d8f0be5e4c4ed9a96e2778b2735f61e9a821ba55be2be6
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:2b01233c08a586038ebf1cf3e5cbb4f41b3484fab28bfbbe42cb46fd4e382bde
+ oid sha256:68b390e57be002933c68cbb0976c807a453fcfb48626c716bc0894f16432712e
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
- "best_metric": 0.7271688580513,
- "best_model_checkpoint": "miner_id_24/checkpoint-30",
- "epoch": 1.7142857142857144,
+ "best_metric": 0.7075809240341187,
+ "best_model_checkpoint": "miner_id_24/checkpoint-40",
+ "epoch": 2.2857142857142856,
  "eval_steps": 10,
- "global_step": 30,
+ "global_step": 40,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -249,6 +249,84 @@
  "eval_samples_per_second": 7.185,
  "eval_steps_per_second": 1.916,
  "step": 30
+ },
+ {
+ "epoch": 1.7714285714285714,
+ "grad_norm": 0.29218417406082153,
+ "learning_rate": 5.182610115288295e-05,
+ "loss": 0.6656,
+ "step": 31
+ },
+ {
+ "epoch": 1.8285714285714287,
+ "grad_norm": 0.30864810943603516,
+ "learning_rate": 4.817389884711705e-05,
+ "loss": 0.7106,
+ "step": 32
+ },
+ {
+ "epoch": 1.8857142857142857,
+ "grad_norm": 0.3442583382129669,
+ "learning_rate": 4.4531439581106295e-05,
+ "loss": 0.7097,
+ "step": 33
+ },
+ {
+ "epoch": 1.9428571428571428,
+ "grad_norm": 0.32284116744995117,
+ "learning_rate": 4.0918157451028185e-05,
+ "loss": 0.7053,
+ "step": 34
+ },
+ {
+ "epoch": 2.0,
+ "grad_norm": 0.2941528856754303,
+ "learning_rate": 3.735333088041596e-05,
+ "loss": 0.5891,
+ "step": 35
+ },
+ {
+ "epoch": 2.057142857142857,
+ "grad_norm": 0.3290832042694092,
+ "learning_rate": 3.38559797614277e-05,
+ "loss": 0.7012,
+ "step": 36
+ },
+ {
+ "epoch": 2.1142857142857143,
+ "grad_norm": 0.29661279916763306,
+ "learning_rate": 3.0444763975492208e-05,
+ "loss": 0.6034,
+ "step": 37
+ },
+ {
+ "epoch": 2.1714285714285713,
+ "grad_norm": 0.30858200788497925,
+ "learning_rate": 2.7137883834768073e-05,
+ "loss": 0.6878,
+ "step": 38
+ },
+ {
+ "epoch": 2.2285714285714286,
+ "grad_norm": 0.2789033353328705,
+ "learning_rate": 2.3952982975603496e-05,
+ "loss": 0.7019,
+ "step": 39
+ },
+ {
+ "epoch": 2.2857142857142856,
+ "grad_norm": 0.2946871519088745,
+ "learning_rate": 2.090705422210237e-05,
+ "loss": 0.6679,
+ "step": 40
+ },
+ {
+ "epoch": 2.2857142857142856,
+ "eval_loss": 0.7075809240341187,
+ "eval_runtime": 2.087,
+ "eval_samples_per_second": 7.187,
+ "eval_steps_per_second": 1.917,
+ "step": 40
  }
  ],
  "logging_steps": 1,
@@ -277,7 +355,7 @@
  "attributes": {}
  }
  },
- "total_flos": 1.154618474102784e+16,
+ "total_flos": 1.539491298803712e+16,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null