besimray committed
Commit 7f658b1
1 Parent(s): 8e2bbb2

Training in progress, step 50, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:9258c3bf97aa3a5549055538a78ff5ecdd5a0381ae8cb44fed53bdf82e82eb7a
+ oid sha256:e00c7e2ca2f7201416ee0db7548754ff0b7dad3573e16f5759f340b0edf91035
  size 22573704
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:ff4e7a95fff2b96fc767f188fcd9147729a5edc6157305cc2de75ce14a9af34f
+ oid sha256:80645b0c4593e3ffa5f299466cc99572408a8e260842bda9dc94271f7e380e52
  size 11710970
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:8128afb4afe4aecc26d8f0be5e4c4ed9a96e2778b2735f61e9a821ba55be2be6
+ oid sha256:f59a5254780d448320ee839ee90bfa0ce534a45ff625b1caabb810d268fac3ee
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:68b390e57be002933c68cbb0976c807a453fcfb48626c716bc0894f16432712e
+ oid sha256:b7e76384fe2e1907e44b199e48722ea251cbbcfea1285f875115318fffa6d887
  size 1064
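
The four binary checkpoint files above are tracked with Git LFS, so this commit only rewrites their pointer files: each pointer records the spec version, the SHA-256 object ID of the stored blob, and its size in bytes. As a minimal sketch (the local path and helper name are illustrative, not part of this repo), a downloaded blob can be checked against the oid recorded in its pointer:

```python
import hashlib

def sha256_of_file(path: str, chunk_size: int = 1 << 20) -> str:
    """Stream the file and return its hex SHA-256 digest."""
    digest = hashlib.sha256()
    with open(path, "rb") as fh:
        for chunk in iter(lambda: fh.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# Hypothetical local copy of the blob; the expected value is the new
# "+ oid sha256:..." from the adapter_model.safetensors pointer above.
expected = "e00c7e2ca2f7201416ee0db7548754ff0b7dad3573e16f5759f340b0edf91035"
actual = sha256_of_file("last-checkpoint/adapter_model.safetensors")
print("match" if actual == expected else "mismatch")
```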
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
- "best_metric": 0.7075809240341187,
- "best_model_checkpoint": "miner_id_24/checkpoint-40",
- "epoch": 2.2857142857142856,
+ "best_metric": 0.704846203327179,
+ "best_model_checkpoint": "miner_id_24/checkpoint-50",
+ "epoch": 2.857142857142857,
  "eval_steps": 10,
- "global_step": 40,
+ "global_step": 50,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -327,6 +327,84 @@
  "eval_samples_per_second": 7.187,
  "eval_steps_per_second": 1.917,
  "step": 40
+ },
+ {
+ "epoch": 2.342857142857143,
+ "grad_norm": 0.30310121178627014,
+ "learning_rate": 1.801634892205545e-05,
+ "loss": 0.633,
+ "step": 41
+ },
+ {
+ "epoch": 2.4,
+ "grad_norm": 0.3069086968898773,
+ "learning_rate": 1.5296290238968303e-05,
+ "loss": 0.7029,
+ "step": 42
+ },
+ {
+ "epoch": 2.4571428571428573,
+ "grad_norm": 0.2964320182800293,
+ "learning_rate": 1.2761390862810907e-05,
+ "loss": 0.7385,
+ "step": 43
+ },
+ {
+ "epoch": 2.5142857142857142,
+ "grad_norm": 0.28013864159584045,
+ "learning_rate": 1.0425175578537299e-05,
+ "loss": 0.6347,
+ "step": 44
+ },
+ {
+ "epoch": 2.571428571428571,
+ "grad_norm": 0.29106035828590393,
+ "learning_rate": 8.30010910550611e-06,
+ "loss": 0.6346,
+ "step": 45
+ },
+ {
+ "epoch": 2.6285714285714286,
+ "grad_norm": 0.28448185324668884,
+ "learning_rate": 6.397529592809614e-06,
+ "loss": 0.6148,
+ "step": 46
+ },
+ {
+ "epoch": 2.685714285714286,
+ "grad_norm": 0.2868902087211609,
+ "learning_rate": 4.727588125342669e-06,
+ "loss": 0.5431,
+ "step": 47
+ },
+ {
+ "epoch": 2.742857142857143,
+ "grad_norm": 0.28776589035987854,
+ "learning_rate": 3.299194563372604e-06,
+ "loss": 0.6296,
+ "step": 48
+ },
+ {
+ "epoch": 2.8,
+ "grad_norm": 0.2633899748325348,
+ "learning_rate": 2.1199700045797077e-06,
+ "loss": 0.6345,
+ "step": 49
+ },
+ {
+ "epoch": 2.857142857142857,
+ "grad_norm": 0.2832770049571991,
+ "learning_rate": 1.196206122203647e-06,
+ "loss": 0.6468,
+ "step": 50
+ },
+ {
+ "epoch": 2.857142857142857,
+ "eval_loss": 0.704846203327179,
+ "eval_runtime": 2.0892,
+ "eval_samples_per_second": 7.18,
+ "eval_steps_per_second": 1.915,
+ "step": 50
  }
  ],
  "logging_steps": 1,
@@ -355,7 +433,7 @@
  "attributes": {}
  }
  },
- "total_flos": 1.539491298803712e+16,
+ "total_flos": 1.92436412350464e+16,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null