besimray committed
Commit 58b82fc · verified · 1 Parent(s): 42f297c

Training in progress, step 80, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1f5a08da0ee0b418d960bbc7e08594d1873c63fb666a0f4c0d7776ad3c6af394
+oid sha256:7c77bd252404343d59fa47e11d87b9d7494bdc0ce74fb6444c47871aca36d1f6
 size 90207248
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f91c2b58570527b5de8d11055402027f916823c5e8256d59a932459e7bac2373
+oid sha256:65cf1d6e092f1f2385667e8efd41a0118af57063735c60905ff65cddd11c6b83
 size 46057082
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:342d36d482e38fbc36cc0a28040792e6a4257d06cd930a5ace345aee66811294
+oid sha256:597c452dbb6d2e2055a44445aa9e337aa4085eacd11fcbc7213a5eb6f6055867
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1074437c4c638775a20a86de2e689d1a14d0ff4e4137df2ddeb45e94a776caef
+oid sha256:7803647d52fbd7429a283dc695ba7cf653ff890c06d5c50f67d0a09610438889
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.6243711709976196,
-  "best_model_checkpoint": "miner_id_24/checkpoint-70",
-  "epoch": 0.019767031415460642,
+  "best_metric": 1.617271900177002,
+  "best_model_checkpoint": "miner_id_24/checkpoint-80",
+  "epoch": 0.022590893046240734,
   "eval_steps": 10,
-  "global_step": 70,
+  "global_step": 80,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -561,6 +561,84 @@
       "eval_samples_per_second": 5.542,
       "eval_steps_per_second": 5.542,
       "step": 70
+    },
+    {
+      "epoch": 0.02004941757853865,
+      "grad_norm": 2.2794175148010254,
+      "learning_rate": 0.00019244917309000817,
+      "loss": 1.624,
+      "step": 71
+    },
+    {
+      "epoch": 0.02033180374161666,
+      "grad_norm": 1.5373339653015137,
+      "learning_rate": 0.00019220287022200707,
+      "loss": 1.8068,
+      "step": 72
+    },
+    {
+      "epoch": 0.02061418990469467,
+      "grad_norm": 0.9991006255149841,
+      "learning_rate": 0.0001919527772551451,
+      "loss": 1.6704,
+      "step": 73
+    },
+    {
+      "epoch": 0.02089657606777268,
+      "grad_norm": 1.392340064048767,
+      "learning_rate": 0.00019169890446976454,
+      "loss": 2.378,
+      "step": 74
+    },
+    {
+      "epoch": 0.02117896223085069,
+      "grad_norm": 1.1224641799926758,
+      "learning_rate": 0.00019144126230158127,
+      "loss": 1.6227,
+      "step": 75
+    },
+    {
+      "epoch": 0.021461348393928697,
+      "grad_norm": 1.2003456354141235,
+      "learning_rate": 0.0001911798613412557,
+      "loss": 2.4362,
+      "step": 76
+    },
+    {
+      "epoch": 0.021743734557006705,
+      "grad_norm": 1.102264642715454,
+      "learning_rate": 0.0001909147123339575,
+      "loss": 2.3662,
+      "step": 77
+    },
+    {
+      "epoch": 0.022026120720084717,
+      "grad_norm": 0.7298341989517212,
+      "learning_rate": 0.0001906458261789238,
+      "loss": 2.8474,
+      "step": 78
+    },
+    {
+      "epoch": 0.022308506883162726,
+      "grad_norm": 1.0796235799789429,
+      "learning_rate": 0.00019037321392901136,
+      "loss": 1.5521,
+      "step": 79
+    },
+    {
+      "epoch": 0.022590893046240734,
+      "grad_norm": 0.7039556503295898,
+      "learning_rate": 0.0001900968867902419,
+      "loss": 2.47,
+      "step": 80
+    },
+    {
+      "epoch": 0.022590893046240734,
+      "eval_loss": 1.617271900177002,
+      "eval_runtime": 134.9383,
+      "eval_samples_per_second": 5.528,
+      "eval_steps_per_second": 5.528,
+      "step": 80
     }
   ],
   "logging_steps": 1,
@@ -589,7 +667,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 6851625096314880.0,
+  "total_flos": 7830428681502720.0,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null