besimray committed (verified)
Commit: 2709727
1 Parent(s): 5bdaa25

Training in progress, step 80, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a5ddf2f84301434e699d9c2f35b9d9105c9f4f2d8b564220a9bd359c56851edc
+oid sha256:05a0608a0d108d197d5820e3b216e8d7a09ddb76226408ab8f49c2438e9fb16d
 size 90207248
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d469aaf72c006ee445cf4f9b87e5f5cc4be9a600f4e6cccf5710fa260e9f24f8
+oid sha256:761ab0ea9c61d6e62dfbe008243dbfa326d67c392160d21b091f12b6a9a11a62
 size 46057082
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:342d36d482e38fbc36cc0a28040792e6a4257d06cd930a5ace345aee66811294
+oid sha256:597c452dbb6d2e2055a44445aa9e337aa4085eacd11fcbc7213a5eb6f6055867
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1074437c4c638775a20a86de2e689d1a14d0ff4e4137df2ddeb45e94a776caef
+oid sha256:7803647d52fbd7429a283dc695ba7cf653ff890c06d5c50f67d0a09610438889
 size 1064
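
Each of the four files above is a Git LFS pointer: the repository stores only the `version`, the `sha256` oid of the binary payload, and its byte size, so this commit swaps the oids while the sizes stay unchanged. Below is a minimal sketch of checking a locally downloaded artifact against its pointer; the local path is hypothetical, and only the oid and size come from the diff:

```python
import hashlib
from pathlib import Path

def matches_lfs_pointer(path: str, expected_oid: str, expected_size: int) -> bool:
    """Return True if the file's SHA-256 digest and byte size match the LFS pointer."""
    data = Path(path).read_bytes()
    return hashlib.sha256(data).hexdigest() == expected_oid and len(data) == expected_size

# Values taken from the adapter_model.safetensors pointer in this commit.
print(matches_lfs_pointer(
    "last-checkpoint/adapter_model.safetensors",
    expected_oid="05a0608a0d108d197d5820e3b216e8d7a09ddb76226408ab8f49c2438e9fb16d",
    expected_size=90207248,
))
```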
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.6125953197479248,
-  "best_model_checkpoint": "miner_id_24/checkpoint-70",
-  "epoch": 0.019767031415460642,
+  "best_metric": 1.602448582649231,
+  "best_model_checkpoint": "miner_id_24/checkpoint-80",
+  "epoch": 0.022590893046240734,
   "eval_steps": 10,
-  "global_step": 70,
+  "global_step": 80,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -561,6 +561,84 @@
       "eval_samples_per_second": 5.591,
       "eval_steps_per_second": 5.591,
       "step": 70
+    },
+    {
+      "epoch": 0.02004941757853865,
+      "grad_norm": 2.52913761138916,
+      "learning_rate": 0.00019244917309000817,
+      "loss": 1.722,
+      "step": 71
+    },
+    {
+      "epoch": 0.02033180374161666,
+      "grad_norm": 7.770167350769043,
+      "learning_rate": 0.00019220287022200707,
+      "loss": 2.7036,
+      "step": 72
+    },
+    {
+      "epoch": 0.02061418990469467,
+      "grad_norm": 1.7608412504196167,
+      "learning_rate": 0.0001919527772551451,
+      "loss": 1.2768,
+      "step": 73
+    },
+    {
+      "epoch": 0.02089657606777268,
+      "grad_norm": 2.3405442237854004,
+      "learning_rate": 0.00019169890446976454,
+      "loss": 2.2241,
+      "step": 74
+    },
+    {
+      "epoch": 0.02117896223085069,
+      "grad_norm": 2.386042356491089,
+      "learning_rate": 0.00019144126230158127,
+      "loss": 1.3922,
+      "step": 75
+    },
+    {
+      "epoch": 0.021461348393928697,
+      "grad_norm": 2.280710458755493,
+      "learning_rate": 0.0001911798613412557,
+      "loss": 1.608,
+      "step": 76
+    },
+    {
+      "epoch": 0.021743734557006705,
+      "grad_norm": 1.2972298860549927,
+      "learning_rate": 0.0001909147123339575,
+      "loss": 2.1776,
+      "step": 77
+    },
+    {
+      "epoch": 0.022026120720084717,
+      "grad_norm": 1.4631404876708984,
+      "learning_rate": 0.0001906458261789238,
+      "loss": 3.1008,
+      "step": 78
+    },
+    {
+      "epoch": 0.022308506883162726,
+      "grad_norm": 1.0595492124557495,
+      "learning_rate": 0.00019037321392901136,
+      "loss": 1.3511,
+      "step": 79
+    },
+    {
+      "epoch": 0.022590893046240734,
+      "grad_norm": 0.9610152244567871,
+      "learning_rate": 0.0001900968867902419,
+      "loss": 2.1504,
+      "step": 80
+    },
+    {
+      "epoch": 0.022590893046240734,
+      "eval_loss": 1.602448582649231,
+      "eval_runtime": 133.5988,
+      "eval_samples_per_second": 5.584,
+      "eval_steps_per_second": 5.584,
+      "step": 80
     }
   ],
   "logging_steps": 1,
@@ -589,7 +667,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 6851625096314880.0,
+  "total_flos": 7830428681502720.0,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null