Commit f6c5f5b (verified), committed by diagonalge
Parent: 3e6d5a6

Training in progress, step 90, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3b120c235acc603d7e0a88df6eb116c05a31d43ac85e97d14eca60636aa4e9e1
+oid sha256:3bcb30213c268b08c544b2c7db8161f621672109ce6ac437fbf66b545997526d
 size 101752088
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3c28a74ee961f8b24fb2735619f4f437d34485673c89b57deafa93df5086d70b
+oid sha256:f0645116a7c0984cadcde7eccc2e07e2cf29f24691185e9c2bb5c66a51d9adad
 size 52046596
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2cd11dbeb5727fc345d367ecbecf218eaa06a9e3b7752506d05b1cc38c858f04
+oid sha256:0580e74248926dc2cb618fc9ab207371e56a1f66f91fbcaaa73e7d0342f25366
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:49b8a1dbbf2c2a0b7fde326d57c34bd6c5e5d17e0aaf8b19016c1f721c049db1
+oid sha256:cb97b24f34ff3e53eec5be9cf35c1a7161c58dbc2fed7dda160fb3eb64e5f353
 size 1064
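The four files above are stored with Git LFS, so the commit only rewrites their pointer text (version, oid sha256, size); the oid is the SHA-256 of the actual file content. As a quick sanity check after downloading the checkpoint, a pointer can be compared against the local file. This is a minimal sketch, not part of the commit; the path and values are taken from the adapter_model.safetensors pointer shown above.

# Minimal sketch: verify a downloaded file against its Git LFS pointer
# (only uses the Python standard library; path is illustrative).
import hashlib

def verify_lfs_pointer(file_path, expected_oid, expected_size):
    """Return True if the file's SHA-256 and size match the pointer."""
    h = hashlib.sha256()
    size = 0
    with open(file_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
            size += len(chunk)
    return h.hexdigest() == expected_oid and size == expected_size

ok = verify_lfs_pointer(
    "last-checkpoint/adapter_model.safetensors",
    "3bcb30213c268b08c544b2c7db8161f621672109ce6ac437fbf66b545997526d",
    101752088,
)
print("pointer matches file:", ok)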
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.01006985965133111,
+  "epoch": 0.011328592107747499,
   "eval_steps": 25,
-  "global_step": 80,
+  "global_step": 90,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -599,6 +599,76 @@
       "learning_rate": 2.339555568810221e-05,
       "loss": 0.0003,
       "step": 80
+    },
+    {
+      "epoch": 0.010195732896972749,
+      "grad_norm": 5.916388511657715,
+      "learning_rate": 2.119892463932781e-05,
+      "loss": 1.0992,
+      "step": 81
+    },
+    {
+      "epoch": 0.010321606142614388,
+      "grad_norm": 0.010422502644360065,
+      "learning_rate": 1.9098300562505266e-05,
+      "loss": 0.0005,
+      "step": 82
+    },
+    {
+      "epoch": 0.010447479388256025,
+      "grad_norm": 3.361562728881836,
+      "learning_rate": 1.7096242744495837e-05,
+      "loss": 0.1324,
+      "step": 83
+    },
+    {
+      "epoch": 0.010573352633897665,
+      "grad_norm": 0.04859397932887077,
+      "learning_rate": 1.5195190384357404e-05,
+      "loss": 0.0013,
+      "step": 84
+    },
+    {
+      "epoch": 0.010699225879539304,
+      "grad_norm": 0.023048996925354004,
+      "learning_rate": 1.339745962155613e-05,
+      "loss": 0.0011,
+      "step": 85
+    },
+    {
+      "epoch": 0.010825099125180943,
+      "grad_norm": 0.02878495492041111,
+      "learning_rate": 1.1705240714107302e-05,
+      "loss": 0.0012,
+      "step": 86
+    },
+    {
+      "epoch": 0.010950972370822582,
+      "grad_norm": 0.03888264298439026,
+      "learning_rate": 1.0120595370083318e-05,
+      "loss": 0.0033,
+      "step": 87
+    },
+    {
+      "epoch": 0.01107684561646422,
+      "grad_norm": 3.1203830242156982,
+      "learning_rate": 8.645454235739903e-06,
+      "loss": 0.1787,
+      "step": 88
+    },
+    {
+      "epoch": 0.01120271886210586,
+      "grad_norm": 3.7907369136810303,
+      "learning_rate": 7.281614543321269e-06,
+      "loss": 0.3365,
+      "step": 89
+    },
+    {
+      "epoch": 0.011328592107747499,
+      "grad_norm": 2.6835732460021973,
+      "learning_rate": 6.030737921409169e-06,
+      "loss": 0.0975,
+      "step": 90
     }
   ],
   "logging_steps": 1,
@@ -618,7 +688,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 5.26830721302528e+16,
+  "total_flos": 5.92684561465344e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null