iamnguyen committed (verified)
Commit a601895
1 Parent(s): b4f839b

Training in progress, step 96, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dd5ff5271312c471c1e5dff507f0c30a550d9a37628b9a6d4a570b4ea0de29a7
+oid sha256:48183ccb0178125cc010c0512b0249661aa8bcda24238fa1ad703f3707aa9ee5
 size 479769104
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:28591169371e82e7643c9af0f37c54468a070ed22b024fbe67f162d95110bc52
+oid sha256:330b34cef66bec5a4bb1e99534643208c6b0a5ea8737d041e4be17f7fef4502c
 size 240728084
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5e648af67e02c768e1b8b9f76c18345c49f603e195d2b3ca0c4f4581aa1076ae
+oid sha256:bc8a4187f1b7f884b08d389739f293db04ef58965aed4a5d8cc45be3c4d4da9c
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.005172103773413366,
+  "epoch": 0.00620652452809604,
   "eval_steps": 500,
-  "global_step": 80,
+  "global_step": 96,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -567,6 +567,118 @@
       "learning_rate": 5.161290322580646e-06,
       "loss": 1.3231,
       "step": 80
+    },
+    {
+      "epoch": 0.005236755070581033,
+      "grad_norm": 6.0287041664123535,
+      "learning_rate": 5.2258064516129035e-06,
+      "loss": 1.4106,
+      "step": 81
+    },
+    {
+      "epoch": 0.0053014063677487,
+      "grad_norm": 5.727312088012695,
+      "learning_rate": 5.290322580645162e-06,
+      "loss": 1.52,
+      "step": 82
+    },
+    {
+      "epoch": 0.005366057664916367,
+      "grad_norm": 4.75112771987915,
+      "learning_rate": 5.35483870967742e-06,
+      "loss": 1.434,
+      "step": 83
+    },
+    {
+      "epoch": 0.005430708962084034,
+      "grad_norm": 5.614027500152588,
+      "learning_rate": 5.419354838709678e-06,
+      "loss": 1.4501,
+      "step": 84
+    },
+    {
+      "epoch": 0.005495360259251702,
+      "grad_norm": 6.246868133544922,
+      "learning_rate": 5.483870967741935e-06,
+      "loss": 1.4065,
+      "step": 85
+    },
+    {
+      "epoch": 0.005560011556419369,
+      "grad_norm": 4.8930559158325195,
+      "learning_rate": 5.548387096774194e-06,
+      "loss": 1.4059,
+      "step": 86
+    },
+    {
+      "epoch": 0.005624662853587036,
+      "grad_norm": 9.081551551818848,
+      "learning_rate": 5.612903225806452e-06,
+      "loss": 1.4045,
+      "step": 87
+    },
+    {
+      "epoch": 0.005689314150754703,
+      "grad_norm": 6.593941688537598,
+      "learning_rate": 5.677419354838711e-06,
+      "loss": 1.4229,
+      "step": 88
+    },
+    {
+      "epoch": 0.00575396544792237,
+      "grad_norm": 4.863624095916748,
+      "learning_rate": 5.7419354838709685e-06,
+      "loss": 1.4073,
+      "step": 89
+    },
+    {
+      "epoch": 0.005818616745090037,
+      "grad_norm": 5.167389392852783,
+      "learning_rate": 5.806451612903226e-06,
+      "loss": 1.5046,
+      "step": 90
+    },
+    {
+      "epoch": 0.005883268042257704,
+      "grad_norm": 4.816722869873047,
+      "learning_rate": 5.8709677419354835e-06,
+      "loss": 1.4358,
+      "step": 91
+    },
+    {
+      "epoch": 0.005947919339425371,
+      "grad_norm": 5.505555629730225,
+      "learning_rate": 5.935483870967742e-06,
+      "loss": 1.5727,
+      "step": 92
+    },
+    {
+      "epoch": 0.006012570636593038,
+      "grad_norm": 5.764698505401611,
+      "learning_rate": 6e-06,
+      "loss": 1.4799,
+      "step": 93
+    },
+    {
+      "epoch": 0.006077221933760706,
+      "grad_norm": 5.312406063079834,
+      "learning_rate": 6.064516129032259e-06,
+      "loss": 1.5233,
+      "step": 94
+    },
+    {
+      "epoch": 0.006141873230928373,
+      "grad_norm": 6.0215253829956055,
+      "learning_rate": 6.129032258064517e-06,
+      "loss": 1.3764,
+      "step": 95
+    },
+    {
+      "epoch": 0.00620652452809604,
+      "grad_norm": 6.582176208496094,
+      "learning_rate": 6.193548387096775e-06,
+      "loss": 1.3687,
+      "step": 96
     }
   ],
   "logging_steps": 1,
@@ -586,7 +698,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 5.178532622222131e+16,
+  "total_flos": 6.186692359677542e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null