qingy2024 commited on
Commit
21f5eae
·
verified ·
1 Parent(s): 61b01c0

Upload checkpoint 3300

Browse files
model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fabce1a036331b94cdf566aa87c9ecf11584fa2ffb6b7be93064146a64f2230e
3
  size 4957560304
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:447ea1baf5ce4d3011df6f9cd40e05e076cbb62e1b5b4bd59b2b44e1b3600425
3
  size 4957560304
model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:206358ae8b2081de1c7b1c84dbe9df7131ea730ad25beccd39f96f180a8d2041
3
  size 3989163248
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:649859d9b29ef3ee4a62106e3696336aee4f9cf11919324ac3cb0cd58eca6bd1
3
  size 3989163248
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1229aa48756d90df1c044eded08744a22ecacd564b9281644ebae34dbe880887
3
  size 17893874312
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34eabe2409758e1c785b61e49de22c99478d71b4e287056e9ac7b8e66f22d2e7
3
  size 17893874312
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f19d4f8529d4151f2aad4d583c0ecd97e68c1bbdb23e8cd6eee178553e9463b1
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e3e5d81ec2d6c897aa97cd1ed656c7729f595b7fd89a2af10c54571be04f6c2
3
  size 1064
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9638196915776986,
5
  "eval_steps": 500,
6
- "global_step": 3250,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -7588,6 +7588,125 @@
7588
  "learning_rate": 6.597910240324967e-07,
7589
  "loss": 0.6038,
7590
  "step": 3249
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7591
  }
7592
  ],
7593
  "logging_steps": 3,
@@ -7607,7 +7726,7 @@
7607
  "attributes": {}
7608
  }
7609
  },
7610
- "total_flos": 2.127616026399998e+19,
7611
  "train_batch_size": 8,
7612
  "trial_name": null,
7613
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.9786476868327402,
5
  "eval_steps": 500,
6
+ "global_step": 3300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
7588
  "learning_rate": 6.597910240324967e-07,
7589
  "loss": 0.6038,
7590
  "step": 3249
7591
+ },
7592
+ {
7593
+ "epoch": 0.9644128113879004,
7594
+ "grad_norm": 0.26171875,
7595
+ "learning_rate": 6.280319158544989e-07,
7596
+ "loss": 0.6301,
7597
+ "step": 3252
7598
+ },
7599
+ {
7600
+ "epoch": 0.9653024911032029,
7601
+ "grad_norm": 0.265625,
7602
+ "learning_rate": 5.970537346853156e-07,
7603
+ "loss": 0.6007,
7604
+ "step": 3255
7605
+ },
7606
+ {
7607
+ "epoch": 0.9661921708185054,
7608
+ "grad_norm": 0.2470703125,
7609
+ "learning_rate": 5.668567239708323e-07,
7610
+ "loss": 0.5789,
7611
+ "step": 3258
7612
+ },
7613
+ {
7614
+ "epoch": 0.9670818505338078,
7615
+ "grad_norm": 0.265625,
7616
+ "learning_rate": 5.374411210180341e-07,
7617
+ "loss": 0.5964,
7618
+ "step": 3261
7619
+ },
7620
+ {
7621
+ "epoch": 0.9679715302491103,
7622
+ "grad_norm": 0.25390625,
7623
+ "learning_rate": 5.088071569931185e-07,
7624
+ "loss": 0.5953,
7625
+ "step": 3264
7626
+ },
7627
+ {
7628
+ "epoch": 0.9688612099644128,
7629
+ "grad_norm": 0.2431640625,
7630
+ "learning_rate": 4.809550569196519e-07,
7631
+ "loss": 0.5877,
7632
+ "step": 3267
7633
+ },
7634
+ {
7635
+ "epoch": 0.9697508896797153,
7636
+ "grad_norm": 0.263671875,
7637
+ "learning_rate": 4.5388503967683793e-07,
7638
+ "loss": 0.5923,
7639
+ "step": 3270
7640
+ },
7641
+ {
7642
+ "epoch": 0.9706405693950177,
7643
+ "grad_norm": 0.255859375,
7644
+ "learning_rate": 4.275973179977855e-07,
7645
+ "loss": 0.5958,
7646
+ "step": 3273
7647
+ },
7648
+ {
7649
+ "epoch": 0.9715302491103203,
7650
+ "grad_norm": 0.271484375,
7651
+ "learning_rate": 4.0209209846783224e-07,
7652
+ "loss": 0.5977,
7653
+ "step": 3276
7654
+ },
7655
+ {
7656
+ "epoch": 0.9724199288256228,
7657
+ "grad_norm": 0.265625,
7658
+ "learning_rate": 3.773695815229239e-07,
7659
+ "loss": 0.592,
7660
+ "step": 3279
7661
+ },
7662
+ {
7663
+ "epoch": 0.9733096085409253,
7664
+ "grad_norm": 0.248046875,
7665
+ "learning_rate": 3.534299614480596e-07,
7666
+ "loss": 0.5702,
7667
+ "step": 3282
7668
+ },
7669
+ {
7670
+ "epoch": 0.9741992882562278,
7671
+ "grad_norm": 0.255859375,
7672
+ "learning_rate": 3.3027342637572676e-07,
7673
+ "loss": 0.5893,
7674
+ "step": 3285
7675
+ },
7676
+ {
7677
+ "epoch": 0.9750889679715302,
7678
+ "grad_norm": 0.251953125,
7679
+ "learning_rate": 3.079001582844354e-07,
7680
+ "loss": 0.6177,
7681
+ "step": 3288
7682
+ },
7683
+ {
7684
+ "epoch": 0.9759786476868327,
7685
+ "grad_norm": 0.341796875,
7686
+ "learning_rate": 2.8631033299730825e-07,
7687
+ "loss": 0.6178,
7688
+ "step": 3291
7689
+ },
7690
+ {
7691
+ "epoch": 0.9768683274021353,
7692
+ "grad_norm": 0.255859375,
7693
+ "learning_rate": 2.655041201806707e-07,
7694
+ "loss": 0.5924,
7695
+ "step": 3294
7696
+ },
7697
+ {
7698
+ "epoch": 0.9777580071174378,
7699
+ "grad_norm": 0.259765625,
7700
+ "learning_rate": 2.454816833427631e-07,
7701
+ "loss": 0.6021,
7702
+ "step": 3297
7703
+ },
7704
+ {
7705
+ "epoch": 0.9786476868327402,
7706
+ "grad_norm": 0.2578125,
7707
+ "learning_rate": 2.2624317983239718e-07,
7708
+ "loss": 0.6131,
7709
+ "step": 3300
7710
  }
7711
  ],
7712
  "logging_steps": 3,
 
7726
  "attributes": {}
7727
  }
7728
  },
7729
+ "total_flos": 2.1603485806523056e+19,
7730
  "train_batch_size": 8,
7731
  "trial_name": null,
7732
  "trial_params": null