qingy2024 commited on
Commit
61b01c0
·
verified ·
1 Parent(s): cf02ba7

Upload checkpoint 3250

Browse files
model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e57e3bfc2074a9ad40f5be7d0742e4d85689e9cfc6394e051207e55781a96a00
3
  size 4957560304
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fabce1a036331b94cdf566aa87c9ecf11584fa2ffb6b7be93064146a64f2230e
3
  size 4957560304
model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6db52593f9080fddf1220f3d831f1ab34cd8350f2c5e814ac3bd9e263f00cbed
3
  size 3989163248
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:206358ae8b2081de1c7b1c84dbe9df7131ea730ad25beccd39f96f180a8d2041
3
  size 3989163248
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ff165cb496fd7093d743314d1f456b971f98bc37717d074d6a47adb5ffdca3f7
3
  size 17893874312
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1229aa48756d90df1c044eded08744a22ecacd564b9281644ebae34dbe880887
3
  size 17893874312
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:feda042eff907491a34cdbf179b85f590d010493a95828afe0b5cdb1928ddd85
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f19d4f8529d4151f2aad4d583c0ecd97e68c1bbdb23e8cd6eee178553e9463b1
3
  size 1064
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9489916963226572,
5
  "eval_steps": 500,
6
- "global_step": 3200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -7469,6 +7469,125 @@
7469
  "learning_rate": 1.3189120947794897e-06,
7470
  "loss": 0.5983,
7471
  "step": 3198
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7472
  }
7473
  ],
7474
  "logging_steps": 3,
@@ -7488,7 +7607,7 @@
7488
  "attributes": {}
7489
  }
7490
  },
7491
- "total_flos": 2.0948834721476903e+19,
7492
  "train_batch_size": 8,
7493
  "trial_name": null,
7494
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.9638196915776986,
5
  "eval_steps": 500,
6
+ "global_step": 3250,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
7469
  "learning_rate": 1.3189120947794897e-06,
7470
  "loss": 0.5983,
7471
  "step": 3198
7472
+ },
7473
+ {
7474
+ "epoch": 0.949288256227758,
7475
+ "grad_norm": 0.263671875,
7476
+ "learning_rate": 1.273920413277152e-06,
7477
+ "loss": 0.6093,
7478
+ "step": 3201
7479
+ },
7480
+ {
7481
+ "epoch": 0.9501779359430605,
7482
+ "grad_norm": 0.263671875,
7483
+ "learning_rate": 1.2297045829882892e-06,
7484
+ "loss": 0.5966,
7485
+ "step": 3204
7486
+ },
7487
+ {
7488
+ "epoch": 0.951067615658363,
7489
+ "grad_norm": 0.265625,
7490
+ "learning_rate": 1.186264951388516e-06,
7491
+ "loss": 0.6228,
7492
+ "step": 3207
7493
+ },
7494
+ {
7495
+ "epoch": 0.9519572953736655,
7496
+ "grad_norm": 0.26953125,
7497
+ "learning_rate": 1.1436018598535937e-06,
7498
+ "loss": 0.6083,
7499
+ "step": 3210
7500
+ },
7501
+ {
7502
+ "epoch": 0.952846975088968,
7503
+ "grad_norm": 0.25390625,
7504
+ "learning_rate": 1.1017156436567532e-06,
7505
+ "loss": 0.5806,
7506
+ "step": 3213
7507
+ },
7508
+ {
7509
+ "epoch": 0.9537366548042705,
7510
+ "grad_norm": 0.267578125,
7511
+ "learning_rate": 1.0606066319660435e-06,
7512
+ "loss": 0.579,
7513
+ "step": 3216
7514
+ },
7515
+ {
7516
+ "epoch": 0.9546263345195729,
7517
+ "grad_norm": 0.263671875,
7518
+ "learning_rate": 1.020275147841765e-06,
7519
+ "loss": 0.6053,
7520
+ "step": 3219
7521
+ },
7522
+ {
7523
+ "epoch": 0.9555160142348754,
7524
+ "grad_norm": 0.26171875,
7525
+ "learning_rate": 9.807215082339394e-07,
7526
+ "loss": 0.595,
7527
+ "step": 3222
7528
+ },
7529
+ {
7530
+ "epoch": 0.9564056939501779,
7531
+ "grad_norm": 0.25,
7532
+ "learning_rate": 9.41946023979745e-07,
7533
+ "loss": 0.5857,
7534
+ "step": 3225
7535
+ },
7536
+ {
7537
+ "epoch": 0.9572953736654805,
7538
+ "grad_norm": 0.265625,
7539
+ "learning_rate": 9.039489998011852e-07,
7540
+ "loss": 0.6189,
7541
+ "step": 3228
7542
+ },
7543
+ {
7544
+ "epoch": 0.958185053380783,
7545
+ "grad_norm": 0.26171875,
7546
+ "learning_rate": 8.66730734302601e-07,
7547
+ "loss": 0.5837,
7548
+ "step": 3231
7549
+ },
7550
+ {
7551
+ "epoch": 0.9590747330960854,
7552
+ "grad_norm": 0.25,
7553
+ "learning_rate": 8.302915199683737e-07,
7554
+ "loss": 0.5827,
7555
+ "step": 3234
7556
+ },
7557
+ {
7558
+ "epoch": 0.9599644128113879,
7559
+ "grad_norm": 0.259765625,
7560
+ "learning_rate": 7.94631643160626e-07,
7561
+ "loss": 0.6043,
7562
+ "step": 3237
7563
+ },
7564
+ {
7565
+ "epoch": 0.9608540925266904,
7566
+ "grad_norm": 0.26171875,
7567
+ "learning_rate": 7.597513841169468e-07,
7568
+ "loss": 0.5621,
7569
+ "step": 3240
7570
+ },
7571
+ {
7572
+ "epoch": 0.9617437722419929,
7573
+ "grad_norm": 0.26953125,
7574
+ "learning_rate": 7.256510169482034e-07,
7575
+ "loss": 0.5886,
7576
+ "step": 3243
7577
+ },
7578
+ {
7579
+ "epoch": 0.9626334519572953,
7580
+ "grad_norm": 0.279296875,
7581
+ "learning_rate": 6.923308096363879e-07,
7582
+ "loss": 0.6205,
7583
+ "step": 3246
7584
+ },
7585
+ {
7586
+ "epoch": 0.9635231316725978,
7587
+ "grad_norm": 0.267578125,
7588
+ "learning_rate": 6.597910240324967e-07,
7589
+ "loss": 0.6038,
7590
+ "step": 3249
7591
  }
7592
  ],
7593
  "logging_steps": 3,
 
7607
  "attributes": {}
7608
  }
7609
  },
7610
+ "total_flos": 2.127616026399998e+19,
7611
  "train_batch_size": 8,
7612
  "trial_name": null,
7613
  "trial_params": null