qingy2024 commited on
Commit
cf02ba7
·
verified ·
1 Parent(s): c1b4c0d

Upload checkpoint 3200

Browse files
model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:57dd35bc2f6ffb39aa8b1f29e5f5a136575feb2cfea6fb25c20608557674098f
3
  size 4957560304
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e57e3bfc2074a9ad40f5be7d0742e4d85689e9cfc6394e051207e55781a96a00
3
  size 4957560304
model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e624a7a0172e021f4952abfbdca579a62820eee0e2b3e77971e4d3e381845328
3
  size 3989163248
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6db52593f9080fddf1220f3d831f1ab34cd8350f2c5e814ac3bd9e263f00cbed
3
  size 3989163248
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6cbc9c739918e3728ff92771938e8d94644dc34caafd6417d182451fadc9bb8f
3
  size 17893874312
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff165cb496fd7093d743314d1f456b971f98bc37717d074d6a47adb5ffdca3f7
3
  size 17893874312
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d016b6de510b811bd11c3b655ffc83267ff09c3e71671a7fad681fb0f1d2317d
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:feda042eff907491a34cdbf179b85f590d010493a95828afe0b5cdb1928ddd85
3
  size 1064
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9341637010676157,
5
  "eval_steps": 500,
6
- "global_step": 3150,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -7357,6 +7357,118 @@
7357
  "learning_rate": 2.143987624900945e-06,
7358
  "loss": 0.5813,
7359
  "step": 3150
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7360
  }
7361
  ],
7362
  "logging_steps": 3,
@@ -7376,7 +7488,7 @@
7376
  "attributes": {}
7377
  }
7378
  },
7379
- "total_flos": 2.0621509178953826e+19,
7380
  "train_batch_size": 8,
7381
  "trial_name": null,
7382
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.9489916963226572,
5
  "eval_steps": 500,
6
+ "global_step": 3200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
7357
  "learning_rate": 2.143987624900945e-06,
7358
  "loss": 0.5813,
7359
  "step": 3150
7360
+ },
7361
+ {
7362
+ "epoch": 0.9350533807829181,
7363
+ "grad_norm": 0.259765625,
7364
+ "learning_rate": 2.0866345470076044e-06,
7365
+ "loss": 0.589,
7366
+ "step": 3153
7367
+ },
7368
+ {
7369
+ "epoch": 0.9359430604982206,
7370
+ "grad_norm": 0.279296875,
7371
+ "learning_rate": 2.0300509335123283e-06,
7372
+ "loss": 0.5971,
7373
+ "step": 3156
7374
+ },
7375
+ {
7376
+ "epoch": 0.9368327402135231,
7377
+ "grad_norm": 0.255859375,
7378
+ "learning_rate": 1.974237229084497e-06,
7379
+ "loss": 0.5808,
7380
+ "step": 3159
7381
+ },
7382
+ {
7383
+ "epoch": 0.9377224199288257,
7384
+ "grad_norm": 0.263671875,
7385
+ "learning_rate": 1.9191938723430615e-06,
7386
+ "loss": 0.6167,
7387
+ "step": 3162
7388
+ },
7389
+ {
7390
+ "epoch": 0.9386120996441281,
7391
+ "grad_norm": 0.2578125,
7392
+ "learning_rate": 1.8649212958531282e-06,
7393
+ "loss": 0.6088,
7394
+ "step": 3165
7395
+ },
7396
+ {
7397
+ "epoch": 0.9395017793594306,
7398
+ "grad_norm": 0.259765625,
7399
+ "learning_rate": 1.8114199261224928e-06,
7400
+ "loss": 0.5884,
7401
+ "step": 3168
7402
+ },
7403
+ {
7404
+ "epoch": 0.9403914590747331,
7405
+ "grad_norm": 0.265625,
7406
+ "learning_rate": 1.7586901835983437e-06,
7407
+ "loss": 0.6122,
7408
+ "step": 3171
7409
+ },
7410
+ {
7411
+ "epoch": 0.9412811387900356,
7412
+ "grad_norm": 0.259765625,
7413
+ "learning_rate": 1.7067324826639419e-06,
7414
+ "loss": 0.6036,
7415
+ "step": 3174
7416
+ },
7417
+ {
7418
+ "epoch": 0.9421708185053381,
7419
+ "grad_norm": 0.2578125,
7420
+ "learning_rate": 1.655547231635368e-06,
7421
+ "loss": 0.598,
7422
+ "step": 3177
7423
+ },
7424
+ {
7425
+ "epoch": 0.9430604982206405,
7426
+ "grad_norm": 0.26171875,
7427
+ "learning_rate": 1.6051348327583037e-06,
7428
+ "loss": 0.6078,
7429
+ "step": 3180
7430
+ },
7431
+ {
7432
+ "epoch": 0.943950177935943,
7433
+ "grad_norm": 0.251953125,
7434
+ "learning_rate": 1.5554956822048661e-06,
7435
+ "loss": 0.5955,
7436
+ "step": 3183
7437
+ },
7438
+ {
7439
+ "epoch": 0.9448398576512456,
7440
+ "grad_norm": 0.271484375,
7441
+ "learning_rate": 1.5066301700705331e-06,
7442
+ "loss": 0.589,
7443
+ "step": 3186
7444
+ },
7445
+ {
7446
+ "epoch": 0.9457295373665481,
7447
+ "grad_norm": 0.263671875,
7448
+ "learning_rate": 1.4585386803710021e-06,
7449
+ "loss": 0.6035,
7450
+ "step": 3189
7451
+ },
7452
+ {
7453
+ "epoch": 0.9466192170818505,
7454
+ "grad_norm": 0.265625,
7455
+ "learning_rate": 1.411221591039269e-06,
7456
+ "loss": 0.6396,
7457
+ "step": 3192
7458
+ },
7459
+ {
7460
+ "epoch": 0.947508896797153,
7461
+ "grad_norm": 0.263671875,
7462
+ "learning_rate": 1.3646792739225533e-06,
7463
+ "loss": 0.577,
7464
+ "step": 3195
7465
+ },
7466
+ {
7467
+ "epoch": 0.9483985765124555,
7468
+ "grad_norm": 0.2578125,
7469
+ "learning_rate": 1.3189120947794897e-06,
7470
+ "loss": 0.5983,
7471
+ "step": 3198
7472
  }
7473
  ],
7474
  "logging_steps": 3,
 
7488
  "attributes": {}
7489
  }
7490
  },
7491
+ "total_flos": 2.0948834721476903e+19,
7492
  "train_batch_size": 8,
7493
  "trial_name": null,
7494
  "trial_params": null