qingy2024 commited on
Commit
c1b4c0d
·
verified ·
1 Parent(s): 50e3376

Upload checkpoint 3150

Browse files
model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c2169e58c8989a1e21145cbb3344f682c07fb03f8e1d2b3415d1fb54bf51fe94
3
  size 4957560304
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57dd35bc2f6ffb39aa8b1f29e5f5a136575feb2cfea6fb25c20608557674098f
3
  size 4957560304
model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9585e567e29098783a70a169ea5b462f55516e649fb34b65e156fb7731d93458
3
  size 3989163248
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e624a7a0172e021f4952abfbdca579a62820eee0e2b3e77971e4d3e381845328
3
  size 3989163248
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4cf824060625e49e48aeba0c2aaf08f1341a310120ee7a5f582fade92bc82a3f
3
  size 17893874312
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6cbc9c739918e3728ff92771938e8d94644dc34caafd6417d182451fadc9bb8f
3
  size 17893874312
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d8df9f252c6ffa775a0f749e6642c4aedea5bcba170d0101e8c19bd63ddb59f2
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d016b6de510b811bd11c3b655ffc83267ff09c3e71671a7fad681fb0f1d2317d
3
  size 1064
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9193357058125742,
5
  "eval_steps": 500,
6
- "global_step": 3100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -7238,6 +7238,125 @@
7238
  "learning_rate": 3.236257876723725e-06,
7239
  "loss": 0.5991,
7240
  "step": 3099
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7241
  }
7242
  ],
7243
  "logging_steps": 3,
@@ -7257,7 +7376,7 @@
7257
  "attributes": {}
7258
  }
7259
  },
7260
- "total_flos": 2.029418363643075e+19,
7261
  "train_batch_size": 8,
7262
  "trial_name": null,
7263
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.9341637010676157,
5
  "eval_steps": 500,
6
+ "global_step": 3150,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
7238
  "learning_rate": 3.236257876723725e-06,
7239
  "loss": 0.5991,
7240
  "step": 3099
7241
+ },
7242
+ {
7243
+ "epoch": 0.9199288256227758,
7244
+ "grad_norm": 0.259765625,
7245
+ "learning_rate": 3.165897783676275e-06,
7246
+ "loss": 0.5901,
7247
+ "step": 3102
7248
+ },
7249
+ {
7250
+ "epoch": 0.9208185053380783,
7251
+ "grad_norm": 0.27734375,
7252
+ "learning_rate": 3.0962986735020738e-06,
7253
+ "loss": 0.6183,
7254
+ "step": 3105
7255
+ },
7256
+ {
7257
+ "epoch": 0.9217081850533808,
7258
+ "grad_norm": 0.255859375,
7259
+ "learning_rate": 3.027461093154449e-06,
7260
+ "loss": 0.5892,
7261
+ "step": 3108
7262
+ },
7263
+ {
7264
+ "epoch": 0.9225978647686833,
7265
+ "grad_norm": 0.2578125,
7266
+ "learning_rate": 2.959385583602081e-06,
7267
+ "loss": 0.6269,
7268
+ "step": 3111
7269
+ },
7270
+ {
7271
+ "epoch": 0.9234875444839857,
7272
+ "grad_norm": 0.26171875,
7273
+ "learning_rate": 2.8920726798248643e-06,
7274
+ "loss": 0.5946,
7275
+ "step": 3114
7276
+ },
7277
+ {
7278
+ "epoch": 0.9243772241992882,
7279
+ "grad_norm": 0.259765625,
7280
+ "learning_rate": 2.8255229108096527e-06,
7281
+ "loss": 0.6192,
7282
+ "step": 3117
7283
+ },
7284
+ {
7285
+ "epoch": 0.9252669039145908,
7286
+ "grad_norm": 0.255859375,
7287
+ "learning_rate": 2.7597367995461086e-06,
7288
+ "loss": 0.6153,
7289
+ "step": 3120
7290
+ },
7291
+ {
7292
+ "epoch": 0.9261565836298933,
7293
+ "grad_norm": 0.265625,
7294
+ "learning_rate": 2.694714863022585e-06,
7295
+ "loss": 0.5831,
7296
+ "step": 3123
7297
+ },
7298
+ {
7299
+ "epoch": 0.9270462633451957,
7300
+ "grad_norm": 0.267578125,
7301
+ "learning_rate": 2.6304576122221035e-06,
7302
+ "loss": 0.5898,
7303
+ "step": 3126
7304
+ },
7305
+ {
7306
+ "epoch": 0.9279359430604982,
7307
+ "grad_norm": 0.2490234375,
7308
+ "learning_rate": 2.566965552118272e-06,
7309
+ "loss": 0.6098,
7310
+ "step": 3129
7311
+ },
7312
+ {
7313
+ "epoch": 0.9288256227758007,
7314
+ "grad_norm": 0.251953125,
7315
+ "learning_rate": 2.504239181671353e-06,
7316
+ "loss": 0.5932,
7317
+ "step": 3132
7318
+ },
7319
+ {
7320
+ "epoch": 0.9297153024911032,
7321
+ "grad_norm": 0.259765625,
7322
+ "learning_rate": 2.4422789938243763e-06,
7323
+ "loss": 0.5877,
7324
+ "step": 3135
7325
+ },
7326
+ {
7327
+ "epoch": 0.9306049822064056,
7328
+ "grad_norm": 0.279296875,
7329
+ "learning_rate": 2.381085475499201e-06,
7330
+ "loss": 0.5755,
7331
+ "step": 3138
7332
+ },
7333
+ {
7334
+ "epoch": 0.9314946619217082,
7335
+ "grad_norm": 0.263671875,
7336
+ "learning_rate": 2.3206591075927376e-06,
7337
+ "loss": 0.5875,
7338
+ "step": 3141
7339
+ },
7340
+ {
7341
+ "epoch": 0.9323843416370107,
7342
+ "grad_norm": 0.265625,
7343
+ "learning_rate": 2.2610003649731092e-06,
7344
+ "loss": 0.6113,
7345
+ "step": 3144
7346
+ },
7347
+ {
7348
+ "epoch": 0.9332740213523132,
7349
+ "grad_norm": 0.2578125,
7350
+ "learning_rate": 2.2021097164760085e-06,
7351
+ "loss": 0.6035,
7352
+ "step": 3147
7353
+ },
7354
+ {
7355
+ "epoch": 0.9341637010676157,
7356
+ "grad_norm": 0.26171875,
7357
+ "learning_rate": 2.143987624900945e-06,
7358
+ "loss": 0.5813,
7359
+ "step": 3150
7360
  }
7361
  ],
7362
  "logging_steps": 3,
 
7376
  "attributes": {}
7377
  }
7378
  },
7379
+ "total_flos": 2.0621509178953826e+19,
7380
  "train_batch_size": 8,
7381
  "trial_name": null,
7382
  "trial_params": null