Upload checkpoint 3200
Browse files- model-00001-of-00002.safetensors +1 -1
- model-00002-of-00002.safetensors +1 -1
- optimizer.pt +1 -1
- scheduler.pt +1 -1
- trainer_state.json +115 -3
model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4957560304
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e57e3bfc2074a9ad40f5be7d0742e4d85689e9cfc6394e051207e55781a96a00
|
3 |
size 4957560304
|
model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3989163248
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6db52593f9080fddf1220f3d831f1ab34cd8350f2c5e814ac3bd9e263f00cbed
|
3 |
size 3989163248
|
optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 17893874312
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ff165cb496fd7093d743314d1f456b971f98bc37717d074d6a47adb5ffdca3f7
|
3 |
size 17893874312
|
scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:feda042eff907491a34cdbf179b85f590d010493a95828afe0b5cdb1928ddd85
|
3 |
size 1064
|
trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -7357,6 +7357,118 @@
|
|
7357 |
"learning_rate": 2.143987624900945e-06,
|
7358 |
"loss": 0.5813,
|
7359 |
"step": 3150
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7360 |
}
|
7361 |
],
|
7362 |
"logging_steps": 3,
|
@@ -7376,7 +7488,7 @@
|
|
7376 |
"attributes": {}
|
7377 |
}
|
7378 |
},
|
7379 |
-
"total_flos": 2.
|
7380 |
"train_batch_size": 8,
|
7381 |
"trial_name": null,
|
7382 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.9489916963226572,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 3200,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
7357 |
"learning_rate": 2.143987624900945e-06,
|
7358 |
"loss": 0.5813,
|
7359 |
"step": 3150
|
7360 |
+
},
|
7361 |
+
{
|
7362 |
+
"epoch": 0.9350533807829181,
|
7363 |
+
"grad_norm": 0.259765625,
|
7364 |
+
"learning_rate": 2.0866345470076044e-06,
|
7365 |
+
"loss": 0.589,
|
7366 |
+
"step": 3153
|
7367 |
+
},
|
7368 |
+
{
|
7369 |
+
"epoch": 0.9359430604982206,
|
7370 |
+
"grad_norm": 0.279296875,
|
7371 |
+
"learning_rate": 2.0300509335123283e-06,
|
7372 |
+
"loss": 0.5971,
|
7373 |
+
"step": 3156
|
7374 |
+
},
|
7375 |
+
{
|
7376 |
+
"epoch": 0.9368327402135231,
|
7377 |
+
"grad_norm": 0.255859375,
|
7378 |
+
"learning_rate": 1.974237229084497e-06,
|
7379 |
+
"loss": 0.5808,
|
7380 |
+
"step": 3159
|
7381 |
+
},
|
7382 |
+
{
|
7383 |
+
"epoch": 0.9377224199288257,
|
7384 |
+
"grad_norm": 0.263671875,
|
7385 |
+
"learning_rate": 1.9191938723430615e-06,
|
7386 |
+
"loss": 0.6167,
|
7387 |
+
"step": 3162
|
7388 |
+
},
|
7389 |
+
{
|
7390 |
+
"epoch": 0.9386120996441281,
|
7391 |
+
"grad_norm": 0.2578125,
|
7392 |
+
"learning_rate": 1.8649212958531282e-06,
|
7393 |
+
"loss": 0.6088,
|
7394 |
+
"step": 3165
|
7395 |
+
},
|
7396 |
+
{
|
7397 |
+
"epoch": 0.9395017793594306,
|
7398 |
+
"grad_norm": 0.259765625,
|
7399 |
+
"learning_rate": 1.8114199261224928e-06,
|
7400 |
+
"loss": 0.5884,
|
7401 |
+
"step": 3168
|
7402 |
+
},
|
7403 |
+
{
|
7404 |
+
"epoch": 0.9403914590747331,
|
7405 |
+
"grad_norm": 0.265625,
|
7406 |
+
"learning_rate": 1.7586901835983437e-06,
|
7407 |
+
"loss": 0.6122,
|
7408 |
+
"step": 3171
|
7409 |
+
},
|
7410 |
+
{
|
7411 |
+
"epoch": 0.9412811387900356,
|
7412 |
+
"grad_norm": 0.259765625,
|
7413 |
+
"learning_rate": 1.7067324826639419e-06,
|
7414 |
+
"loss": 0.6036,
|
7415 |
+
"step": 3174
|
7416 |
+
},
|
7417 |
+
{
|
7418 |
+
"epoch": 0.9421708185053381,
|
7419 |
+
"grad_norm": 0.2578125,
|
7420 |
+
"learning_rate": 1.655547231635368e-06,
|
7421 |
+
"loss": 0.598,
|
7422 |
+
"step": 3177
|
7423 |
+
},
|
7424 |
+
{
|
7425 |
+
"epoch": 0.9430604982206405,
|
7426 |
+
"grad_norm": 0.26171875,
|
7427 |
+
"learning_rate": 1.6051348327583037e-06,
|
7428 |
+
"loss": 0.6078,
|
7429 |
+
"step": 3180
|
7430 |
+
},
|
7431 |
+
{
|
7432 |
+
"epoch": 0.943950177935943,
|
7433 |
+
"grad_norm": 0.251953125,
|
7434 |
+
"learning_rate": 1.5554956822048661e-06,
|
7435 |
+
"loss": 0.5955,
|
7436 |
+
"step": 3183
|
7437 |
+
},
|
7438 |
+
{
|
7439 |
+
"epoch": 0.9448398576512456,
|
7440 |
+
"grad_norm": 0.271484375,
|
7441 |
+
"learning_rate": 1.5066301700705331e-06,
|
7442 |
+
"loss": 0.589,
|
7443 |
+
"step": 3186
|
7444 |
+
},
|
7445 |
+
{
|
7446 |
+
"epoch": 0.9457295373665481,
|
7447 |
+
"grad_norm": 0.263671875,
|
7448 |
+
"learning_rate": 1.4585386803710021e-06,
|
7449 |
+
"loss": 0.6035,
|
7450 |
+
"step": 3189
|
7451 |
+
},
|
7452 |
+
{
|
7453 |
+
"epoch": 0.9466192170818505,
|
7454 |
+
"grad_norm": 0.265625,
|
7455 |
+
"learning_rate": 1.411221591039269e-06,
|
7456 |
+
"loss": 0.6396,
|
7457 |
+
"step": 3192
|
7458 |
+
},
|
7459 |
+
{
|
7460 |
+
"epoch": 0.947508896797153,
|
7461 |
+
"grad_norm": 0.263671875,
|
7462 |
+
"learning_rate": 1.3646792739225533e-06,
|
7463 |
+
"loss": 0.577,
|
7464 |
+
"step": 3195
|
7465 |
+
},
|
7466 |
+
{
|
7467 |
+
"epoch": 0.9483985765124555,
|
7468 |
+
"grad_norm": 0.2578125,
|
7469 |
+
"learning_rate": 1.3189120947794897e-06,
|
7470 |
+
"loss": 0.5983,
|
7471 |
+
"step": 3198
|
7472 |
}
|
7473 |
],
|
7474 |
"logging_steps": 3,
|
|
|
7488 |
"attributes": {}
|
7489 |
}
|
7490 |
},
|
7491 |
+
"total_flos": 2.0948834721476903e+19,
|
7492 |
"train_batch_size": 8,
|
7493 |
"trial_name": null,
|
7494 |
"trial_params": null
|