Training in progress, step 306500, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 891644712
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:51adf105e6f6a7ed906d29c700a302ed25a5f3762591fb828115de3db85e6428
|
3 |
size 891644712
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1783444357
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:61e99f73129aa9790926d7e259881eb34dc636e42b1b114ed72b764955661475
|
3 |
size 1783444357
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14575
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:de8a5728e1af3d048775ed91e97081b2078f2ef02825d44e2a1d3ed6f1e431ea
|
3 |
size 14575
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 627
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c5ef81669a76b9bc8adaca72bc010d0d224d425bd06e93da371512f70d617c1a
|
3 |
size 627
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 1.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -4291,6 +4291,13 @@
|
|
4291 |
"learning_rate": 1.05808e-06,
|
4292 |
"loss": 0.2484,
|
4293 |
"step": 306000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4294 |
}
|
4295 |
],
|
4296 |
"logging_steps": 500,
|
@@ -4310,7 +4317,7 @@
|
|
4310 |
"attributes": {}
|
4311 |
}
|
4312 |
},
|
4313 |
-
"total_flos": 1.
|
4314 |
"train_batch_size": 64,
|
4315 |
"trial_name": null,
|
4316 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.9616,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 306500,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
4291 |
"learning_rate": 1.05808e-06,
|
4292 |
"loss": 0.2484,
|
4293 |
"step": 306000
|
4294 |
+
},
|
4295 |
+
{
|
4296 |
+
"epoch": 1.9616,
|
4297 |
+
"grad_norm": 0.5446351170539856,
|
4298 |
+
"learning_rate": 9.780799999999999e-07,
|
4299 |
+
"loss": 0.2434,
|
4300 |
+
"step": 306500
|
4301 |
}
|
4302 |
],
|
4303 |
"logging_steps": 500,
|
|
|
4317 |
"attributes": {}
|
4318 |
}
|
4319 |
},
|
4320 |
+
"total_flos": 1.49316474765312e+18,
|
4321 |
"train_batch_size": 64,
|
4322 |
"trial_name": null,
|
4323 |
"trial_params": null
|