Training in progress, epoch 2, checkpoint
Browse files- last-checkpoint/global_step1581/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +2 -2
- last-checkpoint/global_step1581/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +2 -2
- last-checkpoint/global_step1581/zero_pp_rank_0_mp_rank_00_model_states.pt +2 -2
- last-checkpoint/global_step1581/zero_pp_rank_1_mp_rank_00_model_states.pt +2 -2
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/trainer_state.json +36 -5
last-checkpoint/global_step1581/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:91eacaa84680aa82b20a3df8182dc93d0ce047ed089f25b4731f846316837cb6
|
3 |
+
size 28315088
|
last-checkpoint/global_step1581/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c297b2e76489c10915e73396d64e91d8ba6c2dd74c95b2df3b59bea16e5b8948
|
3 |
+
size 28315088
|
last-checkpoint/global_step1581/zero_pp_rank_0_mp_rank_00_model_states.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a0cfb139e4c8441e6ab6518b594be2262638a6362ae882d8fa66311577c967c5
|
3 |
+
size 2850543502
|
last-checkpoint/global_step1581/zero_pp_rank_1_mp_rank_00_model_states.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a28252787a7c5302116de0d3ed69eaebf466c0d5f045b2ab3ef114b4d7e96000
|
3 |
+
size 2850543310
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step1581
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14512
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f08f5222c04ba8a25b70996b406c1e95dc834c73f15f3e7248da94c55fa3d6d3
|
3 |
size 14512
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14512
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:707f46a874ccec694d9a9ebdb8230159a79dc68e5bca12742f90f6e6d892b27e
|
3 |
size 14512
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
-
"best_metric": 2.
|
3 |
-
"best_model_checkpoint": "dq158/pingusPongus/checkpoint-
|
4 |
-
"epoch": 0
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -32,13 +32,44 @@
|
|
32 |
"eval_steps_per_second": 0.126,
|
33 |
"eval_translation_length": 53098,
|
34 |
"step": 790
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
}
|
36 |
],
|
37 |
"logging_steps": 500,
|
38 |
"max_steps": 23700,
|
39 |
"num_train_epochs": 30,
|
40 |
"save_steps": 500,
|
41 |
-
"total_flos":
|
42 |
"trial_name": null,
|
43 |
"trial_params": null
|
44 |
}
|
|
|
1 |
{
|
2 |
+
"best_metric": 2.288722038269043,
|
3 |
+
"best_model_checkpoint": "dq158/pingusPongus/checkpoint-1581",
|
4 |
+
"epoch": 2.0,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 1581,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
32 |
"eval_steps_per_second": 0.126,
|
33 |
"eval_translation_length": 53098,
|
34 |
"step": 790
|
35 |
+
},
|
36 |
+
{
|
37 |
+
"epoch": 1.27,
|
38 |
+
"learning_rate": 9.445572420019074e-05,
|
39 |
+
"loss": 2.5652,
|
40 |
+
"step": 1000
|
41 |
+
},
|
42 |
+
{
|
43 |
+
"epoch": 1.9,
|
44 |
+
"learning_rate": 9.999999999999999e-05,
|
45 |
+
"loss": 2.468,
|
46 |
+
"step": 1500
|
47 |
+
},
|
48 |
+
{
|
49 |
+
"epoch": 2.0,
|
50 |
+
"eval_bleu": 1.0,
|
51 |
+
"eval_brevity_penalty": 1.0,
|
52 |
+
"eval_length_ratio": 1.0,
|
53 |
+
"eval_loss": 2.288722038269043,
|
54 |
+
"eval_precisions": [
|
55 |
+
1.0,
|
56 |
+
1.0,
|
57 |
+
1.0,
|
58 |
+
1.0
|
59 |
+
],
|
60 |
+
"eval_reference_length": 53391,
|
61 |
+
"eval_runtime": 1340.9293,
|
62 |
+
"eval_samples_per_second": 2.096,
|
63 |
+
"eval_steps_per_second": 0.131,
|
64 |
+
"eval_translation_length": 53391,
|
65 |
+
"step": 1581
|
66 |
}
|
67 |
],
|
68 |
"logging_steps": 500,
|
69 |
"max_steps": 23700,
|
70 |
"num_train_epochs": 30,
|
71 |
"save_steps": 500,
|
72 |
+
"total_flos": 771945142419456.0,
|
73 |
"trial_name": null,
|
74 |
"trial_params": null
|
75 |
}
|