Training in progress, step 2115, checkpoint
Browse files- last-checkpoint/optimizer_0/.metadata +0 -0
- last-checkpoint/optimizer_0/__0_0.distcp +1 -1
- last-checkpoint/optimizer_0/__1_0.distcp +1 -1
- last-checkpoint/optimizer_0/__2_0.distcp +1 -1
- last-checkpoint/optimizer_0/__3_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/.metadata +0 -0
- last-checkpoint/pytorch_model_fsdp_0/__0_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__1_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__2_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__3_0.distcp +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +109 -4
last-checkpoint/optimizer_0/.metadata
CHANGED
Binary files a/last-checkpoint/optimizer_0/.metadata and b/last-checkpoint/optimizer_0/.metadata differ
|
|
last-checkpoint/optimizer_0/__0_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 13934748
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b3463bb2fcbde477a6451746c5e93c2c02d428df311895e645e51bd4d7bc5770
|
3 |
size 13934748
|
last-checkpoint/optimizer_0/__1_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 13999412
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:45759926af3c5a130310d7c3435a2592683cc6f827fa760589170a59285976df
|
3 |
size 13999412
|
last-checkpoint/optimizer_0/__2_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 13990904
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ac0546bb14dfa550d667edb17bad381998c91ad46b19cf4fa9c242b50a816ed5
|
3 |
size 13990904
|
last-checkpoint/optimizer_0/__3_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 13990904
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:94c0f6244ae9eafb7bfc7053255ee1f03da545471ca58c905e513373711ac6d1
|
3 |
size 13990904
|
last-checkpoint/pytorch_model_fsdp_0/.metadata
CHANGED
Binary files a/last-checkpoint/pytorch_model_fsdp_0/.metadata and b/last-checkpoint/pytorch_model_fsdp_0/.metadata differ
|
|
last-checkpoint/pytorch_model_fsdp_0/__0_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 6966784
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d9e31bdf1204fd1d0f71a72f790c89994b7b42e355866f78e398dcd1fd72ff59
|
3 |
size 6966784
|
last-checkpoint/pytorch_model_fsdp_0/__1_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 6966784
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7637349be59f99c83b895a14ee2abc08edf7040f91572c826bfc5e240c6cf3cc
|
3 |
size 6966784
|
last-checkpoint/pytorch_model_fsdp_0/__2_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 6966784
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:96722daeaa044ab72310631ca9e6cb3421c010477ca15702bf14dc71a0459b49
|
3 |
size 6966784
|
last-checkpoint/pytorch_model_fsdp_0/__3_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 6966784
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:36ecb8f00470b147597cb0cacfa91635a0005a8aa8e917d79b71eed2692f187e
|
3 |
size 6966784
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14960
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6408f4b0aa195908f564453cafd5739860df4366aa11fbce643000cf71fb361a
|
3 |
size 14960
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14960
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e2de12b682debc58a16157122005ee34693196684c691e90572cca7e76466246
|
3 |
size 14960
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14960
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1bb54c6b25889889c49cdbd9de91a22d239105af3881dd38ba49b92d0a971950
|
3 |
size 14960
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14960
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:671cb48e85bbaaa47ed0fd861991ab59a348f2bf0b24bf261a2744e7f24b2809
|
3 |
size 14960
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4afcb9cd9602dc2e00eeaf78dfe6d4b6f74f46f4affaac94377d375b2a776c3d
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0
|
5 |
"eval_steps": 20,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -15979,6 +15979,111 @@
|
|
15979 |
"eval_samples_per_second": 5.753,
|
15980 |
"eval_steps_per_second": 0.188,
|
15981 |
"step": 2100
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15982 |
}
|
15983 |
],
|
15984 |
"logging_steps": 1,
|
@@ -15993,12 +16098,12 @@
|
|
15993 |
"should_evaluate": false,
|
15994 |
"should_log": false,
|
15995 |
"should_save": true,
|
15996 |
-
"should_training_stop":
|
15997 |
},
|
15998 |
"attributes": {}
|
15999 |
}
|
16000 |
},
|
16001 |
-
"total_flos": 5.
|
16002 |
"train_batch_size": 8,
|
16003 |
"trial_name": null,
|
16004 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.0,
|
5 |
"eval_steps": 20,
|
6 |
+
"global_step": 2115,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
15979 |
"eval_samples_per_second": 5.753,
|
15980 |
"eval_steps_per_second": 0.188,
|
15981 |
"step": 2100
|
15982 |
+
},
|
15983 |
+
{
|
15984 |
+
"epoch": 0.9933806146572104,
|
15985 |
+
"grad_norm": 5.17624044418335,
|
15986 |
+
"learning_rate": 2.6707234039302642e-09,
|
15987 |
+
"loss": 0.2462,
|
15988 |
+
"step": 2101
|
15989 |
+
},
|
15990 |
+
{
|
15991 |
+
"epoch": 0.9938534278959811,
|
15992 |
+
"grad_norm": 6.951145648956299,
|
15993 |
+
"learning_rate": 2.302831750020662e-09,
|
15994 |
+
"loss": 0.2213,
|
15995 |
+
"step": 2102
|
15996 |
+
},
|
15997 |
+
{
|
15998 |
+
"epoch": 0.9943262411347518,
|
15999 |
+
"grad_norm": 4.699036598205566,
|
16000 |
+
"learning_rate": 1.962187306674412e-09,
|
16001 |
+
"loss": 0.2068,
|
16002 |
+
"step": 2103
|
16003 |
+
},
|
16004 |
+
{
|
16005 |
+
"epoch": 0.9947990543735225,
|
16006 |
+
"grad_norm": 5.011316776275635,
|
16007 |
+
"learning_rate": 1.6487910022666698e-09,
|
16008 |
+
"loss": 0.154,
|
16009 |
+
"step": 2104
|
16010 |
+
},
|
16011 |
+
{
|
16012 |
+
"epoch": 0.9952718676122931,
|
16013 |
+
"grad_norm": 5.612926483154297,
|
16014 |
+
"learning_rate": 1.3626436909131014e-09,
|
16015 |
+
"loss": 0.2245,
|
16016 |
+
"step": 2105
|
16017 |
+
},
|
16018 |
+
{
|
16019 |
+
"epoch": 0.9957446808510638,
|
16020 |
+
"grad_norm": 6.049012184143066,
|
16021 |
+
"learning_rate": 1.1037461524643355e-09,
|
16022 |
+
"loss": 0.2118,
|
16023 |
+
"step": 2106
|
16024 |
+
},
|
16025 |
+
{
|
16026 |
+
"epoch": 0.9962174940898345,
|
16027 |
+
"grad_norm": 6.17867374420166,
|
16028 |
+
"learning_rate": 8.720990925059625e-10,
|
16029 |
+
"loss": 0.281,
|
16030 |
+
"step": 2107
|
16031 |
+
},
|
16032 |
+
{
|
16033 |
+
"epoch": 0.9966903073286052,
|
16034 |
+
"grad_norm": 5.029500484466553,
|
16035 |
+
"learning_rate": 6.677031423574232e-10,
|
16036 |
+
"loss": 0.2281,
|
16037 |
+
"step": 2108
|
16038 |
+
},
|
16039 |
+
{
|
16040 |
+
"epoch": 0.9971631205673759,
|
16041 |
+
"grad_norm": 6.733471393585205,
|
16042 |
+
"learning_rate": 4.905588590686794e-10,
|
16043 |
+
"loss": 0.2776,
|
16044 |
+
"step": 2109
|
16045 |
+
},
|
16046 |
+
{
|
16047 |
+
"epoch": 0.9976359338061466,
|
16048 |
+
"grad_norm": 9.003266334533691,
|
16049 |
+
"learning_rate": 3.4066672541910317e-10,
|
16050 |
+
"loss": 0.3067,
|
16051 |
+
"step": 2110
|
16052 |
+
},
|
16053 |
+
{
|
16054 |
+
"epoch": 0.9981087470449173,
|
16055 |
+
"grad_norm": 6.541497230529785,
|
16056 |
+
"learning_rate": 2.180271499185871e-10,
|
16057 |
+
"loss": 0.2418,
|
16058 |
+
"step": 2111
|
16059 |
+
},
|
16060 |
+
{
|
16061 |
+
"epoch": 0.9985815602836879,
|
16062 |
+
"grad_norm": 3.8773562908172607,
|
16063 |
+
"learning_rate": 1.2264046680088294e-10,
|
16064 |
+
"loss": 0.1889,
|
16065 |
+
"step": 2112
|
16066 |
+
},
|
16067 |
+
{
|
16068 |
+
"epoch": 0.9990543735224586,
|
16069 |
+
"grad_norm": 5.770922660827637,
|
16070 |
+
"learning_rate": 5.4506936030263026e-11,
|
16071 |
+
"loss": 0.2226,
|
16072 |
+
"step": 2113
|
16073 |
+
},
|
16074 |
+
{
|
16075 |
+
"epoch": 0.9995271867612293,
|
16076 |
+
"grad_norm": 4.714475631713867,
|
16077 |
+
"learning_rate": 1.3626743291528244e-11,
|
16078 |
+
"loss": 0.1988,
|
16079 |
+
"step": 2114
|
16080 |
+
},
|
16081 |
+
{
|
16082 |
+
"epoch": 1.0,
|
16083 |
+
"grad_norm": 9.07297134399414,
|
16084 |
+
"learning_rate": 0.0,
|
16085 |
+
"loss": 0.2536,
|
16086 |
+
"step": 2115
|
16087 |
}
|
16088 |
],
|
16089 |
"logging_steps": 1,
|
|
|
16098 |
"should_evaluate": false,
|
16099 |
"should_log": false,
|
16100 |
"should_save": true,
|
16101 |
+
"should_training_stop": true
|
16102 |
},
|
16103 |
"attributes": {}
|
16104 |
}
|
16105 |
},
|
16106 |
+
"total_flos": 5.4142614901253734e+17,
|
16107 |
"train_batch_size": 8,
|
16108 |
"trial_name": null,
|
16109 |
"trial_params": null
|