mtzig committed on
Commit
ff01b2b
·
verified ·
1 Parent(s): 6d384dd

Training in progress, step 2115, checkpoint

Browse files
last-checkpoint/optimizer_0/.metadata CHANGED
Binary files a/last-checkpoint/optimizer_0/.metadata and b/last-checkpoint/optimizer_0/.metadata differ
 
last-checkpoint/optimizer_0/__0_0.distcp CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c626d88cecaf6f37244c24626ee31bda254de73e335860f886b2be28c4358d97
3
  size 13934748
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3463bb2fcbde477a6451746c5e93c2c02d428df311895e645e51bd4d7bc5770
3
  size 13934748
last-checkpoint/optimizer_0/__1_0.distcp CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b98f82f21939a42caf63b3e60fa8693d044a5bada470fb4c47ca564bc1aa2906
3
  size 13999412
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45759926af3c5a130310d7c3435a2592683cc6f827fa760589170a59285976df
3
  size 13999412
last-checkpoint/optimizer_0/__2_0.distcp CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d1bccfb3da16edb9ca2352f991e7e2c84949c2cebb82bdfe6dff4edb7588812b
3
  size 13990904
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac0546bb14dfa550d667edb17bad381998c91ad46b19cf4fa9c242b50a816ed5
3
  size 13990904
last-checkpoint/optimizer_0/__3_0.distcp CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f1b5e474c9b591c523f4c4558a63e2fdd86f92990aa17d39609578b1c9d025a
3
  size 13990904
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94c0f6244ae9eafb7bfc7053255ee1f03da545471ca58c905e513373711ac6d1
3
  size 13990904
last-checkpoint/pytorch_model_fsdp_0/.metadata CHANGED
Binary files a/last-checkpoint/pytorch_model_fsdp_0/.metadata and b/last-checkpoint/pytorch_model_fsdp_0/.metadata differ
 
last-checkpoint/pytorch_model_fsdp_0/__0_0.distcp CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:84eef1c8a398e669a09b130c39c3f146f2a1df5c8f58186431773f03716ad0dd
3
  size 6966784
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9e31bdf1204fd1d0f71a72f790c89994b7b42e355866f78e398dcd1fd72ff59
3
  size 6966784
last-checkpoint/pytorch_model_fsdp_0/__1_0.distcp CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:85c2c1bfcfbe43cb98961bcf7bbee9910700d60cc94ea9e559cdcc0bfcaf1d3a
3
  size 6966784
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7637349be59f99c83b895a14ee2abc08edf7040f91572c826bfc5e240c6cf3cc
3
  size 6966784
last-checkpoint/pytorch_model_fsdp_0/__2_0.distcp CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:65404a56baaeb38eea09621cc68aa2f31f268f0657702a26eb129038b9b80d1b
3
  size 6966784
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:96722daeaa044ab72310631ca9e6cb3421c010477ca15702bf14dc71a0459b49
3
  size 6966784
last-checkpoint/pytorch_model_fsdp_0/__3_0.distcp CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f3c2b908498addeec6c50ef933c786ada650e8ffdacabaf686c730cc90d5e9dd
3
  size 6966784
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:36ecb8f00470b147597cb0cacfa91635a0005a8aa8e917d79b71eed2692f187e
3
  size 6966784
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:75d7eee0983d654dc4f4d9d0aeab1c0cc99847a413b7ee9122cbe6f31278739d
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6408f4b0aa195908f564453cafd5739860df4366aa11fbce643000cf71fb361a
3
  size 14960
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f6b89b5ae016f3558d6cf4489eb242de8fea1141c77af78593bebef95e5e45eb
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2de12b682debc58a16157122005ee34693196684c691e90572cca7e76466246
3
  size 14960
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9375cbe9615de32a9bfeb48c97d58f16a884f450ceae1c1433fd9c53f512214c
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1bb54c6b25889889c49cdbd9de91a22d239105af3881dd38ba49b92d0a971950
3
  size 14960
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:13c4476d4d3e749b45bb7cf5bd672971013f9e7d9039dbfad26020d82e32caff
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:671cb48e85bbaaa47ed0fd861991ab59a348f2bf0b24bf261a2744e7f24b2809
3
  size 14960
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:20531ddcffa25460cb7198bef6ec4382015b394eaa7700ad1ffe8c13cee7ce9f
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4afcb9cd9602dc2e00eeaf78dfe6d4b6f74f46f4affaac94377d375b2a776c3d
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9929078014184397,
5
  "eval_steps": 20,
6
- "global_step": 2100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -15979,6 +15979,111 @@
15979
  "eval_samples_per_second": 5.753,
15980
  "eval_steps_per_second": 0.188,
15981
  "step": 2100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15982
  }
15983
  ],
15984
  "logging_steps": 1,
@@ -15993,12 +16098,12 @@
15993
  "should_evaluate": false,
15994
  "should_log": false,
15995
  "should_save": true,
15996
- "should_training_stop": false
15997
  },
15998
  "attributes": {}
15999
  }
16000
  },
16001
- "total_flos": 5.377331196550185e+17,
16002
  "train_batch_size": 8,
16003
  "trial_name": null,
16004
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
  "eval_steps": 20,
6
+ "global_step": 2115,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
15979
  "eval_samples_per_second": 5.753,
15980
  "eval_steps_per_second": 0.188,
15981
  "step": 2100
15982
+ },
15983
+ {
15984
+ "epoch": 0.9933806146572104,
15985
+ "grad_norm": 5.17624044418335,
15986
+ "learning_rate": 2.6707234039302642e-09,
15987
+ "loss": 0.2462,
15988
+ "step": 2101
15989
+ },
15990
+ {
15991
+ "epoch": 0.9938534278959811,
15992
+ "grad_norm": 6.951145648956299,
15993
+ "learning_rate": 2.302831750020662e-09,
15994
+ "loss": 0.2213,
15995
+ "step": 2102
15996
+ },
15997
+ {
15998
+ "epoch": 0.9943262411347518,
15999
+ "grad_norm": 4.699036598205566,
16000
+ "learning_rate": 1.962187306674412e-09,
16001
+ "loss": 0.2068,
16002
+ "step": 2103
16003
+ },
16004
+ {
16005
+ "epoch": 0.9947990543735225,
16006
+ "grad_norm": 5.011316776275635,
16007
+ "learning_rate": 1.6487910022666698e-09,
16008
+ "loss": 0.154,
16009
+ "step": 2104
16010
+ },
16011
+ {
16012
+ "epoch": 0.9952718676122931,
16013
+ "grad_norm": 5.612926483154297,
16014
+ "learning_rate": 1.3626436909131014e-09,
16015
+ "loss": 0.2245,
16016
+ "step": 2105
16017
+ },
16018
+ {
16019
+ "epoch": 0.9957446808510638,
16020
+ "grad_norm": 6.049012184143066,
16021
+ "learning_rate": 1.1037461524643355e-09,
16022
+ "loss": 0.2118,
16023
+ "step": 2106
16024
+ },
16025
+ {
16026
+ "epoch": 0.9962174940898345,
16027
+ "grad_norm": 6.17867374420166,
16028
+ "learning_rate": 8.720990925059625e-10,
16029
+ "loss": 0.281,
16030
+ "step": 2107
16031
+ },
16032
+ {
16033
+ "epoch": 0.9966903073286052,
16034
+ "grad_norm": 5.029500484466553,
16035
+ "learning_rate": 6.677031423574232e-10,
16036
+ "loss": 0.2281,
16037
+ "step": 2108
16038
+ },
16039
+ {
16040
+ "epoch": 0.9971631205673759,
16041
+ "grad_norm": 6.733471393585205,
16042
+ "learning_rate": 4.905588590686794e-10,
16043
+ "loss": 0.2776,
16044
+ "step": 2109
16045
+ },
16046
+ {
16047
+ "epoch": 0.9976359338061466,
16048
+ "grad_norm": 9.003266334533691,
16049
+ "learning_rate": 3.4066672541910317e-10,
16050
+ "loss": 0.3067,
16051
+ "step": 2110
16052
+ },
16053
+ {
16054
+ "epoch": 0.9981087470449173,
16055
+ "grad_norm": 6.541497230529785,
16056
+ "learning_rate": 2.180271499185871e-10,
16057
+ "loss": 0.2418,
16058
+ "step": 2111
16059
+ },
16060
+ {
16061
+ "epoch": 0.9985815602836879,
16062
+ "grad_norm": 3.8773562908172607,
16063
+ "learning_rate": 1.2264046680088294e-10,
16064
+ "loss": 0.1889,
16065
+ "step": 2112
16066
+ },
16067
+ {
16068
+ "epoch": 0.9990543735224586,
16069
+ "grad_norm": 5.770922660827637,
16070
+ "learning_rate": 5.4506936030263026e-11,
16071
+ "loss": 0.2226,
16072
+ "step": 2113
16073
+ },
16074
+ {
16075
+ "epoch": 0.9995271867612293,
16076
+ "grad_norm": 4.714475631713867,
16077
+ "learning_rate": 1.3626743291528244e-11,
16078
+ "loss": 0.1988,
16079
+ "step": 2114
16080
+ },
16081
+ {
16082
+ "epoch": 1.0,
16083
+ "grad_norm": 9.07297134399414,
16084
+ "learning_rate": 0.0,
16085
+ "loss": 0.2536,
16086
+ "step": 2115
16087
  }
16088
  ],
16089
  "logging_steps": 1,
 
16098
  "should_evaluate": false,
16099
  "should_log": false,
16100
  "should_save": true,
16101
+ "should_training_stop": true
16102
  },
16103
  "attributes": {}
16104
  }
16105
  },
16106
+ "total_flos": 5.4142614901253734e+17,
16107
  "train_batch_size": 8,
16108
  "trial_name": null,
16109
  "trial_params": null