mtzig commited on
Commit
36134be
·
verified ·
1 Parent(s): aca7c24

Training in progress, step 808, checkpoint

Browse files
last-checkpoint/optimizer_0/.metadata CHANGED
Binary files a/last-checkpoint/optimizer_0/.metadata and b/last-checkpoint/optimizer_0/.metadata differ
 
last-checkpoint/optimizer_0/__0_0.distcp CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:690953a5f1eaa9653131c0bd57c0088778a2758f6f1caff67bc9dc9bd0a61e4e
3
  size 13934748
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d79a8a24bad92ed6eddb4f1a5ae34147ccedd0938830a31dd4b49d4107b25fe
3
  size 13934748
last-checkpoint/optimizer_0/__1_0.distcp CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:961a2146c9b2e620d54c67901bd3fa375b0dd4b3895406d10bf2578c5745f6ec
3
  size 13999412
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19d5894029ff5fb8c3f198f3d0dc4721910b9e651dcf6775ef035747ababd49d
3
  size 13999412
last-checkpoint/optimizer_0/__2_0.distcp CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d6e7156894345a02a9422f425abf2ddc88061246f26736af17b7bd221ea2b39c
3
  size 13990904
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72f89593cdfdb1d0d63e29c7655dd6926cfa8d7de6f64ba99861afa048925281
3
  size 13990904
last-checkpoint/optimizer_0/__3_0.distcp CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:afb3878b0710bfa44676e56faea2bf2e81a1c4bc0b34feb980fc568965e9b5bf
3
  size 13990904
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1de76ba031b2df70290abcc273c0ca7c76e5324bf347df6f17f34aee7dc63d1c
3
  size 13990904
last-checkpoint/pytorch_model_fsdp_0/.metadata CHANGED
Binary files a/last-checkpoint/pytorch_model_fsdp_0/.metadata and b/last-checkpoint/pytorch_model_fsdp_0/.metadata differ
 
last-checkpoint/pytorch_model_fsdp_0/__0_0.distcp CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d17c8c706eb940a9d1d5520139b6ffe5a44eb3dd61f2326391f6febfc3a6784f
3
  size 6966784
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:26f35c31737247f2cdd166bfc3866830e294dca3791e7c58a4a4c7da0bde1664
3
  size 6966784
last-checkpoint/pytorch_model_fsdp_0/__1_0.distcp CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2101a5cc6a3e4ee7d9bcd0e452c1b7b0e0ae6c681cf45975f0c718f88b871286
3
  size 6966784
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee274d658c2cd7d90c2cb7a6e847197c382e7b20d51d4d940ab01b8f68c15df2
3
  size 6966784
last-checkpoint/pytorch_model_fsdp_0/__2_0.distcp CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:03595f74c1fc252124eeec1b051b356f6d7bf67292b9806a2f8674da24f88c10
3
  size 6966784
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43d339baab952c823a939ba4aaa601da58042e6b41d474cb9dd88a0b1db4d424
3
  size 6966784
last-checkpoint/pytorch_model_fsdp_0/__3_0.distcp CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b14a06466128fc4e8a81dd91b128a90acb8f65c2f692c8d576ff2e0ea82b7812
3
  size 6966784
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8564d765df3996b99c746bc714c38fa581f05f6f7bd8afb7323b7cee52b46db5
3
  size 6966784
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:037251a482261d469d05ca212322fd7a8b0d86b1e9774acdb0dd0d30ed213fb4
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c5267e6651ba6650aad8d4187f305b173ad364e0765707ff4829b4e5b285e5d5
3
  size 14960
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:971deb5c5f50d793d58f55fcc23e2e2e3d0e3e15819213e61585330da863b511
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6eb6e8a7a5edcac0f188611ba47c80f2c5826e8fdf1f9517cb0c11df9afe649c
3
  size 14960
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6969e1b0dfca6ead716e7d65a7048792f64786424a159e6ca34b0c6558b3ebd0
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81b06ae580a53262ce36ae07c3fb3c1ed4571ad7e635f838d2bd9d98ff1de4da
3
  size 14960
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:26ac0fcb20e2693146b5db8fdd7935ee7136850497cc891b3eff53fc7c189e4b
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c23b8d462da5c4f7f12b2321b9990502056ba1dfc5c41ddf990e0cf27e4f862b
3
  size 14960
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2ff85c01ce8c147a5568f0e98607017d923cd6d903d76df137d87352cddf54da
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df74225d229428c183e5117a04f191bf9faa39263780150f0dc259da5480c864
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9900990099009901,
5
  "eval_steps": 20,
6
- "global_step": 800,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -6099,6 +6099,62 @@
6099
  "eval_samples_per_second": 5.734,
6100
  "eval_steps_per_second": 0.187,
6101
  "step": 800
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6102
  }
6103
  ],
6104
  "logging_steps": 1,
@@ -6113,12 +6169,12 @@
6113
  "should_evaluate": false,
6114
  "should_log": false,
6115
  "should_save": true,
6116
- "should_training_stop": false
6117
  },
6118
  "attributes": {}
6119
  }
6120
  },
6121
- "total_flos": 2.4540790520807424e+17,
6122
  "train_batch_size": 8,
6123
  "trial_name": null,
6124
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
  "eval_steps": 20,
6
+ "global_step": 808,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
6099
  "eval_samples_per_second": 5.734,
6100
  "eval_steps_per_second": 0.187,
6101
  "step": 800
6102
+ },
6103
+ {
6104
+ "epoch": 0.9913366336633663,
6105
+ "grad_norm": 5.27916955947876,
6106
+ "learning_rate": 4.574713411816811e-09,
6107
+ "loss": 0.1858,
6108
+ "step": 801
6109
+ },
6110
+ {
6111
+ "epoch": 0.9925742574257426,
6112
+ "grad_norm": 5.716527462005615,
6113
+ "learning_rate": 3.361081929664778e-09,
6114
+ "loss": 0.1604,
6115
+ "step": 802
6116
+ },
6117
+ {
6118
+ "epoch": 0.9938118811881188,
6119
+ "grad_norm": 5.259273529052734,
6120
+ "learning_rate": 2.3341246279806606e-09,
6121
+ "loss": 0.1598,
6122
+ "step": 803
6123
+ },
6124
+ {
6125
+ "epoch": 0.995049504950495,
6126
+ "grad_norm": 4.770205974578857,
6127
+ "learning_rate": 1.493860683851045e-09,
6128
+ "loss": 0.1981,
6129
+ "step": 804
6130
+ },
6131
+ {
6132
+ "epoch": 0.9962871287128713,
6133
+ "grad_norm": 3.5331854820251465,
6134
+ "learning_rate": 8.403057881067877e-10,
6135
+ "loss": 0.1671,
6136
+ "step": 805
6137
+ },
6138
+ {
6139
+ "epoch": 0.9975247524752475,
6140
+ "grad_norm": 4.290172576904297,
6141
+ "learning_rate": 3.7347214503435927e-10,
6142
+ "loss": 0.2307,
6143
+ "step": 806
6144
+ },
6145
+ {
6146
+ "epoch": 0.9987623762376238,
6147
+ "grad_norm": 4.1493964195251465,
6148
+ "learning_rate": 9.33684721426964e-11,
6149
+ "loss": 0.1106,
6150
+ "step": 807
6151
+ },
6152
+ {
6153
+ "epoch": 1.0,
6154
+ "grad_norm": 3.5634615421295166,
6155
+ "learning_rate": 0.0,
6156
+ "loss": 0.1396,
6157
+ "step": 808
6158
  }
6159
  ],
6160
  "logging_steps": 1,
 
6169
  "should_evaluate": false,
6170
  "should_log": false,
6171
  "should_save": true,
6172
+ "should_training_stop": true
6173
  },
6174
  "attributes": {}
6175
  }
6176
  },
6177
+ "total_flos": 2.4786605370559693e+17,
6178
  "train_batch_size": 8,
6179
  "trial_name": null,
6180
  "trial_params": null