Training in progress, step 270000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +203 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 202194449
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2db59304ac09b28bcb3c65afafd5ee1982c926ae9e5b3be0a3041e16925dc09d
|
3 |
size 202194449
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 102501541
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6481ae1099357d8bcf88c484382e1abee67e04204a9282cec0553dec29988327
|
3 |
size 102501541
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b8b23debd59dd70fd2819e8cfb49b59ed6e24536a986d25a514bb064603c02e2
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b8b23debd59dd70fd2819e8cfb49b59ed6e24536a986d25a514bb064603c02e2
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b8b23debd59dd70fd2819e8cfb49b59ed6e24536a986d25a514bb064603c02e2
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b8b23debd59dd70fd2819e8cfb49b59ed6e24536a986d25a514bb064603c02e2
|
3 |
size 14503
|
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b8b23debd59dd70fd2819e8cfb49b59ed6e24536a986d25a514bb064603c02e2
|
3 |
size 14503
|
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b8b23debd59dd70fd2819e8cfb49b59ed6e24536a986d25a514bb064603c02e2
|
3 |
size 14503
|
last-checkpoint/rng_state_6.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b8b23debd59dd70fd2819e8cfb49b59ed6e24536a986d25a514bb064603c02e2
|
3 |
size 14503
|
last-checkpoint/rng_state_7.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b8b23debd59dd70fd2819e8cfb49b59ed6e24536a986d25a514bb064603c02e2
|
3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6cb61348726887e329b19406ea4e3e39ac391edeec6dfd8508b3cb524aa33e28
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 6.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -5206,11 +5206,211 @@
|
|
5206 |
"eval_samples_per_second": 1533.386,
|
5207 |
"eval_steps_per_second": 24.417,
|
5208 |
"step": 260000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5209 |
}
|
5210 |
],
|
5211 |
"max_steps": 500000,
|
5212 |
"num_train_epochs": 12,
|
5213 |
-
"total_flos": 8.
|
5214 |
"trial_name": null,
|
5215 |
"trial_params": null
|
5216 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 6.290920105314663,
|
5 |
+
"global_step": 270000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
5206 |
"eval_samples_per_second": 1533.386,
|
5207 |
"eval_steps_per_second": 24.417,
|
5208 |
"step": 260000
|
5209 |
+
},
|
5210 |
+
{
|
5211 |
+
"epoch": 6.07,
|
5212 |
+
"learning_rate": 0.00015691796905504187,
|
5213 |
+
"loss": 0.2712,
|
5214 |
+
"step": 260500
|
5215 |
+
},
|
5216 |
+
{
|
5217 |
+
"epoch": 6.08,
|
5218 |
+
"learning_rate": 0.00015643849514435944,
|
5219 |
+
"loss": 0.2709,
|
5220 |
+
"step": 261000
|
5221 |
+
},
|
5222 |
+
{
|
5223 |
+
"epoch": 6.08,
|
5224 |
+
"eval_loss": 0.2548723816871643,
|
5225 |
+
"eval_runtime": 1.4314,
|
5226 |
+
"eval_samples_per_second": 1535.607,
|
5227 |
+
"eval_steps_per_second": 24.452,
|
5228 |
+
"step": 261000
|
5229 |
+
},
|
5230 |
+
{
|
5231 |
+
"epoch": 6.09,
|
5232 |
+
"learning_rate": 0.00015595900550252463,
|
5233 |
+
"loss": 0.2707,
|
5234 |
+
"step": 261500
|
5235 |
+
},
|
5236 |
+
{
|
5237 |
+
"epoch": 6.1,
|
5238 |
+
"learning_rate": 0.00015547950537315926,
|
5239 |
+
"loss": 0.2708,
|
5240 |
+
"step": 262000
|
5241 |
+
},
|
5242 |
+
{
|
5243 |
+
"epoch": 6.1,
|
5244 |
+
"eval_loss": 0.2543224096298218,
|
5245 |
+
"eval_runtime": 1.4239,
|
5246 |
+
"eval_samples_per_second": 1543.639,
|
5247 |
+
"eval_steps_per_second": 24.58,
|
5248 |
+
"step": 262000
|
5249 |
+
},
|
5250 |
+
{
|
5251 |
+
"epoch": 6.12,
|
5252 |
+
"learning_rate": 0.00015499999999999997,
|
5253 |
+
"loss": 0.2708,
|
5254 |
+
"step": 262500
|
5255 |
+
},
|
5256 |
+
{
|
5257 |
+
"epoch": 6.13,
|
5258 |
+
"learning_rate": 0.00015452049462684068,
|
5259 |
+
"loss": 0.2719,
|
5260 |
+
"step": 263000
|
5261 |
+
},
|
5262 |
+
{
|
5263 |
+
"epoch": 6.13,
|
5264 |
+
"eval_loss": 0.25397995114326477,
|
5265 |
+
"eval_runtime": 1.4586,
|
5266 |
+
"eval_samples_per_second": 1506.925,
|
5267 |
+
"eval_steps_per_second": 23.996,
|
5268 |
+
"step": 263000
|
5269 |
+
},
|
5270 |
+
{
|
5271 |
+
"epoch": 6.14,
|
5272 |
+
"learning_rate": 0.00015404099449747535,
|
5273 |
+
"loss": 0.2712,
|
5274 |
+
"step": 263500
|
5275 |
+
},
|
5276 |
+
{
|
5277 |
+
"epoch": 6.15,
|
5278 |
+
"learning_rate": 0.0001535615048556405,
|
5279 |
+
"loss": 0.2706,
|
5280 |
+
"step": 264000
|
5281 |
+
},
|
5282 |
+
{
|
5283 |
+
"epoch": 6.15,
|
5284 |
+
"eval_loss": 0.2522125244140625,
|
5285 |
+
"eval_runtime": 1.474,
|
5286 |
+
"eval_samples_per_second": 1491.135,
|
5287 |
+
"eval_steps_per_second": 23.744,
|
5288 |
+
"step": 264000
|
5289 |
+
},
|
5290 |
+
{
|
5291 |
+
"epoch": 6.16,
|
5292 |
+
"learning_rate": 0.0001530820309449581,
|
5293 |
+
"loss": 0.2705,
|
5294 |
+
"step": 264500
|
5295 |
+
},
|
5296 |
+
{
|
5297 |
+
"epoch": 6.17,
|
5298 |
+
"learning_rate": 0.00015260257800887798,
|
5299 |
+
"loss": 0.2705,
|
5300 |
+
"step": 265000
|
5301 |
+
},
|
5302 |
+
{
|
5303 |
+
"epoch": 6.17,
|
5304 |
+
"eval_loss": 0.25516122579574585,
|
5305 |
+
"eval_runtime": 1.3844,
|
5306 |
+
"eval_samples_per_second": 1587.692,
|
5307 |
+
"eval_steps_per_second": 25.282,
|
5308 |
+
"step": 265000
|
5309 |
+
},
|
5310 |
+
{
|
5311 |
+
"epoch": 6.19,
|
5312 |
+
"learning_rate": 0.0001521231512906207,
|
5313 |
+
"loss": 0.2709,
|
5314 |
+
"step": 265500
|
5315 |
+
},
|
5316 |
+
{
|
5317 |
+
"epoch": 6.2,
|
5318 |
+
"learning_rate": 0.00015164375603311998,
|
5319 |
+
"loss": 0.2708,
|
5320 |
+
"step": 266000
|
5321 |
+
},
|
5322 |
+
{
|
5323 |
+
"epoch": 6.2,
|
5324 |
+
"eval_loss": 0.25247690081596375,
|
5325 |
+
"eval_runtime": 1.4156,
|
5326 |
+
"eval_samples_per_second": 1552.724,
|
5327 |
+
"eval_steps_per_second": 24.725,
|
5328 |
+
"step": 266000
|
5329 |
+
},
|
5330 |
+
{
|
5331 |
+
"epoch": 6.21,
|
5332 |
+
"learning_rate": 0.00015116439747896553,
|
5333 |
+
"loss": 0.271,
|
5334 |
+
"step": 266500
|
5335 |
+
},
|
5336 |
+
{
|
5337 |
+
"epoch": 6.22,
|
5338 |
+
"learning_rate": 0.00015068508087034578,
|
5339 |
+
"loss": 0.2704,
|
5340 |
+
"step": 267000
|
5341 |
+
},
|
5342 |
+
{
|
5343 |
+
"epoch": 6.22,
|
5344 |
+
"eval_loss": 0.25168919563293457,
|
5345 |
+
"eval_runtime": 1.4279,
|
5346 |
+
"eval_samples_per_second": 1539.31,
|
5347 |
+
"eval_steps_per_second": 24.511,
|
5348 |
+
"step": 267000
|
5349 |
+
},
|
5350 |
+
{
|
5351 |
+
"epoch": 6.23,
|
5352 |
+
"learning_rate": 0.00015020581144899027,
|
5353 |
+
"loss": 0.2702,
|
5354 |
+
"step": 267500
|
5355 |
+
},
|
5356 |
+
{
|
5357 |
+
"epoch": 6.24,
|
5358 |
+
"learning_rate": 0.0001497265944561127,
|
5359 |
+
"loss": 0.2701,
|
5360 |
+
"step": 268000
|
5361 |
+
},
|
5362 |
+
{
|
5363 |
+
"epoch": 6.24,
|
5364 |
+
"eval_loss": 0.2535783052444458,
|
5365 |
+
"eval_runtime": 1.4078,
|
5366 |
+
"eval_samples_per_second": 1561.345,
|
5367 |
+
"eval_steps_per_second": 24.862,
|
5368 |
+
"step": 268000
|
5369 |
+
},
|
5370 |
+
{
|
5371 |
+
"epoch": 6.26,
|
5372 |
+
"learning_rate": 0.00014924743513235327,
|
5373 |
+
"loss": 0.2703,
|
5374 |
+
"step": 268500
|
5375 |
+
},
|
5376 |
+
{
|
5377 |
+
"epoch": 6.27,
|
5378 |
+
"learning_rate": 0.0001487683387177216,
|
5379 |
+
"loss": 0.2704,
|
5380 |
+
"step": 269000
|
5381 |
+
},
|
5382 |
+
{
|
5383 |
+
"epoch": 6.27,
|
5384 |
+
"eval_loss": 0.25389179587364197,
|
5385 |
+
"eval_runtime": 1.4483,
|
5386 |
+
"eval_samples_per_second": 1517.622,
|
5387 |
+
"eval_steps_per_second": 24.166,
|
5388 |
+
"step": 269000
|
5389 |
+
},
|
5390 |
+
{
|
5391 |
+
"epoch": 6.28,
|
5392 |
+
"learning_rate": 0.00014828931045153928,
|
5393 |
+
"loss": 0.2703,
|
5394 |
+
"step": 269500
|
5395 |
+
},
|
5396 |
+
{
|
5397 |
+
"epoch": 6.29,
|
5398 |
+
"learning_rate": 0.00014781035557238272,
|
5399 |
+
"loss": 0.2702,
|
5400 |
+
"step": 270000
|
5401 |
+
},
|
5402 |
+
{
|
5403 |
+
"epoch": 6.29,
|
5404 |
+
"eval_loss": 0.2548398971557617,
|
5405 |
+
"eval_runtime": 1.4524,
|
5406 |
+
"eval_samples_per_second": 1513.389,
|
5407 |
+
"eval_steps_per_second": 24.099,
|
5408 |
+
"step": 270000
|
5409 |
}
|
5410 |
],
|
5411 |
"max_steps": 500000,
|
5412 |
"num_train_epochs": 12,
|
5413 |
+
"total_flos": 8.626137764966059e+21,
|
5414 |
"trial_name": null,
|
5415 |
"trial_params": null
|
5416 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 102501541
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6481ae1099357d8bcf88c484382e1abee67e04204a9282cec0553dec29988327
|
3 |
size 102501541
|