SystemAdmin123 committed on
Commit
7ee9d54
·
verified ·
1 Parent(s): 02b776a

Training in progress, step 1320, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:76da35fcd2eceb4682b02e4d6f4efd1f654fac0b25429d515156667dd817308a
3
  size 2433024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:11643bfec0b8066f81435dfab9d6924d7eb3edeeac2270017d461eea734a9479
3
  size 2433024
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f660c44323f7dc691c0421faf42c40cc765dca5c21d11bff643a623b003967a0
3
  size 2498406
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fdf2381d631ef4eb0f2ec3ba77196b6c928d7cc0993c1395a01f1054cc6eb637
3
  size 2498406
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1b3ba644702b51ab6ceb3cfb78b0712e10c56d7a898133b9cf775673605d71a6
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20d70fad263b22723fec206cf46c7be2eacd2f78f087c3d0bdf030ac5240ae13
3
  size 15984
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7a575d13944792443c2e0faf6af7562460ac52c5628f17b918747a2fad55be01
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec3fcb4be00d780df479f9dac4126d0348e75ff289a5063a7ed0574b57fbc9eb
3
  size 15984
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3378545ae3c0bb849e88bd71db9a81f0556e610f7a1d6ea4af902425e910afa6
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec5b78147ecf7eb0aa33d5383c0a50cc39cc808ebc0f0a52f2c385de2c5c0c79
3
  size 15984
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d3581eff136704b2f763bfac1cfe6d4ca215660e05f32de8938f8c598bdd09e1
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:def24f9a4a2da9e3ecf86804e9ee3edba51a08304bdf5081e4331fade20eec8a
3
  size 15984
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4710f39f928214c6084305a96ed4d69309abcb477c5fe9ea3b79644e92349f58
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d542d427bf2bb6ef4b7b489b3ff8728a8d7a7269befb6dac998921cfec4fa2fe
3
  size 15984
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:17f2040b06510c4ed4dae6986e8f9b63891a60d4b9fa2e8045fc74b430abf05b
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23a2b76363f40a9152be338f15b92fe9aea0c0d9d18bae8b15de98ba4365477a
3
  size 15984
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3f0418024306b84efbd06ecb22274609094ccd51161118b224af78a4c9aa3c2e
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1ca7fe9106d3f63890388b3012f13801b1f0ccf1ae72d0176ee3cc810f72b56
3
  size 15984
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9d46bb0bbf258912802dbc17d20db208430fc3ab3a923169293e1cbe07fb7ae7
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71951a6bb2a69638cdc2957f124934a2a5d71b1ae3bd32982c92819f8437c8ba
3
  size 15984
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:25cf578d94ac1a7be20caf3c6bf3d856ece0554beb81a56caa6f17d994e34988
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2e3675f249d7d0e817b058a9658660c0c52e027175e9ede4bed994c9c640c8b
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.514792899408284,
5
  "eval_steps": 20,
6
- "global_step": 1280,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1423,6 +1423,50 @@
1423
  "eval_samples_per_second": 424.926,
1424
  "eval_steps_per_second": 26.593,
1425
  "step": 1280
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1426
  }
1427
  ],
1428
  "logging_steps": 10,
@@ -1442,7 +1486,7 @@
1442
  "attributes": {}
1443
  }
1444
  },
1445
- "total_flos": 501547991040.0,
1446
  "train_batch_size": 2,
1447
  "trial_name": null,
1448
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.5621301775147929,
5
  "eval_steps": 20,
6
+ "global_step": 1320,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1423
  "eval_samples_per_second": 424.926,
1424
  "eval_steps_per_second": 26.593,
1425
  "step": 1280
1426
+ },
1427
+ {
1428
+ "epoch": 1.5266272189349113,
1429
+ "grad_norm": 0.46484375,
1430
+ "learning_rate": 0.00010297580629631325,
1431
+ "loss": 10.4277,
1432
+ "step": 1290
1433
+ },
1434
+ {
1435
+ "epoch": 1.5384615384615383,
1436
+ "grad_norm": 0.47265625,
1437
+ "learning_rate": 0.00010165339447663587,
1438
+ "loss": 10.4369,
1439
+ "step": 1300
1440
+ },
1441
+ {
1442
+ "epoch": 1.5384615384615383,
1443
+ "eval_loss": 10.48730182647705,
1444
+ "eval_runtime": 3.5506,
1445
+ "eval_samples_per_second": 423.028,
1446
+ "eval_steps_per_second": 26.474,
1447
+ "step": 1300
1448
+ },
1449
+ {
1450
+ "epoch": 1.5502958579881656,
1451
+ "grad_norm": 0.484375,
1452
+ "learning_rate": 0.00010033069336079952,
1453
+ "loss": 10.4141,
1454
+ "step": 1310
1455
+ },
1456
+ {
1457
+ "epoch": 1.5621301775147929,
1458
+ "grad_norm": 0.58984375,
1459
+ "learning_rate": 9.900793438320037e-05,
1460
+ "loss": 10.4642,
1461
+ "step": 1320
1462
+ },
1463
+ {
1464
+ "epoch": 1.5621301775147929,
1465
+ "eval_loss": 10.487874031066895,
1466
+ "eval_runtime": 3.5842,
1467
+ "eval_samples_per_second": 419.067,
1468
+ "eval_steps_per_second": 26.227,
1469
+ "step": 1320
1470
  }
1471
  ],
1472
  "logging_steps": 10,
 
1486
  "attributes": {}
1487
  }
1488
  },
1489
+ "total_flos": 517262671872.0,
1490
  "train_batch_size": 2,
1491
  "trial_name": null,
1492
  "trial_params": null