iamnguyen commited on
Commit
f8ff2fb
·
verified ·
1 Parent(s): e1c1e18

Training in progress, step 208, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e887d29daf2b46ba5add2a1260acddfa547b5cbfb37c95c229a2d7500fa39791
3
  size 242041896
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67cde00778fd9b31e92769f2fa5fb22957b3c18857b8fa15489d34bf37fd4392
3
  size 242041896
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f9e41312b0243a5a092e5edadd16a84a23dd9f7cd8c3b8119dee3c0ce5be1035
3
  size 484163514
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a268ea2d87ebd5586132b48f679290592ca529d7cd515b00d80780d58f6b8ca
3
  size 484163514
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b1c0fd03a75513ee813bfa61334da6ab5c34878ec812085dffb828b64a684e20
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:369ba992403565a78cd72e1427835ccc8b05eba0f71afd8552e75e7e6ea369c4
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8ced25cef61d4f9b598032b521e1925cfed15c77c31c1f230ecab47b226ba899
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba478b7a054120870328b778675384e38480f575773ac8971e03a9ab54918ff3
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.012412121212121213,
5
  "eval_steps": 16,
6
- "global_step": 192,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1459,6 +1459,127 @@
1459
  "eval_samples_per_second": 11.909,
1460
  "eval_steps_per_second": 2.977,
1461
  "step": 192
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1462
  }
1463
  ],
1464
  "logging_steps": 1,
@@ -1478,7 +1599,7 @@
1478
  "attributes": {}
1479
  }
1480
  },
1481
- "total_flos": 1663080056487936.0,
1482
  "train_batch_size": 4,
1483
  "trial_name": null,
1484
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.013446464646464646,
5
  "eval_steps": 16,
6
+ "global_step": 208,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1459
  "eval_samples_per_second": 11.909,
1460
  "eval_steps_per_second": 2.977,
1461
  "step": 192
1462
+ },
1463
+ {
1464
+ "epoch": 0.012476767676767676,
1465
+ "grad_norm": 0.10934247821569443,
1466
+ "learning_rate": 0.00019999784499847678,
1467
+ "loss": 0.119,
1468
+ "step": 193
1469
+ },
1470
+ {
1471
+ "epoch": 0.012541414141414142,
1472
+ "grad_norm": 0.04266177862882614,
1473
+ "learning_rate": 0.0001999977082069104,
1474
+ "loss": 0.1094,
1475
+ "step": 194
1476
+ },
1477
+ {
1478
+ "epoch": 0.012606060606060607,
1479
+ "grad_norm": 0.10852430015802383,
1480
+ "learning_rate": 0.00019999756720643803,
1481
+ "loss": 0.1118,
1482
+ "step": 195
1483
+ },
1484
+ {
1485
+ "epoch": 0.01267070707070707,
1486
+ "grad_norm": 0.06190445274114609,
1487
+ "learning_rate": 0.0001999974219970656,
1488
+ "loss": 0.129,
1489
+ "step": 196
1490
+ },
1491
+ {
1492
+ "epoch": 0.012735353535353535,
1493
+ "grad_norm": 0.04268389567732811,
1494
+ "learning_rate": 0.00019999727257879923,
1495
+ "loss": 0.1149,
1496
+ "step": 197
1497
+ },
1498
+ {
1499
+ "epoch": 0.0128,
1500
+ "grad_norm": 0.04210319742560387,
1501
+ "learning_rate": 0.0001999971189516452,
1502
+ "loss": 0.1231,
1503
+ "step": 198
1504
+ },
1505
+ {
1506
+ "epoch": 0.012864646464646464,
1507
+ "grad_norm": 0.07176094502210617,
1508
+ "learning_rate": 0.00019999696111561,
1509
+ "loss": 0.1123,
1510
+ "step": 199
1511
+ },
1512
+ {
1513
+ "epoch": 0.01292929292929293,
1514
+ "grad_norm": 0.04062803462147713,
1515
+ "learning_rate": 0.00019999679907070023,
1516
+ "loss": 0.1225,
1517
+ "step": 200
1518
+ },
1519
+ {
1520
+ "epoch": 0.012993939393939394,
1521
+ "grad_norm": 0.04266968369483948,
1522
+ "learning_rate": 0.00019999663281692275,
1523
+ "loss": 0.1259,
1524
+ "step": 201
1525
+ },
1526
+ {
1527
+ "epoch": 0.013058585858585858,
1528
+ "grad_norm": 0.045373089611530304,
1529
+ "learning_rate": 0.00019999646235428452,
1530
+ "loss": 0.1353,
1531
+ "step": 202
1532
+ },
1533
+ {
1534
+ "epoch": 0.013123232323232323,
1535
+ "grad_norm": 0.04623784124851227,
1536
+ "learning_rate": 0.00019999628768279276,
1537
+ "loss": 0.1224,
1538
+ "step": 203
1539
+ },
1540
+ {
1541
+ "epoch": 0.013187878787878788,
1542
+ "grad_norm": 0.03664301335811615,
1543
+ "learning_rate": 0.0001999961088024548,
1544
+ "loss": 0.1361,
1545
+ "step": 204
1546
+ },
1547
+ {
1548
+ "epoch": 0.013252525252525252,
1549
+ "grad_norm": 0.03849755972623825,
1550
+ "learning_rate": 0.00019999592571327815,
1551
+ "loss": 0.1307,
1552
+ "step": 205
1553
+ },
1554
+ {
1555
+ "epoch": 0.013317171717171717,
1556
+ "grad_norm": 0.03995022922754288,
1557
+ "learning_rate": 0.00019999573841527054,
1558
+ "loss": 0.1079,
1559
+ "step": 206
1560
+ },
1561
+ {
1562
+ "epoch": 0.013381818181818182,
1563
+ "grad_norm": 0.039675675332546234,
1564
+ "learning_rate": 0.00019999554690843988,
1565
+ "loss": 0.1212,
1566
+ "step": 207
1567
+ },
1568
+ {
1569
+ "epoch": 0.013446464646464646,
1570
+ "grad_norm": 0.05080877244472504,
1571
+ "learning_rate": 0.00019999535119279415,
1572
+ "loss": 0.0991,
1573
+ "step": 208
1574
+ },
1575
+ {
1576
+ "epoch": 0.013446464646464646,
1577
+ "eval_bleu": 3.66705872401506,
1578
+ "eval_loss": 0.09652489423751831,
1579
+ "eval_runtime": 1.3632,
1580
+ "eval_samples_per_second": 11.737,
1581
+ "eval_steps_per_second": 2.934,
1582
+ "step": 208
1583
  }
1584
  ],
1585
  "logging_steps": 1,
 
1599
  "attributes": {}
1600
  }
1601
  },
1602
+ "total_flos": 1801670061195264.0,
1603
  "train_batch_size": 4,
1604
  "trial_name": null,
1605
  "trial_params": null