iamnguyen commited on
Commit
5371239
·
verified ·
1 Parent(s): 5fa9a86

Training in progress, step 224, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:33d1f902086bf162f42a891580c1d8f009b186a1b247fa2e5d0f3c8b552ca438
3
  size 479769104
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8184a10ca48807e22cc06e50ec6c504f76f4078fa26a373c09b1e614591a70dd
3
  size 479769104
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ffbe6c82a241eec13776bec8b0245f431cae9a909fe4fa531b5a99f34a39e259
3
  size 240728084
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:27c59c970af91f75ad174c54b9036732a06ac597e37b07db4c76c33a0dbe1d9b
3
  size 240728084
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5fb5768ef05fd83332549c08f206d2683f104437b7d63ada2cc0d97372b46d74
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ef7d41b67bbb64d8f59b1b890b60e98c43be1a525871d9f2311fa3b7b48a618
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.013447469810874753,
5
  "eval_steps": 500,
6
- "global_step": 208,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1463,6 +1463,118 @@
1463
  "learning_rate": 9.999704387106147e-06,
1464
  "loss": 1.3735,
1465
  "step": 208
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1466
  }
1467
  ],
1468
  "logging_steps": 1,
@@ -1482,7 +1594,7 @@
1482
  "attributes": {}
1483
  }
1484
  },
1485
- "total_flos": 1.334557816141824e+17,
1486
  "train_batch_size": 2,
1487
  "trial_name": null,
1488
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.014481890565557426,
5
  "eval_steps": 500,
6
+ "global_step": 224,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1463
  "learning_rate": 9.999704387106147e-06,
1464
  "loss": 1.3735,
1465
  "step": 208
1466
+ },
1467
+ {
1468
+ "epoch": 0.013512121108042419,
1469
+ "grad_norm": 4.388930320739746,
1470
+ "learning_rate": 9.999693126780022e-06,
1471
+ "loss": 1.4288,
1472
+ "step": 209
1473
+ },
1474
+ {
1475
+ "epoch": 0.013576772405210087,
1476
+ "grad_norm": 3.7054548263549805,
1477
+ "learning_rate": 9.999681655989203e-06,
1478
+ "loss": 1.3322,
1479
+ "step": 210
1480
+ },
1481
+ {
1482
+ "epoch": 0.013641423702377753,
1483
+ "grad_norm": 4.013354778289795,
1484
+ "learning_rate": 9.999669974734172e-06,
1485
+ "loss": 1.4192,
1486
+ "step": 211
1487
+ },
1488
+ {
1489
+ "epoch": 0.01370607499954542,
1490
+ "grad_norm": 4.022690773010254,
1491
+ "learning_rate": 9.999658083015423e-06,
1492
+ "loss": 1.3474,
1493
+ "step": 212
1494
+ },
1495
+ {
1496
+ "epoch": 0.013770726296713087,
1497
+ "grad_norm": 3.8308322429656982,
1498
+ "learning_rate": 9.999645980833454e-06,
1499
+ "loss": 1.3902,
1500
+ "step": 213
1501
+ },
1502
+ {
1503
+ "epoch": 0.013835377593880755,
1504
+ "grad_norm": 4.453736305236816,
1505
+ "learning_rate": 9.999633668188778e-06,
1506
+ "loss": 1.4876,
1507
+ "step": 214
1508
+ },
1509
+ {
1510
+ "epoch": 0.013900028891048423,
1511
+ "grad_norm": 4.379161834716797,
1512
+ "learning_rate": 9.99962114508191e-06,
1513
+ "loss": 1.369,
1514
+ "step": 215
1515
+ },
1516
+ {
1517
+ "epoch": 0.013964680188216089,
1518
+ "grad_norm": 4.23476505279541,
1519
+ "learning_rate": 9.999608411513378e-06,
1520
+ "loss": 1.371,
1521
+ "step": 216
1522
+ },
1523
+ {
1524
+ "epoch": 0.014029331485383757,
1525
+ "grad_norm": 3.8091630935668945,
1526
+ "learning_rate": 9.999595467483719e-06,
1527
+ "loss": 1.3317,
1528
+ "step": 217
1529
+ },
1530
+ {
1531
+ "epoch": 0.014093982782551423,
1532
+ "grad_norm": 4.4740118980407715,
1533
+ "learning_rate": 9.999582312993476e-06,
1534
+ "loss": 1.3864,
1535
+ "step": 218
1536
+ },
1537
+ {
1538
+ "epoch": 0.01415863407971909,
1539
+ "grad_norm": 3.8283002376556396,
1540
+ "learning_rate": 9.999568948043206e-06,
1541
+ "loss": 1.3924,
1542
+ "step": 219
1543
+ },
1544
+ {
1545
+ "epoch": 0.014223285376886757,
1546
+ "grad_norm": 3.9413399696350098,
1547
+ "learning_rate": 9.99955537263347e-06,
1548
+ "loss": 1.365,
1549
+ "step": 220
1550
+ },
1551
+ {
1552
+ "epoch": 0.014287936674054424,
1553
+ "grad_norm": 3.7700750827789307,
1554
+ "learning_rate": 9.999541586764836e-06,
1555
+ "loss": 1.3265,
1556
+ "step": 221
1557
+ },
1558
+ {
1559
+ "epoch": 0.01435258797122209,
1560
+ "grad_norm": 4.468739986419678,
1561
+ "learning_rate": 9.999527590437889e-06,
1562
+ "loss": 1.4056,
1563
+ "step": 222
1564
+ },
1565
+ {
1566
+ "epoch": 0.014417239268389758,
1567
+ "grad_norm": 3.847881555557251,
1568
+ "learning_rate": 9.999513383653216e-06,
1569
+ "loss": 1.3369,
1570
+ "step": 223
1571
+ },
1572
+ {
1573
+ "epoch": 0.014481890565557426,
1574
+ "grad_norm": 4.515076637268066,
1575
+ "learning_rate": 9.999498966411415e-06,
1576
+ "loss": 1.3715,
1577
+ "step": 224
1578
  }
1579
  ],
1580
  "logging_steps": 1,
 
1594
  "attributes": {}
1595
  }
1596
  },
1597
+ "total_flos": 1.437989735806894e+17,
1598
  "train_batch_size": 2,
1599
  "trial_name": null,
1600
  "trial_params": null