amazingvince commited on
Commit
98bad92
1 Parent(s): 22d81fe

Upload folder using huggingface_hub

Browse files
latest CHANGED
@@ -1 +1 @@
1
- global_step1200
 
1
+ global_step1400
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1b692de05056f10e069f7e9c0915bb02734793c59dca77041aa1eea315cd3a34
3
  size 4944210912
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d2a7378f1bd417634c652504fb1e073ff746c4473bbae4330d0538bf5807bb8f
3
  size 4944210912
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:01a591f9840a3d2a9ee3c6234e6fd9ab33bcc1c98a7b45790346153f338a5b2f
3
  size 4999819336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a0968dc461aae4fc1998cdc77c5cbb847ea94aa677782e98019100676a5651b
3
  size 4999819336
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2d2eebebc0731faba3082c7171851b35cf53492353b5c7014bd348a3bae4aaa3
3
  size 4541564920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17d84f4054dd4557afb62fbc6491863a0b10f8c4a27f0c3c24f787a70a891f75
3
  size 4541564920
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.04874209844888426,
5
  "eval_steps": 400,
6
- "global_step": 1200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1477,6 +1477,246 @@
1477
  "eval_samples_per_second": 17.136,
1478
  "eval_steps_per_second": 2.861,
1479
  "step": 1200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1480
  }
1481
  ],
1482
  "logging_steps": 5,
@@ -1484,7 +1724,7 @@
1484
  "num_input_tokens_seen": 0,
1485
  "num_train_epochs": 1,
1486
  "save_steps": 200,
1487
- "total_flos": 166374297772032.0,
1488
  "trial_name": null,
1489
  "trial_params": null
1490
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.056865781523698304,
5
  "eval_steps": 400,
6
+ "global_step": 1400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1477
  "eval_samples_per_second": 17.136,
1478
  "eval_steps_per_second": 2.861,
1479
  "step": 1200
1480
+ },
1481
+ {
1482
+ "epoch": 0.05,
1483
+ "learning_rate": 1.992385060274044e-06,
1484
+ "loss": 0.6971,
1485
+ "step": 1205
1486
+ },
1487
+ {
1488
+ "epoch": 0.05,
1489
+ "learning_rate": 1.9923054673134564e-06,
1490
+ "loss": 0.7887,
1491
+ "step": 1210
1492
+ },
1493
+ {
1494
+ "epoch": 0.05,
1495
+ "learning_rate": 1.992225462157807e-06,
1496
+ "loss": 0.7134,
1497
+ "step": 1215
1498
+ },
1499
+ {
1500
+ "epoch": 0.05,
1501
+ "learning_rate": 1.99214504484033e-06,
1502
+ "loss": 0.7384,
1503
+ "step": 1220
1504
+ },
1505
+ {
1506
+ "epoch": 0.05,
1507
+ "learning_rate": 1.9920642153944288e-06,
1508
+ "loss": 0.7228,
1509
+ "step": 1225
1510
+ },
1511
+ {
1512
+ "epoch": 0.05,
1513
+ "learning_rate": 1.9919829738536806e-06,
1514
+ "loss": 0.6835,
1515
+ "step": 1230
1516
+ },
1517
+ {
1518
+ "epoch": 0.05,
1519
+ "learning_rate": 1.991901320251831e-06,
1520
+ "loss": 0.6922,
1521
+ "step": 1235
1522
+ },
1523
+ {
1524
+ "epoch": 0.05,
1525
+ "learning_rate": 1.9918192546227995e-06,
1526
+ "loss": 0.7258,
1527
+ "step": 1240
1528
+ },
1529
+ {
1530
+ "epoch": 0.05,
1531
+ "learning_rate": 1.991736777000675e-06,
1532
+ "loss": 0.7399,
1533
+ "step": 1245
1534
+ },
1535
+ {
1536
+ "epoch": 0.05,
1537
+ "learning_rate": 1.9916538874197176e-06,
1538
+ "loss": 0.7625,
1539
+ "step": 1250
1540
+ },
1541
+ {
1542
+ "epoch": 0.05,
1543
+ "learning_rate": 1.9915705859143594e-06,
1544
+ "loss": 0.6707,
1545
+ "step": 1255
1546
+ },
1547
+ {
1548
+ "epoch": 0.05,
1549
+ "learning_rate": 1.9914868725192025e-06,
1550
+ "loss": 0.6932,
1551
+ "step": 1260
1552
+ },
1553
+ {
1554
+ "epoch": 0.05,
1555
+ "learning_rate": 1.991402747269022e-06,
1556
+ "loss": 0.7425,
1557
+ "step": 1265
1558
+ },
1559
+ {
1560
+ "epoch": 0.05,
1561
+ "learning_rate": 1.991318210198761e-06,
1562
+ "loss": 0.7002,
1563
+ "step": 1270
1564
+ },
1565
+ {
1566
+ "epoch": 0.05,
1567
+ "learning_rate": 1.991233261343537e-06,
1568
+ "loss": 0.6766,
1569
+ "step": 1275
1570
+ },
1571
+ {
1572
+ "epoch": 0.05,
1573
+ "learning_rate": 1.9911479007386364e-06,
1574
+ "loss": 0.7258,
1575
+ "step": 1280
1576
+ },
1577
+ {
1578
+ "epoch": 0.05,
1579
+ "learning_rate": 1.991062128419517e-06,
1580
+ "loss": 0.7467,
1581
+ "step": 1285
1582
+ },
1583
+ {
1584
+ "epoch": 0.05,
1585
+ "learning_rate": 1.9909759444218085e-06,
1586
+ "loss": 0.722,
1587
+ "step": 1290
1588
+ },
1589
+ {
1590
+ "epoch": 0.05,
1591
+ "learning_rate": 1.9908893487813106e-06,
1592
+ "loss": 0.7107,
1593
+ "step": 1295
1594
+ },
1595
+ {
1596
+ "epoch": 0.05,
1597
+ "learning_rate": 1.990802341533994e-06,
1598
+ "loss": 0.7337,
1599
+ "step": 1300
1600
+ },
1601
+ {
1602
+ "epoch": 0.05,
1603
+ "learning_rate": 1.9907149227160016e-06,
1604
+ "loss": 0.7075,
1605
+ "step": 1305
1606
+ },
1607
+ {
1608
+ "epoch": 0.05,
1609
+ "learning_rate": 1.9906270923636457e-06,
1610
+ "loss": 0.7157,
1611
+ "step": 1310
1612
+ },
1613
+ {
1614
+ "epoch": 0.05,
1615
+ "learning_rate": 1.9905388505134107e-06,
1616
+ "loss": 0.6916,
1617
+ "step": 1315
1618
+ },
1619
+ {
1620
+ "epoch": 0.05,
1621
+ "learning_rate": 1.990450197201951e-06,
1622
+ "loss": 0.6997,
1623
+ "step": 1320
1624
+ },
1625
+ {
1626
+ "epoch": 0.05,
1627
+ "learning_rate": 1.990361132466093e-06,
1628
+ "loss": 0.7067,
1629
+ "step": 1325
1630
+ },
1631
+ {
1632
+ "epoch": 0.05,
1633
+ "learning_rate": 1.9902716563428335e-06,
1634
+ "loss": 0.7209,
1635
+ "step": 1330
1636
+ },
1637
+ {
1638
+ "epoch": 0.05,
1639
+ "learning_rate": 1.9901817688693395e-06,
1640
+ "loss": 0.7004,
1641
+ "step": 1335
1642
+ },
1643
+ {
1644
+ "epoch": 0.05,
1645
+ "learning_rate": 1.99009147008295e-06,
1646
+ "loss": 0.713,
1647
+ "step": 1340
1648
+ },
1649
+ {
1650
+ "epoch": 0.05,
1651
+ "learning_rate": 1.9900007600211735e-06,
1652
+ "loss": 0.6596,
1653
+ "step": 1345
1654
+ },
1655
+ {
1656
+ "epoch": 0.05,
1657
+ "learning_rate": 1.9899096387216914e-06,
1658
+ "loss": 0.7426,
1659
+ "step": 1350
1660
+ },
1661
+ {
1662
+ "epoch": 0.06,
1663
+ "learning_rate": 1.9898181062223536e-06,
1664
+ "loss": 0.7103,
1665
+ "step": 1355
1666
+ },
1667
+ {
1668
+ "epoch": 0.06,
1669
+ "learning_rate": 1.9897261625611822e-06,
1670
+ "loss": 0.6906,
1671
+ "step": 1360
1672
+ },
1673
+ {
1674
+ "epoch": 0.06,
1675
+ "learning_rate": 1.9896338077763704e-06,
1676
+ "loss": 0.7082,
1677
+ "step": 1365
1678
+ },
1679
+ {
1680
+ "epoch": 0.06,
1681
+ "learning_rate": 1.989541041906281e-06,
1682
+ "loss": 0.7135,
1683
+ "step": 1370
1684
+ },
1685
+ {
1686
+ "epoch": 0.06,
1687
+ "learning_rate": 1.9894478649894484e-06,
1688
+ "loss": 0.7033,
1689
+ "step": 1375
1690
+ },
1691
+ {
1692
+ "epoch": 0.06,
1693
+ "learning_rate": 1.989354277064577e-06,
1694
+ "loss": 0.7452,
1695
+ "step": 1380
1696
+ },
1697
+ {
1698
+ "epoch": 0.06,
1699
+ "learning_rate": 1.9892602781705427e-06,
1700
+ "loss": 0.6947,
1701
+ "step": 1385
1702
+ },
1703
+ {
1704
+ "epoch": 0.06,
1705
+ "learning_rate": 1.9891658683463922e-06,
1706
+ "loss": 0.7412,
1707
+ "step": 1390
1708
+ },
1709
+ {
1710
+ "epoch": 0.06,
1711
+ "learning_rate": 1.989071047631342e-06,
1712
+ "loss": 0.6646,
1713
+ "step": 1395
1714
+ },
1715
+ {
1716
+ "epoch": 0.06,
1717
+ "learning_rate": 1.98897581606478e-06,
1718
+ "loss": 0.6847,
1719
+ "step": 1400
1720
  }
1721
  ],
1722
  "logging_steps": 5,
 
1724
  "num_input_tokens_seen": 0,
1725
  "num_train_epochs": 1,
1726
  "save_steps": 200,
1727
+ "total_flos": 194475417608192.0,
1728
  "trial_name": null,
1729
  "trial_params": null
1730
  }