linshoufan commited on
Commit
cd4be03
·
verified ·
1 Parent(s): fb41b6a

Training in progress, step 5500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3f4b613c5f7354d671c93710070491406b34c5e5610b419a025fa57a760b0e36
3
  size 966995080
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d5bd30929a6dd034a66c5d64682b9b3cdc6f1915335f3bc1d67c8a2e7afa275
3
  size 966995080
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:55beb8afdf524f8623448ab9f2a94a2ee20efa6b3a4716501fffcf426568abda
3
  size 1925064044
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0673f09c0825fa04b428f3263c1ec340a98a30b0050d9b657d3760f11d8c4051
3
  size 1925064044
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2bb0e31972f31d6642dca556b6b92918a2f9af2dc789b1fb9a0b60e78133c947
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9eff99030c3d6f5750daa7c2be6d4f88cb3d11423c32100f3c68edcfd52a81b3
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c5133b98c27975e2a75b74abd29527bed732dd00cbe36bc9a3146c616bc88352
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8beec2a3ed2d46be0542aeb907aa0e1e4613601adbf391ce67dcb87b78a7321a
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 30.38405120682758,
3
- "best_model_checkpoint": "./linshoufanfork-whisper-small-nan-tw/checkpoint-5000",
4
- "epoch": 1.6072002571520412,
5
  "eval_steps": 500,
6
- "global_step": 5000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1497,6 +1497,155 @@
1497
  "eval_samples_per_second": 2.388,
1498
  "eval_steps_per_second": 0.299,
1499
  "step": 5000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1500
  }
1501
  ],
1502
  "logging_steps": 25,
@@ -1504,7 +1653,7 @@
1504
  "num_input_tokens_seen": 0,
1505
  "num_train_epochs": 2,
1506
  "save_steps": 500,
1507
- "total_flos": 2.30868320256e+19,
1508
  "train_batch_size": 16,
1509
  "trial_name": null,
1510
  "trial_params": null
 
1
  {
2
+ "best_metric": 30.15735431390852,
3
+ "best_model_checkpoint": "./linshoufanfork-whisper-small-nan-tw/checkpoint-5500",
4
+ "epoch": 1.7679202828672453,
5
  "eval_steps": 500,
6
+ "global_step": 5500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1497
  "eval_samples_per_second": 2.388,
1498
  "eval_steps_per_second": 0.299,
1499
  "step": 5000
1500
+ },
1501
+ {
1502
+ "epoch": 1.62,
1503
+ "grad_norm": 5.629392147064209,
1504
+ "learning_rate": 1.9552433845148645e-06,
1505
+ "loss": 0.1515,
1506
+ "step": 5025
1507
+ },
1508
+ {
1509
+ "epoch": 1.62,
1510
+ "grad_norm": 5.018975734710693,
1511
+ "learning_rate": 1.914407056517478e-06,
1512
+ "loss": 0.157,
1513
+ "step": 5050
1514
+ },
1515
+ {
1516
+ "epoch": 1.63,
1517
+ "grad_norm": 6.182131767272949,
1518
+ "learning_rate": 1.8735707285200916e-06,
1519
+ "loss": 0.13,
1520
+ "step": 5075
1521
+ },
1522
+ {
1523
+ "epoch": 1.64,
1524
+ "grad_norm": 9.000260353088379,
1525
+ "learning_rate": 1.8327344005227052e-06,
1526
+ "loss": 0.1726,
1527
+ "step": 5100
1528
+ },
1529
+ {
1530
+ "epoch": 1.65,
1531
+ "grad_norm": 6.853832244873047,
1532
+ "learning_rate": 1.7918980725253188e-06,
1533
+ "loss": 0.1451,
1534
+ "step": 5125
1535
+ },
1536
+ {
1537
+ "epoch": 1.66,
1538
+ "grad_norm": 5.68117618560791,
1539
+ "learning_rate": 1.7510617445279321e-06,
1540
+ "loss": 0.1518,
1541
+ "step": 5150
1542
+ },
1543
+ {
1544
+ "epoch": 1.66,
1545
+ "grad_norm": 4.632532119750977,
1546
+ "learning_rate": 1.7102254165305457e-06,
1547
+ "loss": 0.144,
1548
+ "step": 5175
1549
+ },
1550
+ {
1551
+ "epoch": 1.67,
1552
+ "grad_norm": 8.772269248962402,
1553
+ "learning_rate": 1.6693890885331592e-06,
1554
+ "loss": 0.1525,
1555
+ "step": 5200
1556
+ },
1557
+ {
1558
+ "epoch": 1.68,
1559
+ "grad_norm": 8.809287071228027,
1560
+ "learning_rate": 1.6285527605357728e-06,
1561
+ "loss": 0.132,
1562
+ "step": 5225
1563
+ },
1564
+ {
1565
+ "epoch": 1.69,
1566
+ "grad_norm": 7.337480545043945,
1567
+ "learning_rate": 1.5877164325383862e-06,
1568
+ "loss": 0.1549,
1569
+ "step": 5250
1570
+ },
1571
+ {
1572
+ "epoch": 1.7,
1573
+ "grad_norm": 5.269392013549805,
1574
+ "learning_rate": 1.5468801045409997e-06,
1575
+ "loss": 0.1524,
1576
+ "step": 5275
1577
+ },
1578
+ {
1579
+ "epoch": 1.7,
1580
+ "grad_norm": 7.877448558807373,
1581
+ "learning_rate": 1.5060437765436133e-06,
1582
+ "loss": 0.1421,
1583
+ "step": 5300
1584
+ },
1585
+ {
1586
+ "epoch": 1.71,
1587
+ "grad_norm": 6.454422950744629,
1588
+ "learning_rate": 1.4652074485462266e-06,
1589
+ "loss": 0.1377,
1590
+ "step": 5325
1591
+ },
1592
+ {
1593
+ "epoch": 1.72,
1594
+ "grad_norm": 7.873298645019531,
1595
+ "learning_rate": 1.4243711205488402e-06,
1596
+ "loss": 0.1446,
1597
+ "step": 5350
1598
+ },
1599
+ {
1600
+ "epoch": 1.73,
1601
+ "grad_norm": 6.6517486572265625,
1602
+ "learning_rate": 1.383534792551454e-06,
1603
+ "loss": 0.1482,
1604
+ "step": 5375
1605
+ },
1606
+ {
1607
+ "epoch": 1.74,
1608
+ "grad_norm": 9.937956809997559,
1609
+ "learning_rate": 1.3426984645540676e-06,
1610
+ "loss": 0.1306,
1611
+ "step": 5400
1612
+ },
1613
+ {
1614
+ "epoch": 1.74,
1615
+ "grad_norm": 4.228558540344238,
1616
+ "learning_rate": 1.301862136556681e-06,
1617
+ "loss": 0.1229,
1618
+ "step": 5425
1619
+ },
1620
+ {
1621
+ "epoch": 1.75,
1622
+ "grad_norm": 4.710421085357666,
1623
+ "learning_rate": 1.2610258085592945e-06,
1624
+ "loss": 0.1374,
1625
+ "step": 5450
1626
+ },
1627
+ {
1628
+ "epoch": 1.76,
1629
+ "grad_norm": 4.934779644012451,
1630
+ "learning_rate": 1.220189480561908e-06,
1631
+ "loss": 0.1321,
1632
+ "step": 5475
1633
+ },
1634
+ {
1635
+ "epoch": 1.77,
1636
+ "grad_norm": 9.244394302368164,
1637
+ "learning_rate": 1.1793531525645214e-06,
1638
+ "loss": 0.1343,
1639
+ "step": 5500
1640
+ },
1641
+ {
1642
+ "epoch": 1.77,
1643
+ "eval_cer": 30.15735431390852,
1644
+ "eval_loss": 0.3783666491508484,
1645
+ "eval_runtime": 1871.1459,
1646
+ "eval_samples_per_second": 2.371,
1647
+ "eval_steps_per_second": 0.297,
1648
+ "step": 5500
1649
  }
1650
  ],
1651
  "logging_steps": 25,
 
1653
  "num_input_tokens_seen": 0,
1654
  "num_train_epochs": 2,
1655
  "save_steps": 500,
1656
+ "total_flos": 2.539551522816e+19,
1657
  "train_batch_size": 16,
1658
  "trial_name": null,
1659
  "trial_params": null