linshoufan
commited on
Training in progress, step 5500, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 966995080
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7d5bd30929a6dd034a66c5d64682b9b3cdc6f1915335f3bc1d67c8a2e7afa275
|
3 |
size 966995080
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1925064044
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0673f09c0825fa04b428f3263c1ec340a98a30b0050d9b657d3760f11d8c4051
|
3 |
size 1925064044
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9eff99030c3d6f5750daa7c2be6d4f88cb3d11423c32100f3c68edcfd52a81b3
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8beec2a3ed2d46be0542aeb907aa0e1e4613601adbf391ce67dcb87b78a7321a
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
-
"best_metric": 30.
|
3 |
-
"best_model_checkpoint": "./linshoufanfork-whisper-small-nan-tw/checkpoint-
|
4 |
-
"epoch": 1.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -1497,6 +1497,155 @@
|
|
1497 |
"eval_samples_per_second": 2.388,
|
1498 |
"eval_steps_per_second": 0.299,
|
1499 |
"step": 5000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1500 |
}
|
1501 |
],
|
1502 |
"logging_steps": 25,
|
@@ -1504,7 +1653,7 @@
|
|
1504 |
"num_input_tokens_seen": 0,
|
1505 |
"num_train_epochs": 2,
|
1506 |
"save_steps": 500,
|
1507 |
-
"total_flos": 2.
|
1508 |
"train_batch_size": 16,
|
1509 |
"trial_name": null,
|
1510 |
"trial_params": null
|
|
|
1 |
{
|
2 |
+
"best_metric": 30.15735431390852,
|
3 |
+
"best_model_checkpoint": "./linshoufanfork-whisper-small-nan-tw/checkpoint-5500",
|
4 |
+
"epoch": 1.7679202828672453,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 5500,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
1497 |
"eval_samples_per_second": 2.388,
|
1498 |
"eval_steps_per_second": 0.299,
|
1499 |
"step": 5000
|
1500 |
+
},
|
1501 |
+
{
|
1502 |
+
"epoch": 1.62,
|
1503 |
+
"grad_norm": 5.629392147064209,
|
1504 |
+
"learning_rate": 1.9552433845148645e-06,
|
1505 |
+
"loss": 0.1515,
|
1506 |
+
"step": 5025
|
1507 |
+
},
|
1508 |
+
{
|
1509 |
+
"epoch": 1.62,
|
1510 |
+
"grad_norm": 5.018975734710693,
|
1511 |
+
"learning_rate": 1.914407056517478e-06,
|
1512 |
+
"loss": 0.157,
|
1513 |
+
"step": 5050
|
1514 |
+
},
|
1515 |
+
{
|
1516 |
+
"epoch": 1.63,
|
1517 |
+
"grad_norm": 6.182131767272949,
|
1518 |
+
"learning_rate": 1.8735707285200916e-06,
|
1519 |
+
"loss": 0.13,
|
1520 |
+
"step": 5075
|
1521 |
+
},
|
1522 |
+
{
|
1523 |
+
"epoch": 1.64,
|
1524 |
+
"grad_norm": 9.000260353088379,
|
1525 |
+
"learning_rate": 1.8327344005227052e-06,
|
1526 |
+
"loss": 0.1726,
|
1527 |
+
"step": 5100
|
1528 |
+
},
|
1529 |
+
{
|
1530 |
+
"epoch": 1.65,
|
1531 |
+
"grad_norm": 6.853832244873047,
|
1532 |
+
"learning_rate": 1.7918980725253188e-06,
|
1533 |
+
"loss": 0.1451,
|
1534 |
+
"step": 5125
|
1535 |
+
},
|
1536 |
+
{
|
1537 |
+
"epoch": 1.66,
|
1538 |
+
"grad_norm": 5.68117618560791,
|
1539 |
+
"learning_rate": 1.7510617445279321e-06,
|
1540 |
+
"loss": 0.1518,
|
1541 |
+
"step": 5150
|
1542 |
+
},
|
1543 |
+
{
|
1544 |
+
"epoch": 1.66,
|
1545 |
+
"grad_norm": 4.632532119750977,
|
1546 |
+
"learning_rate": 1.7102254165305457e-06,
|
1547 |
+
"loss": 0.144,
|
1548 |
+
"step": 5175
|
1549 |
+
},
|
1550 |
+
{
|
1551 |
+
"epoch": 1.67,
|
1552 |
+
"grad_norm": 8.772269248962402,
|
1553 |
+
"learning_rate": 1.6693890885331592e-06,
|
1554 |
+
"loss": 0.1525,
|
1555 |
+
"step": 5200
|
1556 |
+
},
|
1557 |
+
{
|
1558 |
+
"epoch": 1.68,
|
1559 |
+
"grad_norm": 8.809287071228027,
|
1560 |
+
"learning_rate": 1.6285527605357728e-06,
|
1561 |
+
"loss": 0.132,
|
1562 |
+
"step": 5225
|
1563 |
+
},
|
1564 |
+
{
|
1565 |
+
"epoch": 1.69,
|
1566 |
+
"grad_norm": 7.337480545043945,
|
1567 |
+
"learning_rate": 1.5877164325383862e-06,
|
1568 |
+
"loss": 0.1549,
|
1569 |
+
"step": 5250
|
1570 |
+
},
|
1571 |
+
{
|
1572 |
+
"epoch": 1.7,
|
1573 |
+
"grad_norm": 5.269392013549805,
|
1574 |
+
"learning_rate": 1.5468801045409997e-06,
|
1575 |
+
"loss": 0.1524,
|
1576 |
+
"step": 5275
|
1577 |
+
},
|
1578 |
+
{
|
1579 |
+
"epoch": 1.7,
|
1580 |
+
"grad_norm": 7.877448558807373,
|
1581 |
+
"learning_rate": 1.5060437765436133e-06,
|
1582 |
+
"loss": 0.1421,
|
1583 |
+
"step": 5300
|
1584 |
+
},
|
1585 |
+
{
|
1586 |
+
"epoch": 1.71,
|
1587 |
+
"grad_norm": 6.454422950744629,
|
1588 |
+
"learning_rate": 1.4652074485462266e-06,
|
1589 |
+
"loss": 0.1377,
|
1590 |
+
"step": 5325
|
1591 |
+
},
|
1592 |
+
{
|
1593 |
+
"epoch": 1.72,
|
1594 |
+
"grad_norm": 7.873298645019531,
|
1595 |
+
"learning_rate": 1.4243711205488402e-06,
|
1596 |
+
"loss": 0.1446,
|
1597 |
+
"step": 5350
|
1598 |
+
},
|
1599 |
+
{
|
1600 |
+
"epoch": 1.73,
|
1601 |
+
"grad_norm": 6.6517486572265625,
|
1602 |
+
"learning_rate": 1.383534792551454e-06,
|
1603 |
+
"loss": 0.1482,
|
1604 |
+
"step": 5375
|
1605 |
+
},
|
1606 |
+
{
|
1607 |
+
"epoch": 1.74,
|
1608 |
+
"grad_norm": 9.937956809997559,
|
1609 |
+
"learning_rate": 1.3426984645540676e-06,
|
1610 |
+
"loss": 0.1306,
|
1611 |
+
"step": 5400
|
1612 |
+
},
|
1613 |
+
{
|
1614 |
+
"epoch": 1.74,
|
1615 |
+
"grad_norm": 4.228558540344238,
|
1616 |
+
"learning_rate": 1.301862136556681e-06,
|
1617 |
+
"loss": 0.1229,
|
1618 |
+
"step": 5425
|
1619 |
+
},
|
1620 |
+
{
|
1621 |
+
"epoch": 1.75,
|
1622 |
+
"grad_norm": 4.710421085357666,
|
1623 |
+
"learning_rate": 1.2610258085592945e-06,
|
1624 |
+
"loss": 0.1374,
|
1625 |
+
"step": 5450
|
1626 |
+
},
|
1627 |
+
{
|
1628 |
+
"epoch": 1.76,
|
1629 |
+
"grad_norm": 4.934779644012451,
|
1630 |
+
"learning_rate": 1.220189480561908e-06,
|
1631 |
+
"loss": 0.1321,
|
1632 |
+
"step": 5475
|
1633 |
+
},
|
1634 |
+
{
|
1635 |
+
"epoch": 1.77,
|
1636 |
+
"grad_norm": 9.244394302368164,
|
1637 |
+
"learning_rate": 1.1793531525645214e-06,
|
1638 |
+
"loss": 0.1343,
|
1639 |
+
"step": 5500
|
1640 |
+
},
|
1641 |
+
{
|
1642 |
+
"epoch": 1.77,
|
1643 |
+
"eval_cer": 30.15735431390852,
|
1644 |
+
"eval_loss": 0.3783666491508484,
|
1645 |
+
"eval_runtime": 1871.1459,
|
1646 |
+
"eval_samples_per_second": 2.371,
|
1647 |
+
"eval_steps_per_second": 0.297,
|
1648 |
+
"step": 5500
|
1649 |
}
|
1650 |
],
|
1651 |
"logging_steps": 25,
|
|
|
1653 |
"num_input_tokens_seen": 0,
|
1654 |
"num_train_epochs": 2,
|
1655 |
"save_steps": 500,
|
1656 |
+
"total_flos": 2.539551522816e+19,
|
1657 |
"train_batch_size": 16,
|
1658 |
"trial_name": null,
|
1659 |
"trial_params": null
|