Training in progress, step 80000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +203 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 202194449
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ca7b30dd7d3cd1b36c3fa0208b9738aaffe2cc51d1a7feb434cdea790af51c1c
|
3 |
size 202194449
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 102501541
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2721d72b42462f692fdf3de82485b9d24d7776a465480990b2df5c24285a8e05
|
3 |
size 102501541
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7bf0ceabc8cac9357e0c979c52b36fdfc5cf30a4d8ec50dea1b83b540b532596
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7bf0ceabc8cac9357e0c979c52b36fdfc5cf30a4d8ec50dea1b83b540b532596
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7bf0ceabc8cac9357e0c979c52b36fdfc5cf30a4d8ec50dea1b83b540b532596
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7bf0ceabc8cac9357e0c979c52b36fdfc5cf30a4d8ec50dea1b83b540b532596
|
3 |
size 14503
|
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7bf0ceabc8cac9357e0c979c52b36fdfc5cf30a4d8ec50dea1b83b540b532596
|
3 |
size 14503
|
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7bf0ceabc8cac9357e0c979c52b36fdfc5cf30a4d8ec50dea1b83b540b532596
|
3 |
size 14503
|
last-checkpoint/rng_state_6.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7bf0ceabc8cac9357e0c979c52b36fdfc5cf30a4d8ec50dea1b83b540b532596
|
3 |
size 14503
|
last-checkpoint/rng_state_7.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7bf0ceabc8cac9357e0c979c52b36fdfc5cf30a4d8ec50dea1b83b540b532596
|
3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8ff31836ff2c96f7fb19d95df664b507273477e3a4f87dcce611b28b7e31820b
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 1.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -1406,11 +1406,211 @@
|
|
1406 |
"eval_samples_per_second": 1537.403,
|
1407 |
"eval_steps_per_second": 24.481,
|
1408 |
"step": 70000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1409 |
}
|
1410 |
],
|
1411 |
"max_steps": 500000,
|
1412 |
"num_train_epochs": 12,
|
1413 |
-
"total_flos": 2.
|
1414 |
"trial_name": null,
|
1415 |
"trial_params": null
|
1416 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.863976327500641,
|
5 |
+
"global_step": 80000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
1406 |
"eval_samples_per_second": 1537.403,
|
1407 |
"eval_steps_per_second": 24.481,
|
1408 |
"step": 70000
|
1409 |
+
},
|
1410 |
+
{
|
1411 |
+
"epoch": 1.64,
|
1412 |
+
"learning_rate": 0.0002934838153624519,
|
1413 |
+
"loss": 0.3166,
|
1414 |
+
"step": 70500
|
1415 |
+
},
|
1416 |
+
{
|
1417 |
+
"epoch": 1.65,
|
1418 |
+
"learning_rate": 0.00029334092796560427,
|
1419 |
+
"loss": 0.3161,
|
1420 |
+
"step": 71000
|
1421 |
+
},
|
1422 |
+
{
|
1423 |
+
"epoch": 1.65,
|
1424 |
+
"eval_loss": 0.2996619939804077,
|
1425 |
+
"eval_runtime": 1.4139,
|
1426 |
+
"eval_samples_per_second": 1554.517,
|
1427 |
+
"eval_steps_per_second": 24.753,
|
1428 |
+
"step": 71000
|
1429 |
+
},
|
1430 |
+
{
|
1431 |
+
"epoch": 1.67,
|
1432 |
+
"learning_rate": 0.0002931965276945326,
|
1433 |
+
"loss": 0.3159,
|
1434 |
+
"step": 71500
|
1435 |
+
},
|
1436 |
+
{
|
1437 |
+
"epoch": 1.68,
|
1438 |
+
"learning_rate": 0.0002930506161283751,
|
1439 |
+
"loss": 0.3156,
|
1440 |
+
"step": 72000
|
1441 |
+
},
|
1442 |
+
{
|
1443 |
+
"epoch": 1.68,
|
1444 |
+
"eval_loss": 0.29820874333381653,
|
1445 |
+
"eval_runtime": 1.408,
|
1446 |
+
"eval_samples_per_second": 1561.093,
|
1447 |
+
"eval_steps_per_second": 24.858,
|
1448 |
+
"step": 72000
|
1449 |
+
},
|
1450 |
+
{
|
1451 |
+
"epoch": 1.69,
|
1452 |
+
"learning_rate": 0.00029290319486279724,
|
1453 |
+
"loss": 0.3152,
|
1454 |
+
"step": 72500
|
1455 |
+
},
|
1456 |
+
{
|
1457 |
+
"epoch": 1.7,
|
1458 |
+
"learning_rate": 0.0002927542655099744,
|
1459 |
+
"loss": 0.3148,
|
1460 |
+
"step": 73000
|
1461 |
+
},
|
1462 |
+
{
|
1463 |
+
"epoch": 1.7,
|
1464 |
+
"eval_loss": 0.2982105612754822,
|
1465 |
+
"eval_runtime": 1.3918,
|
1466 |
+
"eval_samples_per_second": 1579.253,
|
1467 |
+
"eval_steps_per_second": 25.147,
|
1468 |
+
"step": 73000
|
1469 |
+
},
|
1470 |
+
{
|
1471 |
+
"epoch": 1.71,
|
1472 |
+
"learning_rate": 0.00029260382969857417,
|
1473 |
+
"loss": 0.3148,
|
1474 |
+
"step": 73500
|
1475 |
+
},
|
1476 |
+
{
|
1477 |
+
"epoch": 1.72,
|
1478 |
+
"learning_rate": 0.00029245188907373845,
|
1479 |
+
"loss": 0.3143,
|
1480 |
+
"step": 74000
|
1481 |
+
},
|
1482 |
+
{
|
1483 |
+
"epoch": 1.72,
|
1484 |
+
"eval_loss": 0.2960808277130127,
|
1485 |
+
"eval_runtime": 1.4226,
|
1486 |
+
"eval_samples_per_second": 1545.079,
|
1487 |
+
"eval_steps_per_second": 24.603,
|
1488 |
+
"step": 74000
|
1489 |
+
},
|
1490 |
+
{
|
1491 |
+
"epoch": 1.74,
|
1492 |
+
"learning_rate": 0.0002922984452970655,
|
1493 |
+
"loss": 0.3141,
|
1494 |
+
"step": 74500
|
1495 |
+
},
|
1496 |
+
{
|
1497 |
+
"epoch": 1.75,
|
1498 |
+
"learning_rate": 0.000292143500046592,
|
1499 |
+
"loss": 0.3137,
|
1500 |
+
"step": 75000
|
1501 |
+
},
|
1502 |
+
{
|
1503 |
+
"epoch": 1.75,
|
1504 |
+
"eval_loss": 0.2935367524623871,
|
1505 |
+
"eval_runtime": 1.3992,
|
1506 |
+
"eval_samples_per_second": 1570.943,
|
1507 |
+
"eval_steps_per_second": 25.015,
|
1508 |
+
"step": 75000
|
1509 |
+
},
|
1510 |
+
{
|
1511 |
+
"epoch": 1.76,
|
1512 |
+
"learning_rate": 0.0002919870550167743,
|
1513 |
+
"loss": 0.3133,
|
1514 |
+
"step": 75500
|
1515 |
+
},
|
1516 |
+
{
|
1517 |
+
"epoch": 1.77,
|
1518 |
+
"learning_rate": 0.0002918291119184702,
|
1519 |
+
"loss": 0.3132,
|
1520 |
+
"step": 76000
|
1521 |
+
},
|
1522 |
+
{
|
1523 |
+
"epoch": 1.77,
|
1524 |
+
"eval_loss": 0.2974264919757843,
|
1525 |
+
"eval_runtime": 1.3828,
|
1526 |
+
"eval_samples_per_second": 1589.484,
|
1527 |
+
"eval_steps_per_second": 25.31,
|
1528 |
+
"step": 76000
|
1529 |
+
},
|
1530 |
+
{
|
1531 |
+
"epoch": 1.78,
|
1532 |
+
"learning_rate": 0.0002916696724789201,
|
1533 |
+
"loss": 0.3134,
|
1534 |
+
"step": 76500
|
1535 |
+
},
|
1536 |
+
{
|
1537 |
+
"epoch": 1.79,
|
1538 |
+
"learning_rate": 0.00029150873844172823,
|
1539 |
+
"loss": 0.313,
|
1540 |
+
"step": 77000
|
1541 |
+
},
|
1542 |
+
{
|
1543 |
+
"epoch": 1.79,
|
1544 |
+
"eval_loss": 0.29650062322616577,
|
1545 |
+
"eval_runtime": 1.4037,
|
1546 |
+
"eval_samples_per_second": 1565.884,
|
1547 |
+
"eval_steps_per_second": 24.934,
|
1548 |
+
"step": 77000
|
1549 |
+
},
|
1550 |
+
{
|
1551 |
+
"epoch": 1.81,
|
1552 |
+
"learning_rate": 0.00029134631156684334,
|
1553 |
+
"loss": 0.3128,
|
1554 |
+
"step": 77500
|
1555 |
+
},
|
1556 |
+
{
|
1557 |
+
"epoch": 1.82,
|
1558 |
+
"learning_rate": 0.0002911823936305398,
|
1559 |
+
"loss": 0.3147,
|
1560 |
+
"step": 78000
|
1561 |
+
},
|
1562 |
+
{
|
1563 |
+
"epoch": 1.82,
|
1564 |
+
"eval_loss": 0.2946864664554596,
|
1565 |
+
"eval_runtime": 1.3928,
|
1566 |
+
"eval_samples_per_second": 1578.113,
|
1567 |
+
"eval_steps_per_second": 25.129,
|
1568 |
+
"step": 78000
|
1569 |
+
},
|
1570 |
+
{
|
1571 |
+
"epoch": 1.83,
|
1572 |
+
"learning_rate": 0.0002910169864253979,
|
1573 |
+
"loss": 0.3123,
|
1574 |
+
"step": 78500
|
1575 |
+
},
|
1576 |
+
{
|
1577 |
+
"epoch": 1.84,
|
1578 |
+
"learning_rate": 0.0002908500917602842,
|
1579 |
+
"loss": 0.3118,
|
1580 |
+
"step": 79000
|
1581 |
+
},
|
1582 |
+
{
|
1583 |
+
"epoch": 1.84,
|
1584 |
+
"eval_loss": 0.2940049469470978,
|
1585 |
+
"eval_runtime": 1.405,
|
1586 |
+
"eval_samples_per_second": 1564.438,
|
1587 |
+
"eval_steps_per_second": 24.911,
|
1588 |
+
"step": 79000
|
1589 |
+
},
|
1590 |
+
{
|
1591 |
+
"epoch": 1.85,
|
1592 |
+
"learning_rate": 0.00029068171146033226,
|
1593 |
+
"loss": 0.3115,
|
1594 |
+
"step": 79500
|
1595 |
+
},
|
1596 |
+
{
|
1597 |
+
"epoch": 1.86,
|
1598 |
+
"learning_rate": 0.0002905118473669218,
|
1599 |
+
"loss": 0.3113,
|
1600 |
+
"step": 80000
|
1601 |
+
},
|
1602 |
+
{
|
1603 |
+
"epoch": 1.86,
|
1604 |
+
"eval_loss": 0.293503075838089,
|
1605 |
+
"eval_runtime": 1.4595,
|
1606 |
+
"eval_samples_per_second": 1505.956,
|
1607 |
+
"eval_steps_per_second": 23.98,
|
1608 |
+
"step": 80000
|
1609 |
}
|
1610 |
],
|
1611 |
"max_steps": 500000,
|
1612 |
"num_train_epochs": 12,
|
1613 |
+
"total_flos": 2.555894613205093e+21,
|
1614 |
"trial_name": null,
|
1615 |
"trial_params": null
|
1616 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 102501541
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2721d72b42462f692fdf3de82485b9d24d7776a465480990b2df5c24285a8e05
|
3 |
size 102501541
|