Training in progress, step 273, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step273/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step273/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step273/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step273/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step273/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step273/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step273/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step273/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step273/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step273/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step273/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step273/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step273/zero_pp_rank_4_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step273/zero_pp_rank_5_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step273/zero_pp_rank_6_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step273/zero_pp_rank_7_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +284 -3
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 550593856
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a53baa7fb5e46eac3e86e83348e470e62e4cdc1131a11ff07d700e96aad64796
|
3 |
size 550593856
|
last-checkpoint/global_step273/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:93a239c8f5eeffc87a3b8e84887a8e05754968385fd63a5d952453946899e284
|
3 |
+
size 243591168
|
last-checkpoint/global_step273/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dd156f4f78904d80425ea079796a91220c0b68adb7c6e56666df77f5e7f8b0de
|
3 |
+
size 243591168
|
last-checkpoint/global_step273/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:34f55e9e2f19392698a61fe9dab747c965188add3ca8fec5c6dceab18965c9c1
|
3 |
+
size 243591168
|
last-checkpoint/global_step273/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2ed25b7b5f856c355e47d3ef5260dbe1eea9cd39bf20021965f1afdc484fa415
|
3 |
+
size 243591168
|
last-checkpoint/global_step273/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1cb8d8735dd5765734ff469a79c7e07d58d7fa443834ecf1894859683a69e988
|
3 |
+
size 243591168
|
last-checkpoint/global_step273/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4bea816c3f2a08032ef80be20f31cbe8d5da66d4d6f61409b08fedf474ef878b
|
3 |
+
size 243591168
|
last-checkpoint/global_step273/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3a3e5bbecc6e372d25f4f43f2a7df3c06ad9c581bd6f4fae1bf283e553e6c724
|
3 |
+
size 243591168
|
last-checkpoint/global_step273/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:307d63839128477e9f83d4c253d822d531bd8b174e46071c70f491619a38a8d6
|
3 |
+
size 243591168
|
last-checkpoint/global_step273/zero_pp_rank_0_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:72123919b4ca6ac83b03bad1a1a6a6f896c387dbb5c8e566d9b98a18293024a8
|
3 |
+
size 211435686
|
last-checkpoint/global_step273/zero_pp_rank_1_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ed9e7ba5ee59bbce7b1d7a8369741b614c14992dc1f12810792d6fe8248cad3f
|
3 |
+
size 211435686
|
last-checkpoint/global_step273/zero_pp_rank_2_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:943702d997113425be1f61993218522594c7d4297944a57a576aed739483a681
|
3 |
+
size 211435686
|
last-checkpoint/global_step273/zero_pp_rank_3_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:febb76d47320eb49ed6bcaa76d04f022e91034f9d1abf3336cc1fe70cd1c8d34
|
3 |
+
size 211435686
|
last-checkpoint/global_step273/zero_pp_rank_4_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6884ccf280ecc14775a811ad4b58c64cda56b638b1380f3ede0ae967c22dd065
|
3 |
+
size 211435686
|
last-checkpoint/global_step273/zero_pp_rank_5_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0955da2b84b95532c43e619af600459d2cce507a9d146c6e1d2cfc12696c3550
|
3 |
+
size 211435686
|
last-checkpoint/global_step273/zero_pp_rank_6_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:715cc62fb431d81ab58761b15cb5630539e18a58a338cc4d1e7cebf7023bd47d
|
3 |
+
size 211435686
|
last-checkpoint/global_step273/zero_pp_rank_7_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e573d32f57b269bdeb3b3d8e102ab36d5da34a30c184b2e90e1a4da387671078
|
3 |
+
size 211435686
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step273
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:aa5a01ac495960a4bdf4e8e3d767478ddeae6f6e48a0785c78a39b02d9f03944
|
3 |
size 15920
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:94a997267e4e5e22741250c431d4119f75a51dd8d32f691af3d6ddfcdd72fb96
|
3 |
size 15920
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:99e4fe0ad3a692bdc4f5dd7af7febcbaf52826f91569d816d162d70c3d5aae57
|
3 |
size 15920
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e5efce2552c4a4695a29bf25d9f63b6643d4a8ee75838c6e78f968cfeb77ced6
|
3 |
size 15920
|
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:82a6db6f4ce94d4baff585d4bf8aaabf2351a80d5b9ea39e0f01f54a07f8cc7d
|
3 |
size 15920
|
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:098920af0acbca97e22a72d86dd88a3b5fb1ee4a312e7aa98369fc3e28978653
|
3 |
size 15920
|
last-checkpoint/rng_state_6.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2e14f5793871a938571a938379ee73acad2b8bbc41260bdbfeeef799929af076
|
3 |
size 15920
|
last-checkpoint/rng_state_7.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c70ca795c779e38cba23bef63d56eb87a6d53fdb68031316ac8452b61a60aa3b
|
3 |
size 15920
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f96a436204d535377b5df836584c2a61915d36dc9059dc3240944efd6133bb4f
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 39,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -1701,6 +1701,287 @@
|
|
1701 |
"eval_samples_per_second": 1.225,
|
1702 |
"eval_steps_per_second": 0.153,
|
1703 |
"step": 234
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1704 |
}
|
1705 |
],
|
1706 |
"logging_steps": 1,
|
@@ -1720,7 +2001,7 @@
|
|
1720 |
"attributes": {}
|
1721 |
}
|
1722 |
},
|
1723 |
-
"total_flos":
|
1724 |
"train_batch_size": 1,
|
1725 |
"trial_name": null,
|
1726 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.7109375,
|
5 |
"eval_steps": 39,
|
6 |
+
"global_step": 273,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
1701 |
"eval_samples_per_second": 1.225,
|
1702 |
"eval_steps_per_second": 0.153,
|
1703 |
"step": 234
|
1704 |
+
},
|
1705 |
+
{
|
1706 |
+
"epoch": 0.6119791666666666,
|
1707 |
+
"grad_norm": 0.15343655698839206,
|
1708 |
+
"learning_rate": 4.235591624451763e-05,
|
1709 |
+
"loss": 2.5605,
|
1710 |
+
"step": 235
|
1711 |
+
},
|
1712 |
+
{
|
1713 |
+
"epoch": 0.6145833333333334,
|
1714 |
+
"grad_norm": 0.14304569592672528,
|
1715 |
+
"learning_rate": 4.198365462085446e-05,
|
1716 |
+
"loss": 2.5812,
|
1717 |
+
"step": 236
|
1718 |
+
},
|
1719 |
+
{
|
1720 |
+
"epoch": 0.6171875,
|
1721 |
+
"grad_norm": 0.15320835548074707,
|
1722 |
+
"learning_rate": 4.161236257651587e-05,
|
1723 |
+
"loss": 2.5275,
|
1724 |
+
"step": 237
|
1725 |
+
},
|
1726 |
+
{
|
1727 |
+
"epoch": 0.6197916666666666,
|
1728 |
+
"grad_norm": 0.15452569712518102,
|
1729 |
+
"learning_rate": 4.1242067768811134e-05,
|
1730 |
+
"loss": 2.4707,
|
1731 |
+
"step": 238
|
1732 |
+
},
|
1733 |
+
{
|
1734 |
+
"epoch": 0.6223958333333334,
|
1735 |
+
"grad_norm": 0.16320750887974791,
|
1736 |
+
"learning_rate": 4.0872797780765946e-05,
|
1737 |
+
"loss": 2.3996,
|
1738 |
+
"step": 239
|
1739 |
+
},
|
1740 |
+
{
|
1741 |
+
"epoch": 0.625,
|
1742 |
+
"grad_norm": 0.14194397858802227,
|
1743 |
+
"learning_rate": 4.0504580119067933e-05,
|
1744 |
+
"loss": 2.2431,
|
1745 |
+
"step": 240
|
1746 |
+
},
|
1747 |
+
{
|
1748 |
+
"epoch": 0.6276041666666666,
|
1749 |
+
"grad_norm": 0.1437781011206105,
|
1750 |
+
"learning_rate": 4.01374422120175e-05,
|
1751 |
+
"loss": 2.3929,
|
1752 |
+
"step": 241
|
1753 |
+
},
|
1754 |
+
{
|
1755 |
+
"epoch": 0.6302083333333334,
|
1756 |
+
"grad_norm": 0.14383943780586664,
|
1757 |
+
"learning_rate": 3.977141140748484e-05,
|
1758 |
+
"loss": 2.3989,
|
1759 |
+
"step": 242
|
1760 |
+
},
|
1761 |
+
{
|
1762 |
+
"epoch": 0.6328125,
|
1763 |
+
"grad_norm": 0.16948819133813695,
|
1764 |
+
"learning_rate": 3.94065149708728e-05,
|
1765 |
+
"loss": 2.4256,
|
1766 |
+
"step": 243
|
1767 |
+
},
|
1768 |
+
{
|
1769 |
+
"epoch": 0.6354166666666666,
|
1770 |
+
"grad_norm": 0.14755837584042042,
|
1771 |
+
"learning_rate": 3.904278008308589e-05,
|
1772 |
+
"loss": 2.2711,
|
1773 |
+
"step": 244
|
1774 |
+
},
|
1775 |
+
{
|
1776 |
+
"epoch": 0.6380208333333334,
|
1777 |
+
"grad_norm": 0.15519908616035058,
|
1778 |
+
"learning_rate": 3.868023383850556e-05,
|
1779 |
+
"loss": 2.4623,
|
1780 |
+
"step": 245
|
1781 |
+
},
|
1782 |
+
{
|
1783 |
+
"epoch": 0.640625,
|
1784 |
+
"grad_norm": 0.16021538951276384,
|
1785 |
+
"learning_rate": 3.831890324297197e-05,
|
1786 |
+
"loss": 2.3857,
|
1787 |
+
"step": 246
|
1788 |
+
},
|
1789 |
+
{
|
1790 |
+
"epoch": 0.6432291666666666,
|
1791 |
+
"grad_norm": 0.15886505001684953,
|
1792 |
+
"learning_rate": 3.795881521177236e-05,
|
1793 |
+
"loss": 2.5196,
|
1794 |
+
"step": 247
|
1795 |
+
},
|
1796 |
+
{
|
1797 |
+
"epoch": 0.6458333333333334,
|
1798 |
+
"grad_norm": 0.16005244689800305,
|
1799 |
+
"learning_rate": 3.7599996567636156e-05,
|
1800 |
+
"loss": 2.406,
|
1801 |
+
"step": 248
|
1802 |
+
},
|
1803 |
+
{
|
1804 |
+
"epoch": 0.6484375,
|
1805 |
+
"grad_norm": 0.1470078515999776,
|
1806 |
+
"learning_rate": 3.724247403873694e-05,
|
1807 |
+
"loss": 2.4975,
|
1808 |
+
"step": 249
|
1809 |
+
},
|
1810 |
+
{
|
1811 |
+
"epoch": 0.6510416666666666,
|
1812 |
+
"grad_norm": 0.1560311928142992,
|
1813 |
+
"learning_rate": 3.688627425670147e-05,
|
1814 |
+
"loss": 2.374,
|
1815 |
+
"step": 250
|
1816 |
+
},
|
1817 |
+
{
|
1818 |
+
"epoch": 0.6536458333333334,
|
1819 |
+
"grad_norm": 0.15349615074542047,
|
1820 |
+
"learning_rate": 3.653142375462596e-05,
|
1821 |
+
"loss": 2.4155,
|
1822 |
+
"step": 251
|
1823 |
+
},
|
1824 |
+
{
|
1825 |
+
"epoch": 0.65625,
|
1826 |
+
"grad_norm": 0.20641136824203335,
|
1827 |
+
"learning_rate": 3.6177948965099585e-05,
|
1828 |
+
"loss": 2.4358,
|
1829 |
+
"step": 252
|
1830 |
+
},
|
1831 |
+
{
|
1832 |
+
"epoch": 0.6588541666666666,
|
1833 |
+
"grad_norm": 0.14326347914064022,
|
1834 |
+
"learning_rate": 3.582587621823558e-05,
|
1835 |
+
"loss": 2.4528,
|
1836 |
+
"step": 253
|
1837 |
+
},
|
1838 |
+
{
|
1839 |
+
"epoch": 0.6614583333333334,
|
1840 |
+
"grad_norm": 0.13097442902507145,
|
1841 |
+
"learning_rate": 3.547523173970989e-05,
|
1842 |
+
"loss": 2.3682,
|
1843 |
+
"step": 254
|
1844 |
+
},
|
1845 |
+
{
|
1846 |
+
"epoch": 0.6640625,
|
1847 |
+
"grad_norm": 0.14938841182330287,
|
1848 |
+
"learning_rate": 3.51260416488077e-05,
|
1849 |
+
"loss": 2.5273,
|
1850 |
+
"step": 255
|
1851 |
+
},
|
1852 |
+
{
|
1853 |
+
"epoch": 0.6666666666666666,
|
1854 |
+
"grad_norm": 0.1532472206020292,
|
1855 |
+
"learning_rate": 3.477833195647773e-05,
|
1856 |
+
"loss": 2.5301,
|
1857 |
+
"step": 256
|
1858 |
+
},
|
1859 |
+
{
|
1860 |
+
"epoch": 0.6692708333333334,
|
1861 |
+
"grad_norm": 0.12783586158700921,
|
1862 |
+
"learning_rate": 3.443212856339481e-05,
|
1863 |
+
"loss": 2.3279,
|
1864 |
+
"step": 257
|
1865 |
+
},
|
1866 |
+
{
|
1867 |
+
"epoch": 0.671875,
|
1868 |
+
"grad_norm": 0.15547617846106007,
|
1869 |
+
"learning_rate": 3.408745725803042e-05,
|
1870 |
+
"loss": 2.4209,
|
1871 |
+
"step": 258
|
1872 |
+
},
|
1873 |
+
{
|
1874 |
+
"epoch": 0.6744791666666666,
|
1875 |
+
"grad_norm": 0.13531260542176757,
|
1876 |
+
"learning_rate": 3.3744343714731835e-05,
|
1877 |
+
"loss": 2.3595,
|
1878 |
+
"step": 259
|
1879 |
+
},
|
1880 |
+
{
|
1881 |
+
"epoch": 0.6770833333333334,
|
1882 |
+
"grad_norm": 0.1488109854224464,
|
1883 |
+
"learning_rate": 3.3402813491809623e-05,
|
1884 |
+
"loss": 2.2631,
|
1885 |
+
"step": 260
|
1886 |
+
},
|
1887 |
+
{
|
1888 |
+
"epoch": 0.6796875,
|
1889 |
+
"grad_norm": 0.1753652780376821,
|
1890 |
+
"learning_rate": 3.3062892029633817e-05,
|
1891 |
+
"loss": 2.4748,
|
1892 |
+
"step": 261
|
1893 |
+
},
|
1894 |
+
{
|
1895 |
+
"epoch": 0.6822916666666666,
|
1896 |
+
"grad_norm": 0.1593205802273226,
|
1897 |
+
"learning_rate": 3.272460464873884e-05,
|
1898 |
+
"loss": 2.4484,
|
1899 |
+
"step": 262
|
1900 |
+
},
|
1901 |
+
{
|
1902 |
+
"epoch": 0.6848958333333334,
|
1903 |
+
"grad_norm": 0.1401885264986934,
|
1904 |
+
"learning_rate": 3.238797654793752e-05,
|
1905 |
+
"loss": 2.4234,
|
1906 |
+
"step": 263
|
1907 |
+
},
|
1908 |
+
{
|
1909 |
+
"epoch": 0.6875,
|
1910 |
+
"grad_norm": 0.15910838717602993,
|
1911 |
+
"learning_rate": 3.205303280244389e-05,
|
1912 |
+
"loss": 2.4679,
|
1913 |
+
"step": 264
|
1914 |
+
},
|
1915 |
+
{
|
1916 |
+
"epoch": 0.6901041666666666,
|
1917 |
+
"grad_norm": 0.16179488061734165,
|
1918 |
+
"learning_rate": 3.1719798362005444e-05,
|
1919 |
+
"loss": 2.4883,
|
1920 |
+
"step": 265
|
1921 |
+
},
|
1922 |
+
{
|
1923 |
+
"epoch": 0.6927083333333334,
|
1924 |
+
"grad_norm": 0.15848452909780508,
|
1925 |
+
"learning_rate": 3.138829804904464e-05,
|
1926 |
+
"loss": 2.4583,
|
1927 |
+
"step": 266
|
1928 |
+
},
|
1929 |
+
{
|
1930 |
+
"epoch": 0.6953125,
|
1931 |
+
"grad_norm": 0.16307212504652477,
|
1932 |
+
"learning_rate": 3.105855655680986e-05,
|
1933 |
+
"loss": 2.3327,
|
1934 |
+
"step": 267
|
1935 |
+
},
|
1936 |
+
{
|
1937 |
+
"epoch": 0.6979166666666666,
|
1938 |
+
"grad_norm": 0.1467535420565889,
|
1939 |
+
"learning_rate": 3.073059844753604e-05,
|
1940 |
+
"loss": 2.4382,
|
1941 |
+
"step": 268
|
1942 |
+
},
|
1943 |
+
{
|
1944 |
+
"epoch": 0.7005208333333334,
|
1945 |
+
"grad_norm": 0.14201414432531673,
|
1946 |
+
"learning_rate": 3.0404448150615063e-05,
|
1947 |
+
"loss": 2.3501,
|
1948 |
+
"step": 269
|
1949 |
+
},
|
1950 |
+
{
|
1951 |
+
"epoch": 0.703125,
|
1952 |
+
"grad_norm": 0.1549923554458448,
|
1953 |
+
"learning_rate": 3.0080129960776017e-05,
|
1954 |
+
"loss": 2.396,
|
1955 |
+
"step": 270
|
1956 |
+
},
|
1957 |
+
{
|
1958 |
+
"epoch": 0.7057291666666666,
|
1959 |
+
"grad_norm": 0.1524034184779795,
|
1960 |
+
"learning_rate": 2.9757668036275477e-05,
|
1961 |
+
"loss": 2.2784,
|
1962 |
+
"step": 271
|
1963 |
+
},
|
1964 |
+
{
|
1965 |
+
"epoch": 0.7083333333333334,
|
1966 |
+
"grad_norm": 0.16256677012412982,
|
1967 |
+
"learning_rate": 2.9437086397097995e-05,
|
1968 |
+
"loss": 2.3027,
|
1969 |
+
"step": 272
|
1970 |
+
},
|
1971 |
+
{
|
1972 |
+
"epoch": 0.7109375,
|
1973 |
+
"grad_norm": 0.15633557288864075,
|
1974 |
+
"learning_rate": 2.9118408923166875e-05,
|
1975 |
+
"loss": 2.5473,
|
1976 |
+
"step": 273
|
1977 |
+
},
|
1978 |
+
{
|
1979 |
+
"epoch": 0.7109375,
|
1980 |
+
"eval_loss": 2.3996334075927734,
|
1981 |
+
"eval_runtime": 65.6177,
|
1982 |
+
"eval_samples_per_second": 1.219,
|
1983 |
+
"eval_steps_per_second": 0.152,
|
1984 |
+
"step": 273
|
1985 |
}
|
1986 |
],
|
1987 |
"logging_steps": 1,
|
|
|
2001 |
"attributes": {}
|
2002 |
}
|
2003 |
},
|
2004 |
+
"total_flos": 90247866089472.0,
|
2005 |
"train_batch_size": 1,
|
2006 |
"trial_name": null,
|
2007 |
"trial_params": null
|