Daewon0808
commited on
Commit
•
3df4e32
1
Parent(s):
48f377f
Training in progress, step 7724, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step7724/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step7724/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step7724/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step7724/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step7724/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step7724/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step7724/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step7724/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +2276 -4
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 10107504
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cc3067f6f49803d6483ed250997366f38d0ab2f09b2da22d2fabe6ac0fb75ff6
|
3 |
size 10107504
|
last-checkpoint/global_step7724/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:97c6b7bff0e93774276654dfabe2789287685eff81eb0356b3c4e432d008879b
|
3 |
+
size 15142384
|
last-checkpoint/global_step7724/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e0ae9c9e58dc038648826e0b8f756489ad36eeb01e206fc42eeb3371aa7ab4f8
|
3 |
+
size 15142384
|
last-checkpoint/global_step7724/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:873d603f9ba9580b5e130bff47347dcc9654a60665ff32cf88b22531d21ad096
|
3 |
+
size 15142384
|
last-checkpoint/global_step7724/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b48442fffd98f620e79aec539490b8f3738cb7abd39f3ed80c2070d65eac7eb7
|
3 |
+
size 15142384
|
last-checkpoint/global_step7724/zero_pp_rank_0_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:99fd6c868aa11063fc90b4a235b658c1f06846579371a1ca435a3752fc94024d
|
3 |
+
size 133406
|
last-checkpoint/global_step7724/zero_pp_rank_1_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9a4b34b591a64db0bd036adac493e5bd479a400986798fb848eab6c0451bc8b0
|
3 |
+
size 133406
|
last-checkpoint/global_step7724/zero_pp_rank_2_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f591073404fd55b339bcc2665c6fbfbcea9d2cbdc3cec4ef877565ba3e794c9e
|
3 |
+
size 133406
|
last-checkpoint/global_step7724/zero_pp_rank_3_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:454aca661de8e2cda3725c47af8a9a9cdeac92329976320a37ebc99b58ff8fbe
|
3 |
+
size 133406
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step7724
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14960
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:365955c51abdb92c80c42d93201c5209f863b3153268eb4a953da4966f1e43a5
|
3 |
size 14960
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14960
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:833cfceec5f4c1e2e2fd595b8054119e44fc059867ce43ae83edc5c5ad6c4340
|
3 |
size 14960
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14960
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bf0517c56512d4ae7a3361a1e8dacb66d486d479cf260ca1d6f51f6ee19825e3
|
3 |
size 14960
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14960
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7ed8eaad9e0b09f1cddb62096ef671eb06bf7a4be830ed6580db5b8e23e266ae
|
3 |
size 14960
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f3270c748caccc6a16e6d4f08adacc13be568ae6a10d730ba03d05068ed26e9d
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 5,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -76523,6 +76523,2278 @@
|
|
76523 |
"eval_samples_per_second": 3.173,
|
76524 |
"eval_steps_per_second": 0.216,
|
76525 |
"step": 7500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
76526 |
}
|
76527 |
],
|
76528 |
"logging_steps": 1,
|
@@ -76537,12 +78809,12 @@
|
|
76537 |
"should_evaluate": false,
|
76538 |
"should_log": false,
|
76539 |
"should_save": true,
|
76540 |
-
"should_training_stop":
|
76541 |
},
|
76542 |
"attributes": {}
|
76543 |
}
|
76544 |
},
|
76545 |
-
"total_flos":
|
76546 |
"train_batch_size": 4,
|
76547 |
"trial_name": null,
|
76548 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.9999676343981616,
|
5 |
"eval_steps": 5,
|
6 |
+
"global_step": 7724,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
76523 |
"eval_samples_per_second": 3.173,
|
76524 |
"eval_steps_per_second": 0.216,
|
76525 |
"step": 7500
|
76526 |
+
},
|
76527 |
+
{
|
76528 |
+
"epoch": 0.971097517558339,
|
76529 |
+
"grad_norm": 2.078256325407057,
|
76530 |
+
"learning_rate": 3.171737588503407e-09,
|
76531 |
+
"loss": 2.4272,
|
76532 |
+
"step": 7501
|
76533 |
+
},
|
76534 |
+
{
|
76535 |
+
"epoch": 0.9712269799656925,
|
76536 |
+
"grad_norm": 1.366999089040965,
|
76537 |
+
"learning_rate": 3.1433791070100555e-09,
|
76538 |
+
"loss": 2.229,
|
76539 |
+
"step": 7502
|
76540 |
+
},
|
76541 |
+
{
|
76542 |
+
"epoch": 0.971356442373046,
|
76543 |
+
"grad_norm": 1.8412842996905656,
|
76544 |
+
"learning_rate": 3.1151476523359064e-09,
|
76545 |
+
"loss": 2.374,
|
76546 |
+
"step": 7503
|
76547 |
+
},
|
76548 |
+
{
|
76549 |
+
"epoch": 0.9714859047803994,
|
76550 |
+
"grad_norm": 2.4036313021269984,
|
76551 |
+
"learning_rate": 3.0870432302479435e-09,
|
76552 |
+
"loss": 2.552,
|
76553 |
+
"step": 7504
|
76554 |
+
},
|
76555 |
+
{
|
76556 |
+
"epoch": 0.9716153671877529,
|
76557 |
+
"grad_norm": 1.3319207705837204,
|
76558 |
+
"learning_rate": 3.059065846487061e-09,
|
76559 |
+
"loss": 2.4368,
|
76560 |
+
"step": 7505
|
76561 |
+
},
|
76562 |
+
{
|
76563 |
+
"epoch": 0.9716153671877529,
|
76564 |
+
"eval_PRM Accuracy": 0.2358490566037736,
|
76565 |
+
"eval_PRM F1": 0.047058823529411764,
|
76566 |
+
"eval_PRM F1 AUC": 0.5120481927710844,
|
76567 |
+
"eval_PRM F1 Neg": 0.36220472440944884,
|
76568 |
+
"eval_PRM NPV": 0.22115384615384615,
|
76569 |
+
"eval_PRM Precision": 1.0,
|
76570 |
+
"eval_PRM Recall": 0.024096385542168676,
|
76571 |
+
"eval_PRM Specificty": 1.0,
|
76572 |
+
"eval_loss": 1.4964932203292847,
|
76573 |
+
"eval_runtime": 14.552,
|
76574 |
+
"eval_samples_per_second": 3.024,
|
76575 |
+
"eval_steps_per_second": 0.206,
|
76576 |
+
"step": 7505
|
76577 |
+
},
|
76578 |
+
{
|
76579 |
+
"epoch": 0.9717448295951063,
|
76580 |
+
"grad_norm": 2.604357541325846,
|
76581 |
+
"learning_rate": 3.031215506768062e-09,
|
76582 |
+
"loss": 2.4111,
|
76583 |
+
"step": 7506
|
76584 |
+
},
|
76585 |
+
{
|
76586 |
+
"epoch": 0.9718742920024598,
|
76587 |
+
"grad_norm": 1.7566724147974662,
|
76588 |
+
"learning_rate": 3.003492216780077e-09,
|
76589 |
+
"loss": 2.4978,
|
76590 |
+
"step": 7507
|
76591 |
+
},
|
76592 |
+
{
|
76593 |
+
"epoch": 0.9720037544098132,
|
76594 |
+
"grad_norm": 1.8860731801998685,
|
76595 |
+
"learning_rate": 2.975895982186075e-09,
|
76596 |
+
"loss": 2.1368,
|
76597 |
+
"step": 7508
|
76598 |
+
},
|
76599 |
+
{
|
76600 |
+
"epoch": 0.9721332168171667,
|
76601 |
+
"grad_norm": 1.336728000590379,
|
76602 |
+
"learning_rate": 2.948426808623145e-09,
|
76603 |
+
"loss": 2.3132,
|
76604 |
+
"step": 7509
|
76605 |
+
},
|
76606 |
+
{
|
76607 |
+
"epoch": 0.9722626792245201,
|
76608 |
+
"grad_norm": 1.6261016992599011,
|
76609 |
+
"learning_rate": 2.9210847017024922e-09,
|
76610 |
+
"loss": 2.2693,
|
76611 |
+
"step": 7510
|
76612 |
+
},
|
76613 |
+
{
|
76614 |
+
"epoch": 0.9722626792245201,
|
76615 |
+
"eval_PRM Accuracy": 0.2358490566037736,
|
76616 |
+
"eval_PRM F1": 0.047058823529411764,
|
76617 |
+
"eval_PRM F1 AUC": 0.5120481927710844,
|
76618 |
+
"eval_PRM F1 Neg": 0.36220472440944884,
|
76619 |
+
"eval_PRM NPV": 0.22115384615384615,
|
76620 |
+
"eval_PRM Precision": 1.0,
|
76621 |
+
"eval_PRM Recall": 0.024096385542168676,
|
76622 |
+
"eval_PRM Specificty": 1.0,
|
76623 |
+
"eval_loss": 1.495072841644287,
|
76624 |
+
"eval_runtime": 14.6337,
|
76625 |
+
"eval_samples_per_second": 3.007,
|
76626 |
+
"eval_steps_per_second": 0.205,
|
76627 |
+
"step": 7510
|
76628 |
+
},
|
76629 |
+
{
|
76630 |
+
"epoch": 0.9723921416318736,
|
76631 |
+
"grad_norm": 1.7337031603610027,
|
76632 |
+
"learning_rate": 2.893869667009233e-09,
|
76633 |
+
"loss": 2.573,
|
76634 |
+
"step": 7511
|
76635 |
+
},
|
76636 |
+
{
|
76637 |
+
"epoch": 0.9725216040392272,
|
76638 |
+
"grad_norm": 1.516881266320023,
|
76639 |
+
"learning_rate": 2.866781710102601e-09,
|
76640 |
+
"loss": 2.4393,
|
76641 |
+
"step": 7512
|
76642 |
+
},
|
76643 |
+
{
|
76644 |
+
"epoch": 0.9726510664465806,
|
76645 |
+
"grad_norm": 1.778019440023385,
|
76646 |
+
"learning_rate": 2.839820836515947e-09,
|
76647 |
+
"loss": 2.4497,
|
76648 |
+
"step": 7513
|
76649 |
+
},
|
76650 |
+
{
|
76651 |
+
"epoch": 0.9727805288539341,
|
76652 |
+
"grad_norm": 1.649562667349022,
|
76653 |
+
"learning_rate": 2.812987051756394e-09,
|
76654 |
+
"loss": 2.3696,
|
76655 |
+
"step": 7514
|
76656 |
+
},
|
76657 |
+
{
|
76658 |
+
"epoch": 0.9729099912612875,
|
76659 |
+
"grad_norm": 1.6709675287516073,
|
76660 |
+
"learning_rate": 2.78628036130546e-09,
|
76661 |
+
"loss": 2.7583,
|
76662 |
+
"step": 7515
|
76663 |
+
},
|
76664 |
+
{
|
76665 |
+
"epoch": 0.9729099912612875,
|
76666 |
+
"eval_PRM Accuracy": 0.2358490566037736,
|
76667 |
+
"eval_PRM F1": 0.047058823529411764,
|
76668 |
+
"eval_PRM F1 AUC": 0.5120481927710844,
|
76669 |
+
"eval_PRM F1 Neg": 0.36220472440944884,
|
76670 |
+
"eval_PRM NPV": 0.22115384615384615,
|
76671 |
+
"eval_PRM Precision": 1.0,
|
76672 |
+
"eval_PRM Recall": 0.024096385542168676,
|
76673 |
+
"eval_PRM Specificty": 1.0,
|
76674 |
+
"eval_loss": 1.4964045286178589,
|
76675 |
+
"eval_runtime": 14.1338,
|
76676 |
+
"eval_samples_per_second": 3.113,
|
76677 |
+
"eval_steps_per_second": 0.212,
|
76678 |
+
"step": 7515
|
76679 |
+
},
|
76680 |
+
{
|
76681 |
+
"epoch": 0.973039453668641,
|
76682 |
+
"grad_norm": 1.208623897904988,
|
76683 |
+
"learning_rate": 2.7597007706184344e-09,
|
76684 |
+
"loss": 2.2378,
|
76685 |
+
"step": 7516
|
76686 |
+
},
|
76687 |
+
{
|
76688 |
+
"epoch": 0.9731689160759944,
|
76689 |
+
"grad_norm": 1.3760642333243103,
|
76690 |
+
"learning_rate": 2.7332482851247926e-09,
|
76691 |
+
"loss": 2.4258,
|
76692 |
+
"step": 7517
|
76693 |
+
},
|
76694 |
+
{
|
76695 |
+
"epoch": 0.9732983784833479,
|
76696 |
+
"grad_norm": 3.037334589617975,
|
76697 |
+
"learning_rate": 2.7069229102279217e-09,
|
76698 |
+
"loss": 2.3164,
|
76699 |
+
"step": 7518
|
76700 |
+
},
|
76701 |
+
{
|
76702 |
+
"epoch": 0.9734278408907013,
|
76703 |
+
"grad_norm": 1.5660663366500944,
|
76704 |
+
"learning_rate": 2.680724651305325e-09,
|
76705 |
+
"loss": 2.1655,
|
76706 |
+
"step": 7519
|
76707 |
+
},
|
76708 |
+
{
|
76709 |
+
"epoch": 0.9735573032980548,
|
76710 |
+
"grad_norm": 2.3157849481641852,
|
76711 |
+
"learning_rate": 2.6546535137086244e-09,
|
76712 |
+
"loss": 2.5334,
|
76713 |
+
"step": 7520
|
76714 |
+
},
|
76715 |
+
{
|
76716 |
+
"epoch": 0.9735573032980548,
|
76717 |
+
"eval_PRM Accuracy": 0.2358490566037736,
|
76718 |
+
"eval_PRM F1": 0.047058823529411764,
|
76719 |
+
"eval_PRM F1 AUC": 0.5120481927710844,
|
76720 |
+
"eval_PRM F1 Neg": 0.36220472440944884,
|
76721 |
+
"eval_PRM NPV": 0.22115384615384615,
|
76722 |
+
"eval_PRM Precision": 1.0,
|
76723 |
+
"eval_PRM Recall": 0.024096385542168676,
|
76724 |
+
"eval_PRM Specificty": 1.0,
|
76725 |
+
"eval_loss": 1.4964932203292847,
|
76726 |
+
"eval_runtime": 14.4152,
|
76727 |
+
"eval_samples_per_second": 3.052,
|
76728 |
+
"eval_steps_per_second": 0.208,
|
76729 |
+
"step": 7520
|
76730 |
+
},
|
76731 |
+
{
|
76732 |
+
"epoch": 0.9736867657054082,
|
76733 |
+
"grad_norm": 1.6032188100029758,
|
76734 |
+
"learning_rate": 2.6287095027632824e-09,
|
76735 |
+
"loss": 2.3384,
|
76736 |
+
"step": 7521
|
76737 |
+
},
|
76738 |
+
{
|
76739 |
+
"epoch": 0.9738162281127618,
|
76740 |
+
"grad_norm": 1.6009070614900751,
|
76741 |
+
"learning_rate": 2.6028926237689488e-09,
|
76742 |
+
"loss": 2.4028,
|
76743 |
+
"step": 7522
|
76744 |
+
},
|
76745 |
+
{
|
76746 |
+
"epoch": 0.9739456905201153,
|
76747 |
+
"grad_norm": 1.935103516636611,
|
76748 |
+
"learning_rate": 2.577202881999183e-09,
|
76749 |
+
"loss": 2.2874,
|
76750 |
+
"step": 7523
|
76751 |
+
},
|
76752 |
+
{
|
76753 |
+
"epoch": 0.9740751529274687,
|
76754 |
+
"grad_norm": 1.8155538775672537,
|
76755 |
+
"learning_rate": 2.5516402827016617e-09,
|
76756 |
+
"loss": 2.3831,
|
76757 |
+
"step": 7524
|
76758 |
+
},
|
76759 |
+
{
|
76760 |
+
"epoch": 0.9742046153348222,
|
76761 |
+
"grad_norm": 1.0483947077967555,
|
76762 |
+
"learning_rate": 2.5262048310980418e-09,
|
76763 |
+
"loss": 2.1833,
|
76764 |
+
"step": 7525
|
76765 |
+
},
|
76766 |
+
{
|
76767 |
+
"epoch": 0.9742046153348222,
|
76768 |
+
"eval_PRM Accuracy": 0.2358490566037736,
|
76769 |
+
"eval_PRM F1": 0.047058823529411764,
|
76770 |
+
"eval_PRM F1 AUC": 0.5120481927710844,
|
76771 |
+
"eval_PRM F1 Neg": 0.36220472440944884,
|
76772 |
+
"eval_PRM NPV": 0.22115384615384615,
|
76773 |
+
"eval_PRM Precision": 1.0,
|
76774 |
+
"eval_PRM Recall": 0.024096385542168676,
|
76775 |
+
"eval_PRM Specificty": 1.0,
|
76776 |
+
"eval_loss": 1.495805263519287,
|
76777 |
+
"eval_runtime": 14.1041,
|
76778 |
+
"eval_samples_per_second": 3.12,
|
76779 |
+
"eval_steps_per_second": 0.213,
|
76780 |
+
"step": 7525
|
76781 |
+
},
|
76782 |
+
{
|
76783 |
+
"epoch": 0.9743340777421756,
|
76784 |
+
"grad_norm": 1.552266890430507,
|
76785 |
+
"learning_rate": 2.500896532384098e-09,
|
76786 |
+
"loss": 2.1396,
|
76787 |
+
"step": 7526
|
76788 |
+
},
|
76789 |
+
{
|
76790 |
+
"epoch": 0.9744635401495291,
|
76791 |
+
"grad_norm": 4.162519199111511,
|
76792 |
+
"learning_rate": 2.4757153917295136e-09,
|
76793 |
+
"loss": 3.0793,
|
76794 |
+
"step": 7527
|
76795 |
+
},
|
76796 |
+
{
|
76797 |
+
"epoch": 0.9745930025568825,
|
76798 |
+
"grad_norm": 1.4020772771060606,
|
76799 |
+
"learning_rate": 2.4506614142780216e-09,
|
76800 |
+
"loss": 2.2017,
|
76801 |
+
"step": 7528
|
76802 |
+
},
|
76803 |
+
{
|
76804 |
+
"epoch": 0.974722464964236,
|
76805 |
+
"grad_norm": 2.257257302308101,
|
76806 |
+
"learning_rate": 2.4257346051474034e-09,
|
76807 |
+
"loss": 2.3549,
|
76808 |
+
"step": 7529
|
76809 |
+
},
|
76810 |
+
{
|
76811 |
+
"epoch": 0.9748519273715894,
|
76812 |
+
"grad_norm": 2.304261799764049,
|
76813 |
+
"learning_rate": 2.400934969429558e-09,
|
76814 |
+
"loss": 2.4275,
|
76815 |
+
"step": 7530
|
76816 |
+
},
|
76817 |
+
{
|
76818 |
+
"epoch": 0.9748519273715894,
|
76819 |
+
"eval_PRM Accuracy": 0.2358490566037736,
|
76820 |
+
"eval_PRM F1": 0.047058823529411764,
|
76821 |
+
"eval_PRM F1 AUC": 0.5120481927710844,
|
76822 |
+
"eval_PRM F1 Neg": 0.36220472440944884,
|
76823 |
+
"eval_PRM NPV": 0.22115384615384615,
|
76824 |
+
"eval_PRM Precision": 1.0,
|
76825 |
+
"eval_PRM Recall": 0.024096385542168676,
|
76826 |
+
"eval_PRM Specificty": 1.0,
|
76827 |
+
"eval_loss": 1.495072841644287,
|
76828 |
+
"eval_runtime": 13.7187,
|
76829 |
+
"eval_samples_per_second": 3.207,
|
76830 |
+
"eval_steps_per_second": 0.219,
|
76831 |
+
"step": 7530
|
76832 |
+
},
|
76833 |
+
{
|
76834 |
+
"epoch": 0.9749813897789429,
|
76835 |
+
"grad_norm": 1.358154053410943,
|
76836 |
+
"learning_rate": 2.376262512190225e-09,
|
76837 |
+
"loss": 2.2069,
|
76838 |
+
"step": 7531
|
76839 |
+
},
|
76840 |
+
{
|
76841 |
+
"epoch": 0.9751108521862965,
|
76842 |
+
"grad_norm": 1.9448873246460772,
|
76843 |
+
"learning_rate": 2.351717238469331e-09,
|
76844 |
+
"loss": 2.4498,
|
76845 |
+
"step": 7532
|
76846 |
+
},
|
76847 |
+
{
|
76848 |
+
"epoch": 0.9752403145936499,
|
76849 |
+
"grad_norm": 2.323208439137572,
|
76850 |
+
"learning_rate": 2.3272991532807126e-09,
|
76851 |
+
"loss": 2.5518,
|
76852 |
+
"step": 7533
|
76853 |
+
},
|
76854 |
+
{
|
76855 |
+
"epoch": 0.9753697770010034,
|
76856 |
+
"grad_norm": 1.9000478401818919,
|
76857 |
+
"learning_rate": 2.3030082616121858e-09,
|
76858 |
+
"loss": 2.2683,
|
76859 |
+
"step": 7534
|
76860 |
+
},
|
76861 |
+
{
|
76862 |
+
"epoch": 0.9754992394083568,
|
76863 |
+
"grad_norm": 2.0009142246658937,
|
76864 |
+
"learning_rate": 2.2788445684256836e-09,
|
76865 |
+
"loss": 2.2664,
|
76866 |
+
"step": 7535
|
76867 |
+
},
|
76868 |
+
{
|
76869 |
+
"epoch": 0.9754992394083568,
|
76870 |
+
"eval_PRM Accuracy": 0.2358490566037736,
|
76871 |
+
"eval_PRM F1": 0.047058823529411764,
|
76872 |
+
"eval_PRM F1 AUC": 0.5120481927710844,
|
76873 |
+
"eval_PRM F1 Neg": 0.36220472440944884,
|
76874 |
+
"eval_PRM NPV": 0.22115384615384615,
|
76875 |
+
"eval_PRM Precision": 1.0,
|
76876 |
+
"eval_PRM Recall": 0.024096385542168676,
|
76877 |
+
"eval_PRM Specificty": 1.0,
|
76878 |
+
"eval_loss": 1.4978693723678589,
|
76879 |
+
"eval_runtime": 14.3015,
|
76880 |
+
"eval_samples_per_second": 3.077,
|
76881 |
+
"eval_steps_per_second": 0.21,
|
76882 |
+
"step": 7535
|
76883 |
+
},
|
76884 |
+
{
|
76885 |
+
"epoch": 0.9756287018157103,
|
76886 |
+
"grad_norm": 1.3166239883775896,
|
76887 |
+
"learning_rate": 2.254808078657189e-09,
|
76888 |
+
"loss": 2.3547,
|
76889 |
+
"step": 7536
|
76890 |
+
},
|
76891 |
+
{
|
76892 |
+
"epoch": 0.9757581642230637,
|
76893 |
+
"grad_norm": 3.8384585910489326,
|
76894 |
+
"learning_rate": 2.230898797216663e-09,
|
76895 |
+
"loss": 2.3691,
|
76896 |
+
"step": 7537
|
76897 |
+
},
|
76898 |
+
{
|
76899 |
+
"epoch": 0.9758876266304172,
|
76900 |
+
"grad_norm": 1.5529322232040648,
|
76901 |
+
"learning_rate": 2.207116728987976e-09,
|
76902 |
+
"loss": 2.4724,
|
76903 |
+
"step": 7538
|
76904 |
+
},
|
76905 |
+
{
|
76906 |
+
"epoch": 0.9760170890377706,
|
76907 |
+
"grad_norm": 1.2127774713992632,
|
76908 |
+
"learning_rate": 2.1834618788291183e-09,
|
76909 |
+
"loss": 2.2393,
|
76910 |
+
"step": 7539
|
76911 |
+
},
|
76912 |
+
{
|
76913 |
+
"epoch": 0.9761465514451241,
|
76914 |
+
"grad_norm": 2.1174450047835274,
|
76915 |
+
"learning_rate": 2.1599342515720573e-09,
|
76916 |
+
"loss": 2.335,
|
76917 |
+
"step": 7540
|
76918 |
+
},
|
76919 |
+
{
|
76920 |
+
"epoch": 0.9761465514451241,
|
76921 |
+
"eval_PRM Accuracy": 0.2358490566037736,
|
76922 |
+
"eval_PRM F1": 0.047058823529411764,
|
76923 |
+
"eval_PRM F1 AUC": 0.5120481927710844,
|
76924 |
+
"eval_PRM F1 Neg": 0.36220472440944884,
|
76925 |
+
"eval_PRM NPV": 0.22115384615384615,
|
76926 |
+
"eval_PRM Precision": 1.0,
|
76927 |
+
"eval_PRM Recall": 0.024096385542168676,
|
76928 |
+
"eval_PRM Specificty": 1.0,
|
76929 |
+
"eval_loss": 1.493696689605713,
|
76930 |
+
"eval_runtime": 14.3755,
|
76931 |
+
"eval_samples_per_second": 3.061,
|
76932 |
+
"eval_steps_per_second": 0.209,
|
76933 |
+
"step": 7540
|
76934 |
+
},
|
76935 |
+
{
|
76936 |
+
"epoch": 0.9762760138524776,
|
76937 |
+
"grad_norm": 2.483136951823826,
|
76938 |
+
"learning_rate": 2.136533852022879e-09,
|
76939 |
+
"loss": 2.4524,
|
76940 |
+
"step": 7541
|
76941 |
+
},
|
76942 |
+
{
|
76943 |
+
"epoch": 0.9764054762598311,
|
76944 |
+
"grad_norm": 1.5511048057136474,
|
76945 |
+
"learning_rate": 2.11326068496151e-09,
|
76946 |
+
"loss": 2.4346,
|
76947 |
+
"step": 7542
|
76948 |
+
},
|
76949 |
+
{
|
76950 |
+
"epoch": 0.9765349386671845,
|
76951 |
+
"grad_norm": 1.8425095487094147,
|
76952 |
+
"learning_rate": 2.0901147551419948e-09,
|
76953 |
+
"loss": 2.3542,
|
76954 |
+
"step": 7543
|
76955 |
+
},
|
76956 |
+
{
|
76957 |
+
"epoch": 0.976664401074538,
|
76958 |
+
"grad_norm": 1.591283586442896,
|
76959 |
+
"learning_rate": 2.0670960672923567e-09,
|
76960 |
+
"loss": 2.2773,
|
76961 |
+
"step": 7544
|
76962 |
+
},
|
76963 |
+
{
|
76964 |
+
"epoch": 0.9767938634818915,
|
76965 |
+
"grad_norm": 3.219894890994954,
|
76966 |
+
"learning_rate": 2.044204626114599e-09,
|
76967 |
+
"loss": 2.686,
|
76968 |
+
"step": 7545
|
76969 |
+
},
|
76970 |
+
{
|
76971 |
+
"epoch": 0.9767938634818915,
|
76972 |
+
"eval_PRM Accuracy": 0.2358490566037736,
|
76973 |
+
"eval_PRM F1": 0.047058823529411764,
|
76974 |
+
"eval_PRM F1 AUC": 0.5120481927710844,
|
76975 |
+
"eval_PRM F1 Neg": 0.36220472440944884,
|
76976 |
+
"eval_PRM NPV": 0.22115384615384615,
|
76977 |
+
"eval_PRM Precision": 1.0,
|
76978 |
+
"eval_PRM Recall": 0.024096385542168676,
|
76979 |
+
"eval_PRM Specificty": 1.0,
|
76980 |
+
"eval_loss": 1.4943181276321411,
|
76981 |
+
"eval_runtime": 14.3109,
|
76982 |
+
"eval_samples_per_second": 3.075,
|
76983 |
+
"eval_steps_per_second": 0.21,
|
76984 |
+
"step": 7545
|
76985 |
+
},
|
76986 |
+
{
|
76987 |
+
"epoch": 0.9769233258892449,
|
76988 |
+
"grad_norm": 1.6948323938636074,
|
76989 |
+
"learning_rate": 2.021440436284841e-09,
|
76990 |
+
"loss": 2.5454,
|
76991 |
+
"step": 7546
|
76992 |
+
},
|
76993 |
+
{
|
76994 |
+
"epoch": 0.9770527882965984,
|
76995 |
+
"grad_norm": 2.0809224454371016,
|
76996 |
+
"learning_rate": 1.9988035024529756e-09,
|
76997 |
+
"loss": 2.2063,
|
76998 |
+
"step": 7547
|
76999 |
+
},
|
77000 |
+
{
|
77001 |
+
"epoch": 0.9771822507039518,
|
77002 |
+
"grad_norm": 1.8854420394243903,
|
77003 |
+
"learning_rate": 1.97629382924322e-09,
|
77004 |
+
"loss": 2.2332,
|
77005 |
+
"step": 7548
|
77006 |
+
},
|
77007 |
+
{
|
77008 |
+
"epoch": 0.9773117131113053,
|
77009 |
+
"grad_norm": 2.5643896611272123,
|
77010 |
+
"learning_rate": 1.9539114212535625e-09,
|
77011 |
+
"loss": 2.293,
|
77012 |
+
"step": 7549
|
77013 |
+
},
|
77014 |
+
{
|
77015 |
+
"epoch": 0.9774411755186587,
|
77016 |
+
"grad_norm": 1.1925655779849842,
|
77017 |
+
"learning_rate": 1.931656283056041e-09,
|
77018 |
+
"loss": 2.1436,
|
77019 |
+
"step": 7550
|
77020 |
+
},
|
77021 |
+
{
|
77022 |
+
"epoch": 0.9774411755186587,
|
77023 |
+
"eval_PRM Accuracy": 0.2358490566037736,
|
77024 |
+
"eval_PRM F1": 0.047058823529411764,
|
77025 |
+
"eval_PRM F1 AUC": 0.5120481927710844,
|
77026 |
+
"eval_PRM F1 Neg": 0.36220472440944884,
|
77027 |
+
"eval_PRM NPV": 0.22115384615384615,
|
77028 |
+
"eval_PRM Precision": 1.0,
|
77029 |
+
"eval_PRM Recall": 0.024096385542168676,
|
77030 |
+
"eval_PRM Specificty": 1.0,
|
77031 |
+
"eval_loss": 1.495649814605713,
|
77032 |
+
"eval_runtime": 14.3042,
|
77033 |
+
"eval_samples_per_second": 3.076,
|
77034 |
+
"eval_steps_per_second": 0.21,
|
77035 |
+
"step": 7550
|
77036 |
+
},
|
77037 |
+
{
|
77038 |
+
"epoch": 0.9775706379260123,
|
77039 |
+
"grad_norm": 6.1310483327674925,
|
77040 |
+
"learning_rate": 1.90952841919681e-09,
|
77041 |
+
"loss": 2.8223,
|
77042 |
+
"step": 7551
|
77043 |
+
},
|
77044 |
+
{
|
77045 |
+
"epoch": 0.9777001003333657,
|
77046 |
+
"grad_norm": 1.8490057934229254,
|
77047 |
+
"learning_rate": 1.887527834195796e-09,
|
77048 |
+
"loss": 2.2754,
|
77049 |
+
"step": 7552
|
77050 |
+
},
|
77051 |
+
{
|
77052 |
+
"epoch": 0.9778295627407192,
|
77053 |
+
"grad_norm": 1.9545709254230654,
|
77054 |
+
"learning_rate": 1.8656545325471122e-09,
|
77055 |
+
"loss": 2.3342,
|
77056 |
+
"step": 7553
|
77057 |
+
},
|
77058 |
+
{
|
77059 |
+
"epoch": 0.9779590251480726,
|
77060 |
+
"grad_norm": 1.7527908462995896,
|
77061 |
+
"learning_rate": 1.8439085187189205e-09,
|
77062 |
+
"loss": 2.324,
|
77063 |
+
"step": 7554
|
77064 |
+
},
|
77065 |
+
{
|
77066 |
+
"epoch": 0.9780884875554261,
|
77067 |
+
"grad_norm": 1.8819808845260853,
|
77068 |
+
"learning_rate": 1.822289797153154e-09,
|
77069 |
+
"loss": 2.3325,
|
77070 |
+
"step": 7555
|
77071 |
+
},
|
77072 |
+
{
|
77073 |
+
"epoch": 0.9780884875554261,
|
77074 |
+
"eval_PRM Accuracy": 0.2358490566037736,
|
77075 |
+
"eval_PRM F1": 0.047058823529411764,
|
77076 |
+
"eval_PRM F1 AUC": 0.5120481927710844,
|
77077 |
+
"eval_PRM F1 Neg": 0.36220472440944884,
|
77078 |
+
"eval_PRM NPV": 0.22115384615384615,
|
77079 |
+
"eval_PRM Precision": 1.0,
|
77080 |
+
"eval_PRM Recall": 0.024096385542168676,
|
77081 |
+
"eval_PRM Specificty": 1.0,
|
77082 |
+
"eval_loss": 1.4964932203292847,
|
77083 |
+
"eval_runtime": 13.6691,
|
77084 |
+
"eval_samples_per_second": 3.219,
|
77085 |
+
"eval_steps_per_second": 0.219,
|
77086 |
+
"step": 7555
|
77087 |
+
},
|
77088 |
+
{
|
77089 |
+
"epoch": 0.9782179499627796,
|
77090 |
+
"grad_norm": 1.9981064278922835,
|
77091 |
+
"learning_rate": 1.8007983722660021e-09,
|
77092 |
+
"loss": 2.519,
|
77093 |
+
"step": 7556
|
77094 |
+
},
|
77095 |
+
{
|
77096 |
+
"epoch": 0.978347412370133,
|
77097 |
+
"grad_norm": 2.209991202418474,
|
77098 |
+
"learning_rate": 1.7794342484474259e-09,
|
77099 |
+
"loss": 2.4434,
|
77100 |
+
"step": 7557
|
77101 |
+
},
|
77102 |
+
{
|
77103 |
+
"epoch": 0.9784768747774865,
|
77104 |
+
"grad_norm": 2.282220337288573,
|
77105 |
+
"learning_rate": 1.758197430061434e-09,
|
77106 |
+
"loss": 2.5867,
|
77107 |
+
"step": 7558
|
77108 |
+
},
|
77109 |
+
{
|
77110 |
+
"epoch": 0.9786063371848399,
|
77111 |
+
"grad_norm": 2.386537577649605,
|
77112 |
+
"learning_rate": 1.7370879214462232e-09,
|
77113 |
+
"loss": 2.3914,
|
77114 |
+
"step": 7559
|
77115 |
+
},
|
77116 |
+
{
|
77117 |
+
"epoch": 0.9787357995921934,
|
77118 |
+
"grad_norm": 1.4211663268329082,
|
77119 |
+
"learning_rate": 1.7161057269138303e-09,
|
77120 |
+
"loss": 2.387,
|
77121 |
+
"step": 7560
|
77122 |
+
},
|
77123 |
+
{
|
77124 |
+
"epoch": 0.9787357995921934,
|
77125 |
+
"eval_PRM Accuracy": 0.2358490566037736,
|
77126 |
+
"eval_PRM F1": 0.047058823529411764,
|
77127 |
+
"eval_PRM F1 AUC": 0.5120481927710844,
|
77128 |
+
"eval_PRM F1 Neg": 0.36220472440944884,
|
77129 |
+
"eval_PRM NPV": 0.22115384615384615,
|
77130 |
+
"eval_PRM Precision": 1.0,
|
77131 |
+
"eval_PRM Recall": 0.024096385542168676,
|
77132 |
+
"eval_PRM Specificty": 1.0,
|
77133 |
+
"eval_loss": 1.494384765625,
|
77134 |
+
"eval_runtime": 14.0268,
|
77135 |
+
"eval_samples_per_second": 3.137,
|
77136 |
+
"eval_steps_per_second": 0.214,
|
77137 |
+
"step": 7560
|
77138 |
+
},
|
77139 |
+
{
|
77140 |
+
"epoch": 0.9788652619995469,
|
77141 |
+
"grad_norm": 2.1706050298282404,
|
77142 |
+
"learning_rate": 1.6952508507501324e-09,
|
77143 |
+
"loss": 2.1416,
|
77144 |
+
"step": 7561
|
77145 |
+
},
|
77146 |
+
{
|
77147 |
+
"epoch": 0.9789947244069004,
|
77148 |
+
"grad_norm": 1.4697119638840281,
|
77149 |
+
"learning_rate": 1.6745232972152637e-09,
|
77150 |
+
"loss": 2.3052,
|
77151 |
+
"step": 7562
|
77152 |
+
},
|
77153 |
+
{
|
77154 |
+
"epoch": 0.9791241868142538,
|
77155 |
+
"grad_norm": 1.9887583015605543,
|
77156 |
+
"learning_rate": 1.6539230705432675e-09,
|
77157 |
+
"loss": 2.7405,
|
77158 |
+
"step": 7563
|
77159 |
+
},
|
77160 |
+
{
|
77161 |
+
"epoch": 0.9792536492216073,
|
77162 |
+
"grad_norm": 1.3002785682904727,
|
77163 |
+
"learning_rate": 1.6334501749421667e-09,
|
77164 |
+
"loss": 2.3696,
|
77165 |
+
"step": 7564
|
77166 |
+
},
|
77167 |
+
{
|
77168 |
+
"epoch": 0.9793831116289607,
|
77169 |
+
"grad_norm": 4.01701817403821,
|
77170 |
+
"learning_rate": 1.613104614593894e-09,
|
77171 |
+
"loss": 2.3389,
|
77172 |
+
"step": 7565
|
77173 |
+
},
|
77174 |
+
{
|
77175 |
+
"epoch": 0.9793831116289607,
|
77176 |
+
"eval_PRM Accuracy": 0.2358490566037736,
|
77177 |
+
"eval_PRM F1": 0.047058823529411764,
|
77178 |
+
"eval_PRM F1 AUC": 0.5120481927710844,
|
77179 |
+
"eval_PRM F1 Neg": 0.36220472440944884,
|
77180 |
+
"eval_PRM NPV": 0.22115384615384615,
|
77181 |
+
"eval_PRM Precision": 1.0,
|
77182 |
+
"eval_PRM Recall": 0.024096385542168676,
|
77183 |
+
"eval_PRM Specificty": 1.0,
|
77184 |
+
"eval_loss": 1.498579502105713,
|
77185 |
+
"eval_runtime": 15.3295,
|
77186 |
+
"eval_samples_per_second": 2.87,
|
77187 |
+
"eval_steps_per_second": 0.196,
|
77188 |
+
"step": 7565
|
77189 |
+
},
|
77190 |
+
{
|
77191 |
+
"epoch": 0.9795125740363142,
|
77192 |
+
"grad_norm": 2.82389856272306,
|
77193 |
+
"learning_rate": 1.5928863936544998e-09,
|
77194 |
+
"loss": 2.4026,
|
77195 |
+
"step": 7566
|
77196 |
+
},
|
77197 |
+
{
|
77198 |
+
"epoch": 0.9796420364436677,
|
77199 |
+
"grad_norm": 1.9015822937119669,
|
77200 |
+
"learning_rate": 1.5727955162539444e-09,
|
77201 |
+
"loss": 2.387,
|
77202 |
+
"step": 7567
|
77203 |
+
},
|
77204 |
+
{
|
77205 |
+
"epoch": 0.9797714988510211,
|
77206 |
+
"grad_norm": 1.1406419283583769,
|
77207 |
+
"learning_rate": 1.5528319864961672e-09,
|
77208 |
+
"loss": 2.1597,
|
77209 |
+
"step": 7568
|
77210 |
+
},
|
77211 |
+
{
|
77212 |
+
"epoch": 0.9799009612583746,
|
77213 |
+
"grad_norm": 2.1355981721634114,
|
77214 |
+
"learning_rate": 1.5329958084592255e-09,
|
77215 |
+
"loss": 2.3271,
|
77216 |
+
"step": 7569
|
77217 |
+
},
|
77218 |
+
{
|
77219 |
+
"epoch": 0.980030423665728,
|
77220 |
+
"grad_norm": 1.4628505282297621,
|
77221 |
+
"learning_rate": 1.5132869861948785e-09,
|
77222 |
+
"loss": 2.4343,
|
77223 |
+
"step": 7570
|
77224 |
+
},
|
77225 |
+
{
|
77226 |
+
"epoch": 0.980030423665728,
|
77227 |
+
"eval_PRM Accuracy": 0.2358490566037736,
|
77228 |
+
"eval_PRM F1": 0.047058823529411764,
|
77229 |
+
"eval_PRM F1 AUC": 0.5120481927710844,
|
77230 |
+
"eval_PRM F1 Neg": 0.36220472440944884,
|
77231 |
+
"eval_PRM NPV": 0.22115384615384615,
|
77232 |
+
"eval_PRM Precision": 1.0,
|
77233 |
+
"eval_PRM Recall": 0.024096385542168676,
|
77234 |
+
"eval_PRM Specificty": 1.0,
|
77235 |
+
"eval_loss": 1.4957829713821411,
|
77236 |
+
"eval_runtime": 13.8423,
|
77237 |
+
"eval_samples_per_second": 3.179,
|
77238 |
+
"eval_steps_per_second": 0.217,
|
77239 |
+
"step": 7570
|
77240 |
+
},
|
77241 |
+
{
|
77242 |
+
"epoch": 0.9801598860730816,
|
77243 |
+
"grad_norm": 1.5826016277513821,
|
77244 |
+
"learning_rate": 1.4937055237292802e-09,
|
77245 |
+
"loss": 2.3635,
|
77246 |
+
"step": 7571
|
77247 |
+
},
|
77248 |
+
{
|
77249 |
+
"epoch": 0.980289348480435,
|
77250 |
+
"grad_norm": 1.2967653618589454,
|
77251 |
+
"learning_rate": 1.4742514250621481e-09,
|
77252 |
+
"loss": 2.2004,
|
77253 |
+
"step": 7572
|
77254 |
+
},
|
77255 |
+
{
|
77256 |
+
"epoch": 0.9804188108877885,
|
77257 |
+
"grad_norm": 1.348762418289272,
|
77258 |
+
"learning_rate": 1.454924694167456e-09,
|
77259 |
+
"loss": 2.439,
|
77260 |
+
"step": 7573
|
77261 |
+
},
|
77262 |
+
{
|
77263 |
+
"epoch": 0.9805482732951419,
|
77264 |
+
"grad_norm": 2.0059993316066187,
|
77265 |
+
"learning_rate": 1.4357253349931571e-09,
|
77266 |
+
"loss": 2.3491,
|
77267 |
+
"step": 7574
|
77268 |
+
},
|
77269 |
+
{
|
77270 |
+
"epoch": 0.9806777357024954,
|
77271 |
+
"grad_norm": 1.0696062980009842,
|
77272 |
+
"learning_rate": 1.4166533514609754e-09,
|
77273 |
+
"loss": 2.189,
|
77274 |
+
"step": 7575
|
77275 |
+
},
|
77276 |
+
{
|
77277 |
+
"epoch": 0.9806777357024954,
|
77278 |
+
"eval_PRM Accuracy": 0.2358490566037736,
|
77279 |
+
"eval_PRM F1": 0.047058823529411764,
|
77280 |
+
"eval_PRM F1 AUC": 0.5120481927710844,
|
77281 |
+
"eval_PRM F1 Neg": 0.36220472440944884,
|
77282 |
+
"eval_PRM NPV": 0.22115384615384615,
|
77283 |
+
"eval_PRM Precision": 1.0,
|
77284 |
+
"eval_PRM Recall": 0.024096385542168676,
|
77285 |
+
"eval_PRM Specificty": 1.0,
|
77286 |
+
"eval_loss": 1.4971369504928589,
|
77287 |
+
"eval_runtime": 14.5855,
|
77288 |
+
"eval_samples_per_second": 3.017,
|
77289 |
+
"eval_steps_per_second": 0.206,
|
77290 |
+
"step": 7575
|
77291 |
+
},
|
77292 |
+
{
|
77293 |
+
"epoch": 0.9808071981098488,
|
77294 |
+
"grad_norm": 1.0643332651729815,
|
77295 |
+
"learning_rate": 1.3977087474667527e-09,
|
77296 |
+
"loss": 2.2942,
|
77297 |
+
"step": 7576
|
77298 |
+
},
|
77299 |
+
{
|
77300 |
+
"epoch": 0.9809366605172023,
|
77301 |
+
"grad_norm": 1.3466758091724915,
|
77302 |
+
"learning_rate": 1.3788915268804492e-09,
|
77303 |
+
"loss": 2.3136,
|
77304 |
+
"step": 7577
|
77305 |
+
},
|
77306 |
+
{
|
77307 |
+
"epoch": 0.9810661229245558,
|
77308 |
+
"grad_norm": 2.204496975992815,
|
77309 |
+
"learning_rate": 1.3602016935457262e-09,
|
77310 |
+
"loss": 2.4861,
|
77311 |
+
"step": 7578
|
77312 |
+
},
|
77313 |
+
{
|
77314 |
+
"epoch": 0.9811955853319092,
|
77315 |
+
"grad_norm": 1.414782482719819,
|
77316 |
+
"learning_rate": 1.3416392512804327e-09,
|
77317 |
+
"loss": 2.283,
|
77318 |
+
"step": 7579
|
77319 |
+
},
|
77320 |
+
{
|
77321 |
+
"epoch": 0.9813250477392627,
|
77322 |
+
"grad_norm": 2.985776601234628,
|
77323 |
+
"learning_rate": 1.3232042038762577e-09,
|
77324 |
+
"loss": 2.5293,
|
77325 |
+
"step": 7580
|
77326 |
+
},
|
77327 |
+
{
|
77328 |
+
"epoch": 0.9813250477392627,
|
77329 |
+
"eval_PRM Accuracy": 0.2358490566037736,
|
77330 |
+
"eval_PRM F1": 0.047058823529411764,
|
77331 |
+
"eval_PRM F1 AUC": 0.5120481927710844,
|
77332 |
+
"eval_PRM F1 Neg": 0.36220472440944884,
|
77333 |
+
"eval_PRM NPV": 0.22115384615384615,
|
77334 |
+
"eval_PRM Precision": 1.0,
|
77335 |
+
"eval_PRM Recall": 0.024096385542168676,
|
77336 |
+
"eval_PRM Specificty": 1.0,
|
77337 |
+
"eval_loss": 1.4950283765792847,
|
77338 |
+
"eval_runtime": 14.9875,
|
77339 |
+
"eval_samples_per_second": 2.936,
|
77340 |
+
"eval_steps_per_second": 0.2,
|
77341 |
+
"step": 7580
|
77342 |
+
},
|
77343 |
+
{
|
77344 |
+
"epoch": 0.9814545101466162,
|
77345 |
+
"grad_norm": 2.130158472795342,
|
77346 |
+
"learning_rate": 1.3048965550990083e-09,
|
77347 |
+
"loss": 2.3813,
|
77348 |
+
"step": 7581
|
77349 |
+
},
|
77350 |
+
{
|
77351 |
+
"epoch": 0.9815839725539697,
|
77352 |
+
"grad_norm": 1.623329970728458,
|
77353 |
+
"learning_rate": 1.2867163086884015e-09,
|
77354 |
+
"loss": 2.3645,
|
77355 |
+
"step": 7582
|
77356 |
+
},
|
77357 |
+
{
|
77358 |
+
"epoch": 0.9817134349613231,
|
77359 |
+
"grad_norm": 1.9459265474325154,
|
77360 |
+
"learning_rate": 1.268663468358064e-09,
|
77361 |
+
"loss": 2.3744,
|
77362 |
+
"step": 7583
|
77363 |
+
},
|
77364 |
+
{
|
77365 |
+
"epoch": 0.9818428973686766,
|
77366 |
+
"grad_norm": 2.1195756554460385,
|
77367 |
+
"learning_rate": 1.2507380377956012e-09,
|
77368 |
+
"loss": 2.4797,
|
77369 |
+
"step": 7584
|
77370 |
+
},
|
77371 |
+
{
|
77372 |
+
"epoch": 0.98197235977603,
|
77373 |
+
"grad_norm": 1.7386627470623568,
|
77374 |
+
"learning_rate": 1.2329400206628068e-09,
|
77375 |
+
"loss": 2.4109,
|
77376 |
+
"step": 7585
|
77377 |
+
},
|
77378 |
+
{
|
77379 |
+
"epoch": 0.98197235977603,
|
77380 |
+
"eval_PRM Accuracy": 0.2358490566037736,
|
77381 |
+
"eval_PRM F1": 0.047058823529411764,
|
77382 |
+
"eval_PRM F1 AUC": 0.5120481927710844,
|
77383 |
+
"eval_PRM F1 Neg": 0.36220472440944884,
|
77384 |
+
"eval_PRM NPV": 0.22115384615384615,
|
77385 |
+
"eval_PRM Precision": 1.0,
|
77386 |
+
"eval_PRM Recall": 0.024096385542168676,
|
77387 |
+
"eval_PRM Specificty": 1.0,
|
77388 |
+
"eval_loss": 1.4935635328292847,
|
77389 |
+
"eval_runtime": 13.6963,
|
77390 |
+
"eval_samples_per_second": 3.213,
|
77391 |
+
"eval_steps_per_second": 0.219,
|
77392 |
+
"step": 7585
|
77393 |
+
},
|
77394 |
+
{
|
77395 |
+
"epoch": 0.9821018221833835,
|
77396 |
+
"grad_norm": 1.1887944261227992,
|
77397 |
+
"learning_rate": 1.2152694205951054e-09,
|
77398 |
+
"loss": 2.2488,
|
77399 |
+
"step": 7586
|
77400 |
+
},
|
77401 |
+
{
|
77402 |
+
"epoch": 0.9822312845907369,
|
77403 |
+
"grad_norm": 2.586722476127736,
|
77404 |
+
"learning_rate": 1.1977262412022489e-09,
|
77405 |
+
"loss": 2.4194,
|
77406 |
+
"step": 7587
|
77407 |
+
},
|
77408 |
+
{
|
77409 |
+
"epoch": 0.9823607469980904,
|
77410 |
+
"grad_norm": 2.4786217471919127,
|
77411 |
+
"learning_rate": 1.18031048606769e-09,
|
77412 |
+
"loss": 2.8074,
|
77413 |
+
"step": 7588
|
77414 |
+
},
|
77415 |
+
{
|
77416 |
+
"epoch": 0.9824902094054438,
|
77417 |
+
"grad_norm": 3.7031030228112605,
|
77418 |
+
"learning_rate": 1.1630221587489304e-09,
|
77419 |
+
"loss": 2.2295,
|
77420 |
+
"step": 7589
|
77421 |
+
},
|
77422 |
+
{
|
77423 |
+
"epoch": 0.9826196718127974,
|
77424 |
+
"grad_norm": 2.26698201879527,
|
77425 |
+
"learning_rate": 1.1458612627775201e-09,
|
77426 |
+
"loss": 2.2422,
|
77427 |
+
"step": 7590
|
77428 |
+
},
|
77429 |
+
{
|
77430 |
+
"epoch": 0.9826196718127974,
|
77431 |
+
"eval_PRM Accuracy": 0.2358490566037736,
|
77432 |
+
"eval_PRM F1": 0.047058823529411764,
|
77433 |
+
"eval_PRM F1 AUC": 0.5120481927710844,
|
77434 |
+
"eval_PRM F1 Neg": 0.36220472440944884,
|
77435 |
+
"eval_PRM NPV": 0.22115384615384615,
|
77436 |
+
"eval_PRM Precision": 1.0,
|
77437 |
+
"eval_PRM Recall": 0.024096385542168676,
|
77438 |
+
"eval_PRM Specificty": 1.0,
|
77439 |
+
"eval_loss": 1.4950283765792847,
|
77440 |
+
"eval_runtime": 14.7278,
|
77441 |
+
"eval_samples_per_second": 2.988,
|
77442 |
+
"eval_steps_per_second": 0.204,
|
77443 |
+
"step": 7590
|
77444 |
+
},
|
77445 |
+
{
|
77446 |
+
"epoch": 0.9827491342201509,
|
77447 |
+
"grad_norm": 1.8635644563124136,
|
77448 |
+
"learning_rate": 1.1288278016588493e-09,
|
77449 |
+
"loss": 2.2876,
|
77450 |
+
"step": 7591
|
77451 |
+
},
|
77452 |
+
{
|
77453 |
+
"epoch": 0.9828785966275043,
|
77454 |
+
"grad_norm": 3.6206585566509584,
|
77455 |
+
"learning_rate": 1.1119217788724268e-09,
|
77456 |
+
"loss": 2.5964,
|
77457 |
+
"step": 7592
|
77458 |
+
},
|
77459 |
+
{
|
77460 |
+
"epoch": 0.9830080590348578,
|
77461 |
+
"grad_norm": 1.868015361402047,
|
77462 |
+
"learning_rate": 1.095143197871601e-09,
|
77463 |
+
"loss": 2.5551,
|
77464 |
+
"step": 7593
|
77465 |
+
},
|
77466 |
+
{
|
77467 |
+
"epoch": 0.9831375214422112,
|
77468 |
+
"grad_norm": 1.266705226144988,
|
77469 |
+
"learning_rate": 1.078492062083769e-09,
|
77470 |
+
"loss": 2.3367,
|
77471 |
+
"step": 7594
|
77472 |
+
},
|
77473 |
+
{
|
77474 |
+
"epoch": 0.9832669838495647,
|
77475 |
+
"grad_norm": 1.8780513790793911,
|
77476 |
+
"learning_rate": 1.0619683749102384e-09,
|
77477 |
+
"loss": 2.2266,
|
77478 |
+
"step": 7595
|
77479 |
+
},
|
77480 |
+
{
|
77481 |
+
"epoch": 0.9832669838495647,
|
77482 |
+
"eval_PRM Accuracy": 0.2358490566037736,
|
77483 |
+
"eval_PRM F1": 0.047058823529411764,
|
77484 |
+
"eval_PRM F1 AUC": 0.5120481927710844,
|
77485 |
+
"eval_PRM F1 Neg": 0.36220472440944884,
|
77486 |
+
"eval_PRM NPV": 0.22115384615384615,
|
77487 |
+
"eval_PRM Precision": 1.0,
|
77488 |
+
"eval_PRM Recall": 0.024096385542168676,
|
77489 |
+
"eval_PRM Specificty": 1.0,
|
77490 |
+
"eval_loss": 1.4957386255264282,
|
77491 |
+
"eval_runtime": 14.1659,
|
77492 |
+
"eval_samples_per_second": 3.106,
|
77493 |
+
"eval_steps_per_second": 0.212,
|
77494 |
+
"step": 7595
|
77495 |
+
},
|
77496 |
+
{
|
77497 |
+
"epoch": 0.9833964462569181,
|
77498 |
+
"grad_norm": 2.314347414973597,
|
77499 |
+
"learning_rate": 1.045572139726364e-09,
|
77500 |
+
"loss": 2.3955,
|
77501 |
+
"step": 7596
|
77502 |
+
},
|
77503 |
+
{
|
77504 |
+
"epoch": 0.9835259086642716,
|
77505 |
+
"grad_norm": 2.6351190170336465,
|
77506 |
+
"learning_rate": 1.029303359881273e-09,
|
77507 |
+
"loss": 2.2913,
|
77508 |
+
"step": 7597
|
77509 |
+
},
|
77510 |
+
{
|
77511 |
+
"epoch": 0.983655371071625,
|
77512 |
+
"grad_norm": 1.7546872803255078,
|
77513 |
+
"learning_rate": 1.0131620386983482e-09,
|
77514 |
+
"loss": 2.4309,
|
77515 |
+
"step": 7598
|
77516 |
+
},
|
77517 |
+
{
|
77518 |
+
"epoch": 0.9837848334789785,
|
77519 |
+
"grad_norm": 2.3215434065106426,
|
77520 |
+
"learning_rate": 9.971481794746745e-10,
|
77521 |
+
"loss": 2.1787,
|
77522 |
+
"step": 7599
|
77523 |
+
},
|
77524 |
+
{
|
77525 |
+
"epoch": 0.9839142958863321,
|
77526 |
+
"grad_norm": 1.2198424416141735,
|
77527 |
+
"learning_rate": 9.812617854814549e-10,
|
77528 |
+
"loss": 2.3247,
|
77529 |
+
"step": 7600
|
77530 |
+
},
|
77531 |
+
{
|
77532 |
+
"epoch": 0.9839142958863321,
|
77533 |
+
"eval_PRM Accuracy": 0.2358490566037736,
|
77534 |
+
"eval_PRM F1": 0.047058823529411764,
|
77535 |
+
"eval_PRM F1 AUC": 0.5120481927710844,
|
77536 |
+
"eval_PRM F1 Neg": 0.36220472440944884,
|
77537 |
+
"eval_PRM NPV": 0.22115384615384615,
|
77538 |
+
"eval_PRM Precision": 1.0,
|
77539 |
+
"eval_PRM Recall": 0.024096385542168676,
|
77540 |
+
"eval_PRM Specificty": 1.0,
|
77541 |
+
"eval_loss": 1.4950283765792847,
|
77542 |
+
"eval_runtime": 14.5217,
|
77543 |
+
"eval_samples_per_second": 3.03,
|
77544 |
+
"eval_steps_per_second": 0.207,
|
77545 |
+
"step": 7600
|
77546 |
+
},
|
77547 |
+
{
|
77548 |
+
"epoch": 0.9840437582936855,
|
77549 |
+
"grad_norm": 1.519794772751305,
|
77550 |
+
"learning_rate": 9.655028599638017e-10,
|
77551 |
+
"loss": 2.5454,
|
77552 |
+
"step": 7601
|
77553 |
+
},
|
77554 |
+
{
|
77555 |
+
"epoch": 0.984173220701039,
|
77556 |
+
"grad_norm": 1.2600965157647652,
|
77557 |
+
"learning_rate": 9.498714061408065e-10,
|
77558 |
+
"loss": 2.21,
|
77559 |
+
"step": 7602
|
77560 |
+
},
|
77561 |
+
{
|
77562 |
+
"epoch": 0.9843026831083924,
|
77563 |
+
"grad_norm": 2.838040649791305,
|
77564 |
+
"learning_rate": 9.343674272054709e-10,
|
77565 |
+
"loss": 2.4016,
|
77566 |
+
"step": 7603
|
77567 |
+
},
|
77568 |
+
{
|
77569 |
+
"epoch": 0.9844321455157459,
|
77570 |
+
"grad_norm": 1.6408515614253465,
|
77571 |
+
"learning_rate": 9.189909263248448e-10,
|
77572 |
+
"loss": 2.3752,
|
77573 |
+
"step": 7604
|
77574 |
+
},
|
77575 |
+
{
|
77576 |
+
"epoch": 0.9845616079230993,
|
77577 |
+
"grad_norm": 2.1381858784410843,
|
77578 |
+
"learning_rate": 9.037419066398878e-10,
|
77579 |
+
"loss": 2.5369,
|
77580 |
+
"step": 7605
|
77581 |
+
},
|
77582 |
+
{
|
77583 |
+
"epoch": 0.9845616079230993,
|
77584 |
+
"eval_PRM Accuracy": 0.2358490566037736,
|
77585 |
+
"eval_PRM F1": 0.047058823529411764,
|
77586 |
+
"eval_PRM F1 AUC": 0.5120481927710844,
|
77587 |
+
"eval_PRM F1 Neg": 0.36220472440944884,
|
77588 |
+
"eval_PRM NPV": 0.22115384615384615,
|
77589 |
+
"eval_PRM Precision": 1.0,
|
77590 |
+
"eval_PRM Recall": 0.024096385542168676,
|
77591 |
+
"eval_PRM Specificty": 1.0,
|
77592 |
+
"eval_loss": 1.4979802370071411,
|
77593 |
+
"eval_runtime": 13.3075,
|
77594 |
+
"eval_samples_per_second": 3.306,
|
77595 |
+
"eval_steps_per_second": 0.225,
|
77596 |
+
"step": 7605
|
77597 |
+
},
|
77598 |
+
{
|
77599 |
+
"epoch": 0.9846910703304528,
|
77600 |
+
"grad_norm": 1.5820801596214173,
|
77601 |
+
"learning_rate": 8.886203712654695e-10,
|
77602 |
+
"loss": 2.5869,
|
77603 |
+
"step": 7606
|
77604 |
+
},
|
77605 |
+
{
|
77606 |
+
"epoch": 0.9848205327378062,
|
77607 |
+
"grad_norm": 3.27107927874882,
|
77608 |
+
"learning_rate": 8.736263232905079e-10,
|
77609 |
+
"loss": 2.4082,
|
77610 |
+
"step": 7607
|
77611 |
+
},
|
77612 |
+
{
|
77613 |
+
"epoch": 0.9849499951451597,
|
77614 |
+
"grad_norm": 1.6374668708983278,
|
77615 |
+
"learning_rate": 8.587597657778307e-10,
|
77616 |
+
"loss": 2.4065,
|
77617 |
+
"step": 7608
|
77618 |
+
},
|
77619 |
+
{
|
77620 |
+
"epoch": 0.9850794575525131,
|
77621 |
+
"grad_norm": 1.727336438934298,
|
77622 |
+
"learning_rate": 8.440207017641755e-10,
|
77623 |
+
"loss": 2.5813,
|
77624 |
+
"step": 7609
|
77625 |
+
},
|
77626 |
+
{
|
77627 |
+
"epoch": 0.9852089199598667,
|
77628 |
+
"grad_norm": 1.6646088684556826,
|
77629 |
+
"learning_rate": 8.294091342604671e-10,
|
77630 |
+
"loss": 2.4685,
|
77631 |
+
"step": 7610
|
77632 |
+
},
|
77633 |
+
{
|
77634 |
+
"epoch": 0.9852089199598667,
|
77635 |
+
"eval_PRM Accuracy": 0.2358490566037736,
|
77636 |
+
"eval_PRM F1": 0.047058823529411764,
|
77637 |
+
"eval_PRM F1 AUC": 0.5120481927710844,
|
77638 |
+
"eval_PRM F1 Neg": 0.36220472440944884,
|
77639 |
+
"eval_PRM NPV": 0.22115384615384615,
|
77640 |
+
"eval_PRM Precision": 1.0,
|
77641 |
+
"eval_PRM Recall": 0.024096385542168676,
|
77642 |
+
"eval_PRM Specificty": 1.0,
|
77643 |
+
"eval_loss": 1.4964932203292847,
|
77644 |
+
"eval_runtime": 14.4666,
|
77645 |
+
"eval_samples_per_second": 3.041,
|
77646 |
+
"eval_steps_per_second": 0.207,
|
77647 |
+
"step": 7610
|
77648 |
+
},
|
77649 |
+
{
|
77650 |
+
"epoch": 0.9853383823672202,
|
77651 |
+
"grad_norm": 1.6183130409645539,
|
77652 |
+
"learning_rate": 8.149250662512626e-10,
|
77653 |
+
"loss": 2.2349,
|
77654 |
+
"step": 7611
|
77655 |
+
},
|
77656 |
+
{
|
77657 |
+
"epoch": 0.9854678447745736,
|
77658 |
+
"grad_norm": 1.9724083536469152,
|
77659 |
+
"learning_rate": 8.005685006952368e-10,
|
77660 |
+
"loss": 2.4695,
|
77661 |
+
"step": 7612
|
77662 |
+
},
|
77663 |
+
{
|
77664 |
+
"epoch": 0.9855973071819271,
|
77665 |
+
"grad_norm": 1.3922509704379329,
|
77666 |
+
"learning_rate": 7.863394405251135e-10,
|
77667 |
+
"loss": 2.374,
|
77668 |
+
"step": 7613
|
77669 |
+
},
|
77670 |
+
{
|
77671 |
+
"epoch": 0.9857267695892805,
|
77672 |
+
"grad_norm": 2.1223017834335094,
|
77673 |
+
"learning_rate": 7.722378886473176e-10,
|
77674 |
+
"loss": 2.1777,
|
77675 |
+
"step": 7614
|
77676 |
+
},
|
77677 |
+
{
|
77678 |
+
"epoch": 0.985856231996634,
|
77679 |
+
"grad_norm": 2.718200331729673,
|
77680 |
+
"learning_rate": 7.582638479424614e-10,
|
77681 |
+
"loss": 2.4619,
|
77682 |
+
"step": 7615
|
77683 |
+
},
|
77684 |
+
{
|
77685 |
+
"epoch": 0.985856231996634,
|
77686 |
+
"eval_PRM Accuracy": 0.2358490566037736,
|
77687 |
+
"eval_PRM F1": 0.047058823529411764,
|
77688 |
+
"eval_PRM F1 AUC": 0.5120481927710844,
|
77689 |
+
"eval_PRM F1 Neg": 0.36220472440944884,
|
77690 |
+
"eval_PRM NPV": 0.22115384615384615,
|
77691 |
+
"eval_PRM Precision": 1.0,
|
77692 |
+
"eval_PRM Recall": 0.024096385542168676,
|
77693 |
+
"eval_PRM Specificty": 1.0,
|
77694 |
+
"eval_loss": 1.493607997894287,
|
77695 |
+
"eval_runtime": 15.0409,
|
77696 |
+
"eval_samples_per_second": 2.925,
|
77697 |
+
"eval_steps_per_second": 0.199,
|
77698 |
+
"step": 7615
|
77699 |
+
},
|
77700 |
+
{
|
77701 |
+
"epoch": 0.9859856944039874,
|
77702 |
+
"grad_norm": 1.8789229257617968,
|
77703 |
+
"learning_rate": 7.444173212650673e-10,
|
77704 |
+
"loss": 2.3359,
|
77705 |
+
"step": 7616
|
77706 |
+
},
|
77707 |
+
{
|
77708 |
+
"epoch": 0.9861151568113409,
|
77709 |
+
"grad_norm": 2.58106500301192,
|
77710 |
+
"learning_rate": 7.306983114434979e-10,
|
77711 |
+
"loss": 2.5012,
|
77712 |
+
"step": 7617
|
77713 |
+
},
|
77714 |
+
{
|
77715 |
+
"epoch": 0.9862446192186943,
|
77716 |
+
"grad_norm": 1.7565557791717235,
|
77717 |
+
"learning_rate": 7.171068212801641e-10,
|
77718 |
+
"loss": 2.3738,
|
77719 |
+
"step": 7618
|
77720 |
+
},
|
77721 |
+
{
|
77722 |
+
"epoch": 0.9863740816260478,
|
77723 |
+
"grad_norm": 2.554639240543607,
|
77724 |
+
"learning_rate": 7.036428535513867e-10,
|
77725 |
+
"loss": 2.5596,
|
77726 |
+
"step": 7619
|
77727 |
+
},
|
77728 |
+
{
|
77729 |
+
"epoch": 0.9865035440334013,
|
77730 |
+
"grad_norm": 2.2603757514179184,
|
77731 |
+
"learning_rate": 6.903064110074659e-10,
|
77732 |
+
"loss": 2.4717,
|
77733 |
+
"step": 7620
|
77734 |
+
},
|
77735 |
+
{
|
77736 |
+
"epoch": 0.9865035440334013,
|
77737 |
+
"eval_PRM Accuracy": 0.2358490566037736,
|
77738 |
+
"eval_PRM F1": 0.047058823529411764,
|
77739 |
+
"eval_PRM F1 AUC": 0.5120481927710844,
|
77740 |
+
"eval_PRM F1 Neg": 0.36220472440944884,
|
77741 |
+
"eval_PRM NPV": 0.22115384615384615,
|
77742 |
+
"eval_PRM Precision": 1.0,
|
77743 |
+
"eval_PRM Recall": 0.024096385542168676,
|
77744 |
+
"eval_PRM Specificty": 1.0,
|
77745 |
+
"eval_loss": 1.4978693723678589,
|
77746 |
+
"eval_runtime": 14.3112,
|
77747 |
+
"eval_samples_per_second": 3.075,
|
77748 |
+
"eval_steps_per_second": 0.21,
|
77749 |
+
"step": 7620
|
77750 |
+
},
|
77751 |
+
{
|
77752 |
+
"epoch": 0.9866330064407548,
|
77753 |
+
"grad_norm": 1.848935015750131,
|
77754 |
+
"learning_rate": 6.770974963726807e-10,
|
77755 |
+
"loss": 2.4548,
|
77756 |
+
"step": 7621
|
77757 |
+
},
|
77758 |
+
{
|
77759 |
+
"epoch": 0.9867624688481083,
|
77760 |
+
"grad_norm": 1.4956095265091809,
|
77761 |
+
"learning_rate": 6.640161123451505e-10,
|
77762 |
+
"loss": 2.1816,
|
77763 |
+
"step": 7622
|
77764 |
+
},
|
77765 |
+
{
|
77766 |
+
"epoch": 0.9868919312554617,
|
77767 |
+
"grad_norm": 1.630094864183574,
|
77768 |
+
"learning_rate": 6.510622615970436e-10,
|
77769 |
+
"loss": 2.3577,
|
77770 |
+
"step": 7623
|
77771 |
+
},
|
77772 |
+
{
|
77773 |
+
"epoch": 0.9870213936628152,
|
77774 |
+
"grad_norm": 3.1326216222556083,
|
77775 |
+
"learning_rate": 6.382359467744375e-10,
|
77776 |
+
"loss": 2.3452,
|
77777 |
+
"step": 7624
|
77778 |
+
},
|
77779 |
+
{
|
77780 |
+
"epoch": 0.9871508560701686,
|
77781 |
+
"grad_norm": 2.2543400031695144,
|
77782 |
+
"learning_rate": 6.255371704973895e-10,
|
77783 |
+
"loss": 2.4229,
|
77784 |
+
"step": 7625
|
77785 |
+
},
|
77786 |
+
{
|
77787 |
+
"epoch": 0.9871508560701686,
|
77788 |
+
"eval_PRM Accuracy": 0.2358490566037736,
|
77789 |
+
"eval_PRM F1": 0.047058823529411764,
|
77790 |
+
"eval_PRM F1 AUC": 0.5120481927710844,
|
77791 |
+
"eval_PRM F1 Neg": 0.36220472440944884,
|
77792 |
+
"eval_PRM NPV": 0.22115384615384615,
|
77793 |
+
"eval_PRM Precision": 1.0,
|
77794 |
+
"eval_PRM Recall": 0.024096385542168676,
|
77795 |
+
"eval_PRM Specificty": 1.0,
|
77796 |
+
"eval_loss": 1.4950283765792847,
|
77797 |
+
"eval_runtime": 15.7043,
|
77798 |
+
"eval_samples_per_second": 2.802,
|
77799 |
+
"eval_steps_per_second": 0.191,
|
77800 |
+
"step": 7625
|
77801 |
+
},
|
77802 |
+
{
|
77803 |
+
"epoch": 0.9872803184775221,
|
77804 |
+
"grad_norm": 1.4943666856649598,
|
77805 |
+
"learning_rate": 6.129659353597967e-10,
|
77806 |
+
"loss": 2.377,
|
77807 |
+
"step": 7626
|
77808 |
+
},
|
77809 |
+
{
|
77810 |
+
"epoch": 0.9874097808848755,
|
77811 |
+
"grad_norm": 1.6970512279219263,
|
77812 |
+
"learning_rate": 6.005222439297437e-10,
|
77813 |
+
"loss": 2.458,
|
77814 |
+
"step": 7627
|
77815 |
+
},
|
77816 |
+
{
|
77817 |
+
"epoch": 0.987539243292229,
|
77818 |
+
"grad_norm": 1.360629472544963,
|
77819 |
+
"learning_rate": 5.882060987490168e-10,
|
77820 |
+
"loss": 2.2747,
|
77821 |
+
"step": 7628
|
77822 |
+
},
|
77823 |
+
{
|
77824 |
+
"epoch": 0.9876687056995825,
|
77825 |
+
"grad_norm": 1.947585735123565,
|
77826 |
+
"learning_rate": 5.760175023334507e-10,
|
77827 |
+
"loss": 2.4478,
|
77828 |
+
"step": 7629
|
77829 |
+
},
|
77830 |
+
{
|
77831 |
+
"epoch": 0.987798168106936,
|
77832 |
+
"grad_norm": 1.506017763428392,
|
77833 |
+
"learning_rate": 5.639564571727902e-10,
|
77834 |
+
"loss": 2.4949,
|
77835 |
+
"step": 7630
|
77836 |
+
},
|
77837 |
+
{
|
77838 |
+
"epoch": 0.987798168106936,
|
77839 |
+
"eval_PRM Accuracy": 0.2358490566037736,
|
77840 |
+
"eval_PRM F1": 0.047058823529411764,
|
77841 |
+
"eval_PRM F1 AUC": 0.5120481927710844,
|
77842 |
+
"eval_PRM F1 Neg": 0.36220472440944884,
|
77843 |
+
"eval_PRM NPV": 0.22115384615384615,
|
77844 |
+
"eval_PRM Precision": 1.0,
|
77845 |
+
"eval_PRM Recall": 0.024096385542168676,
|
77846 |
+
"eval_PRM Specificty": 1.0,
|
77847 |
+
"eval_loss": 1.4949840307235718,
|
77848 |
+
"eval_runtime": 14.2747,
|
77849 |
+
"eval_samples_per_second": 3.082,
|
77850 |
+
"eval_steps_per_second": 0.21,
|
77851 |
+
"step": 7630
|
77852 |
+
},
|
77853 |
+
{
|
77854 |
+
"epoch": 0.9879276305142894,
|
77855 |
+
"grad_norm": 2.42281583678435,
|
77856 |
+
"learning_rate": 5.520229657307585e-10,
|
77857 |
+
"loss": 2.3638,
|
77858 |
+
"step": 7631
|
77859 |
+
},
|
77860 |
+
{
|
77861 |
+
"epoch": 0.9880570929216429,
|
77862 |
+
"grad_norm": 1.5200091251742374,
|
77863 |
+
"learning_rate": 5.402170304449894e-10,
|
77864 |
+
"loss": 2.3445,
|
77865 |
+
"step": 7632
|
77866 |
+
},
|
77867 |
+
{
|
77868 |
+
"epoch": 0.9881865553289964,
|
77869 |
+
"grad_norm": 3.56315214653513,
|
77870 |
+
"learning_rate": 5.285386537271647e-10,
|
77871 |
+
"loss": 2.4712,
|
77872 |
+
"step": 7633
|
77873 |
+
},
|
77874 |
+
{
|
77875 |
+
"epoch": 0.9883160177363498,
|
77876 |
+
"grad_norm": 1.4993946555982482,
|
77877 |
+
"learning_rate": 5.169878379628069e-10,
|
77878 |
+
"loss": 2.3345,
|
77879 |
+
"step": 7634
|
77880 |
+
},
|
77881 |
+
{
|
77882 |
+
"epoch": 0.9884454801437033,
|
77883 |
+
"grad_norm": 1.3766723430853567,
|
77884 |
+
"learning_rate": 5.055645855112784e-10,
|
77885 |
+
"loss": 2.2922,
|
77886 |
+
"step": 7635
|
77887 |
+
},
|
77888 |
+
{
|
77889 |
+
"epoch": 0.9884454801437033,
|
77890 |
+
"eval_PRM Accuracy": 0.2358490566037736,
|
77891 |
+
"eval_PRM F1": 0.047058823529411764,
|
77892 |
+
"eval_PRM F1 AUC": 0.5120481927710844,
|
77893 |
+
"eval_PRM F1 Neg": 0.36220472440944884,
|
77894 |
+
"eval_PRM NPV": 0.22115384615384615,
|
77895 |
+
"eval_PRM Precision": 1.0,
|
77896 |
+
"eval_PRM Recall": 0.024096385542168676,
|
77897 |
+
"eval_PRM Specificty": 1.0,
|
77898 |
+
"eval_loss": 1.4942737817764282,
|
77899 |
+
"eval_runtime": 13.9902,
|
77900 |
+
"eval_samples_per_second": 3.145,
|
77901 |
+
"eval_steps_per_second": 0.214,
|
77902 |
+
"step": 7635
|
77903 |
+
},
|
77904 |
+
{
|
77905 |
+
"epoch": 0.9885749425510567,
|
77906 |
+
"grad_norm": 2.765105962883385,
|
77907 |
+
"learning_rate": 4.942688987061989e-10,
|
77908 |
+
"loss": 2.948,
|
77909 |
+
"step": 7636
|
77910 |
+
},
|
77911 |
+
{
|
77912 |
+
"epoch": 0.9887044049584102,
|
77913 |
+
"grad_norm": 1.7884727220792906,
|
77914 |
+
"learning_rate": 4.831007798548199e-10,
|
77915 |
+
"loss": 2.4609,
|
77916 |
+
"step": 7637
|
77917 |
+
},
|
77918 |
+
{
|
77919 |
+
"epoch": 0.9888338673657636,
|
77920 |
+
"grad_norm": 2.585771279521546,
|
77921 |
+
"learning_rate": 4.720602312385109e-10,
|
77922 |
+
"loss": 2.498,
|
77923 |
+
"step": 7638
|
77924 |
+
},
|
77925 |
+
{
|
77926 |
+
"epoch": 0.9889633297731172,
|
77927 |
+
"grad_norm": 1.6577037565047226,
|
77928 |
+
"learning_rate": 4.611472551124818e-10,
|
77929 |
+
"loss": 2.323,
|
77930 |
+
"step": 7639
|
77931 |
+
},
|
77932 |
+
{
|
77933 |
+
"epoch": 0.9890927921804706,
|
77934 |
+
"grad_norm": 2.742440349149222,
|
77935 |
+
"learning_rate": 4.5036185370592176e-10,
|
77936 |
+
"loss": 2.7051,
|
77937 |
+
"step": 7640
|
77938 |
+
},
|
77939 |
+
{
|
77940 |
+
"epoch": 0.9890927921804706,
|
77941 |
+
"eval_PRM Accuracy": 0.2358490566037736,
|
77942 |
+
"eval_PRM F1": 0.047058823529411764,
|
77943 |
+
"eval_PRM F1 AUC": 0.5120481927710844,
|
77944 |
+
"eval_PRM F1 Neg": 0.36220472440944884,
|
77945 |
+
"eval_PRM NPV": 0.22115384615384615,
|
77946 |
+
"eval_PRM Precision": 1.0,
|
77947 |
+
"eval_PRM Recall": 0.024096385542168676,
|
77948 |
+
"eval_PRM Specificty": 1.0,
|
77949 |
+
"eval_loss": 1.4964932203292847,
|
77950 |
+
"eval_runtime": 14.2938,
|
77951 |
+
"eval_samples_per_second": 3.078,
|
77952 |
+
"eval_steps_per_second": 0.21,
|
77953 |
+
"step": 7640
|
77954 |
+
},
|
77955 |
+
{
|
77956 |
+
"epoch": 0.9892222545878241,
|
77957 |
+
"grad_norm": 1.899570315318355,
|
77958 |
+
"learning_rate": 4.3970402922199895e-10,
|
77959 |
+
"loss": 2.3279,
|
77960 |
+
"step": 7641
|
77961 |
+
},
|
77962 |
+
{
|
77963 |
+
"epoch": 0.9893517169951775,
|
77964 |
+
"grad_norm": 1.429668343151256,
|
77965 |
+
"learning_rate": 4.291737838378607e-10,
|
77966 |
+
"loss": 2.2539,
|
77967 |
+
"step": 7642
|
77968 |
+
},
|
77969 |
+
{
|
77970 |
+
"epoch": 0.989481179402531,
|
77971 |
+
"grad_norm": 1.666726346466479,
|
77972 |
+
"learning_rate": 4.187711197043559e-10,
|
77973 |
+
"loss": 2.2537,
|
77974 |
+
"step": 7643
|
77975 |
+
},
|
77976 |
+
{
|
77977 |
+
"epoch": 0.9896106418098845,
|
77978 |
+
"grad_norm": 1.7583614698230214,
|
77979 |
+
"learning_rate": 4.0849603894659027e-10,
|
77980 |
+
"loss": 2.3398,
|
77981 |
+
"step": 7644
|
77982 |
+
},
|
77983 |
+
{
|
77984 |
+
"epoch": 0.9897401042172379,
|
77985 |
+
"grad_norm": 1.218907723659834,
|
77986 |
+
"learning_rate": 3.9834854366344044e-10,
|
77987 |
+
"loss": 2.1571,
|
77988 |
+
"step": 7645
|
77989 |
+
},
|
77990 |
+
{
|
77991 |
+
"epoch": 0.9897401042172379,
|
77992 |
+
"eval_PRM Accuracy": 0.2358490566037736,
|
77993 |
+
"eval_PRM F1": 0.047058823529411764,
|
77994 |
+
"eval_PRM F1 AUC": 0.5120481927710844,
|
77995 |
+
"eval_PRM F1 Neg": 0.36220472440944884,
|
77996 |
+
"eval_PRM NPV": 0.22115384615384615,
|
77997 |
+
"eval_PRM Precision": 1.0,
|
77998 |
+
"eval_PRM Recall": 0.024096385542168676,
|
77999 |
+
"eval_PRM Specificty": 1.0,
|
78000 |
+
"eval_loss": 1.4964932203292847,
|
78001 |
+
"eval_runtime": 13.6451,
|
78002 |
+
"eval_samples_per_second": 3.225,
|
78003 |
+
"eval_steps_per_second": 0.22,
|
78004 |
+
"step": 7645
|
78005 |
+
},
|
78006 |
+
{
|
78007 |
+
"epoch": 0.9898695666245914,
|
78008 |
+
"grad_norm": 2.16683363153041,
|
78009 |
+
"learning_rate": 3.8832863592762336e-10,
|
78010 |
+
"loss": 2.4543,
|
78011 |
+
"step": 7646
|
78012 |
+
},
|
78013 |
+
{
|
78014 |
+
"epoch": 0.9899990290319448,
|
78015 |
+
"grad_norm": 1.8880776975893787,
|
78016 |
+
"learning_rate": 3.7843631778604337e-10,
|
78017 |
+
"loss": 2.314,
|
78018 |
+
"step": 7647
|
78019 |
+
},
|
78020 |
+
{
|
78021 |
+
"epoch": 0.9901284914392983,
|
78022 |
+
"grad_norm": 1.913928145061539,
|
78023 |
+
"learning_rate": 3.686715912593064e-10,
|
78024 |
+
"loss": 2.385,
|
78025 |
+
"step": 7648
|
78026 |
+
},
|
78027 |
+
{
|
78028 |
+
"epoch": 0.9902579538466518,
|
78029 |
+
"grad_norm": 2.598615507596037,
|
78030 |
+
"learning_rate": 3.5903445834213625e-10,
|
78031 |
+
"loss": 2.4478,
|
78032 |
+
"step": 7649
|
78033 |
+
},
|
78034 |
+
{
|
78035 |
+
"epoch": 0.9903874162540053,
|
78036 |
+
"grad_norm": 2.781411612118827,
|
78037 |
+
"learning_rate": 3.4952492100302773e-10,
|
78038 |
+
"loss": 2.5112,
|
78039 |
+
"step": 7650
|
78040 |
+
},
|
78041 |
+
{
|
78042 |
+
"epoch": 0.9903874162540053,
|
78043 |
+
"eval_PRM Accuracy": 0.2358490566037736,
|
78044 |
+
"eval_PRM F1": 0.047058823529411764,
|
78045 |
+
"eval_PRM F1 AUC": 0.5120481927710844,
|
78046 |
+
"eval_PRM F1 Neg": 0.36220472440944884,
|
78047 |
+
"eval_PRM NPV": 0.22115384615384615,
|
78048 |
+
"eval_PRM Precision": 1.0,
|
78049 |
+
"eval_PRM Recall": 0.024096385542168676,
|
78050 |
+
"eval_PRM Specificty": 1.0,
|
78051 |
+
"eval_loss": 1.4964488744735718,
|
78052 |
+
"eval_runtime": 14.049,
|
78053 |
+
"eval_samples_per_second": 3.132,
|
78054 |
+
"eval_steps_per_second": 0.214,
|
78055 |
+
"step": 7650
|
78056 |
+
},
|
78057 |
+
{
|
78058 |
+
"epoch": 0.9905168786613587,
|
78059 |
+
"grad_norm": 4.920852837984811,
|
78060 |
+
"learning_rate": 3.4014298118452415e-10,
|
78061 |
+
"loss": 2.8179,
|
78062 |
+
"step": 7651
|
78063 |
+
},
|
78064 |
+
{
|
78065 |
+
"epoch": 0.9906463410687122,
|
78066 |
+
"grad_norm": 1.602683531452493,
|
78067 |
+
"learning_rate": 3.308886408031481e-10,
|
78068 |
+
"loss": 2.4128,
|
78069 |
+
"step": 7652
|
78070 |
+
},
|
78071 |
+
{
|
78072 |
+
"epoch": 0.9907758034760656,
|
78073 |
+
"grad_norm": 2.1060923661609112,
|
78074 |
+
"learning_rate": 3.217619017492624e-10,
|
78075 |
+
"loss": 2.374,
|
78076 |
+
"step": 7653
|
78077 |
+
},
|
78078 |
+
{
|
78079 |
+
"epoch": 0.9909052658834191,
|
78080 |
+
"grad_norm": 1.6204569374519608,
|
78081 |
+
"learning_rate": 3.127627658870702e-10,
|
78082 |
+
"loss": 2.3461,
|
78083 |
+
"step": 7654
|
78084 |
+
},
|
78085 |
+
{
|
78086 |
+
"epoch": 0.9910347282907725,
|
78087 |
+
"grad_norm": 1.1008634986922998,
|
78088 |
+
"learning_rate": 3.038912350549622e-10,
|
78089 |
+
"loss": 2.2966,
|
78090 |
+
"step": 7655
|
78091 |
+
},
|
78092 |
+
{
|
78093 |
+
"epoch": 0.9910347282907725,
|
78094 |
+
"eval_PRM Accuracy": 0.2358490566037736,
|
78095 |
+
"eval_PRM F1": 0.047058823529411764,
|
78096 |
+
"eval_PRM F1 AUC": 0.5120481927710844,
|
78097 |
+
"eval_PRM F1 Neg": 0.36220472440944884,
|
78098 |
+
"eval_PRM NPV": 0.22115384615384615,
|
78099 |
+
"eval_PRM Precision": 1.0,
|
78100 |
+
"eval_PRM Recall": 0.024096385542168676,
|
78101 |
+
"eval_PRM Specificty": 1.0,
|
78102 |
+
"eval_loss": 1.4950505495071411,
|
78103 |
+
"eval_runtime": 13.6045,
|
78104 |
+
"eval_samples_per_second": 3.234,
|
78105 |
+
"eval_steps_per_second": 0.221,
|
78106 |
+
"step": 7655
|
78107 |
+
},
|
78108 |
+
{
|
78109 |
+
"epoch": 0.991164190698126,
|
78110 |
+
"grad_norm": 2.1093204697180243,
|
78111 |
+
"learning_rate": 2.9514731106509985e-10,
|
78112 |
+
"loss": 2.3687,
|
78113 |
+
"step": 7656
|
78114 |
+
},
|
78115 |
+
{
|
78116 |
+
"epoch": 0.9912936531054795,
|
78117 |
+
"grad_norm": 1.9171259210801308,
|
78118 |
+
"learning_rate": 2.8653099570355447e-10,
|
78119 |
+
"loss": 2.4336,
|
78120 |
+
"step": 7657
|
78121 |
+
},
|
78122 |
+
{
|
78123 |
+
"epoch": 0.9914231155128329,
|
78124 |
+
"grad_norm": 2.3993710563557116,
|
78125 |
+
"learning_rate": 2.7804229073044584e-10,
|
78126 |
+
"loss": 2.3958,
|
78127 |
+
"step": 7658
|
78128 |
+
},
|
78129 |
+
{
|
78130 |
+
"epoch": 0.9915525779201865,
|
78131 |
+
"grad_norm": 1.2834745849996845,
|
78132 |
+
"learning_rate": 2.696811978797342e-10,
|
78133 |
+
"loss": 2.4238,
|
78134 |
+
"step": 7659
|
78135 |
+
},
|
78136 |
+
{
|
78137 |
+
"epoch": 0.9916820403275399,
|
78138 |
+
"grad_norm": 2.040530792128796,
|
78139 |
+
"learning_rate": 2.614477188593589e-10,
|
78140 |
+
"loss": 2.2217,
|
78141 |
+
"step": 7660
|
78142 |
+
},
|
78143 |
+
{
|
78144 |
+
"epoch": 0.9916820403275399,
|
78145 |
+
"eval_PRM Accuracy": 0.2358490566037736,
|
78146 |
+
"eval_PRM F1": 0.047058823529411764,
|
78147 |
+
"eval_PRM F1 AUC": 0.5120481927710844,
|
78148 |
+
"eval_PRM F1 Neg": 0.36220472440944884,
|
78149 |
+
"eval_PRM NPV": 0.22115384615384615,
|
78150 |
+
"eval_PRM Precision": 1.0,
|
78151 |
+
"eval_PRM Recall": 0.024096385542168676,
|
78152 |
+
"eval_PRM Specificty": 1.0,
|
78153 |
+
"eval_loss": 1.498579502105713,
|
78154 |
+
"eval_runtime": 13.8118,
|
78155 |
+
"eval_samples_per_second": 3.186,
|
78156 |
+
"eval_steps_per_second": 0.217,
|
78157 |
+
"step": 7660
|
78158 |
+
},
|
78159 |
+
{
|
78160 |
+
"epoch": 0.9918115027348934,
|
78161 |
+
"grad_norm": 1.146395381273546,
|
78162 |
+
"learning_rate": 2.5334185535109967e-10,
|
78163 |
+
"loss": 2.1646,
|
78164 |
+
"step": 7661
|
78165 |
+
},
|
78166 |
+
{
|
78167 |
+
"epoch": 0.9919409651422468,
|
78168 |
+
"grad_norm": 1.1207392650082522,
|
78169 |
+
"learning_rate": 2.453636090108541e-10,
|
78170 |
+
"loss": 2.3267,
|
78171 |
+
"step": 7662
|
78172 |
+
},
|
78173 |
+
{
|
78174 |
+
"epoch": 0.9920704275496003,
|
78175 |
+
"grad_norm": 1.749398980875084,
|
78176 |
+
"learning_rate": 2.375129814682908e-10,
|
78177 |
+
"loss": 2.3125,
|
78178 |
+
"step": 7663
|
78179 |
+
},
|
78180 |
+
{
|
78181 |
+
"epoch": 0.9921998899569537,
|
78182 |
+
"grad_norm": 1.4514255061424015,
|
78183 |
+
"learning_rate": 2.2978997432705764e-10,
|
78184 |
+
"loss": 2.2827,
|
78185 |
+
"step": 7664
|
78186 |
+
},
|
78187 |
+
{
|
78188 |
+
"epoch": 0.9923293523643072,
|
78189 |
+
"grad_norm": 1.3744947085311505,
|
78190 |
+
"learning_rate": 2.2219458916471205e-10,
|
78191 |
+
"loss": 2.1985,
|
78192 |
+
"step": 7665
|
78193 |
+
},
|
78194 |
+
{
|
78195 |
+
"epoch": 0.9923293523643072,
|
78196 |
+
"eval_PRM Accuracy": 0.2358490566037736,
|
78197 |
+
"eval_PRM F1": 0.047058823529411764,
|
78198 |
+
"eval_PRM F1 AUC": 0.5120481927710844,
|
78199 |
+
"eval_PRM F1 Neg": 0.36220472440944884,
|
78200 |
+
"eval_PRM NPV": 0.22115384615384615,
|
78201 |
+
"eval_PRM Precision": 1.0,
|
78202 |
+
"eval_PRM Recall": 0.024096385542168676,
|
78203 |
+
"eval_PRM Specificty": 1.0,
|
78204 |
+
"eval_loss": 1.4971369504928589,
|
78205 |
+
"eval_runtime": 13.8034,
|
78206 |
+
"eval_samples_per_second": 3.188,
|
78207 |
+
"eval_steps_per_second": 0.217,
|
78208 |
+
"step": 7665
|
78209 |
+
},
|
78210 |
+
{
|
78211 |
+
"epoch": 0.9924588147716606,
|
78212 |
+
"grad_norm": 1.9129455404788904,
|
78213 |
+
"learning_rate": 2.1472682753279073e-10,
|
78214 |
+
"loss": 2.4282,
|
78215 |
+
"step": 7666
|
78216 |
+
},
|
78217 |
+
{
|
78218 |
+
"epoch": 0.9925882771790141,
|
78219 |
+
"grad_norm": 1.6119870892933774,
|
78220 |
+
"learning_rate": 2.0738669095680952e-10,
|
78221 |
+
"loss": 2.4197,
|
78222 |
+
"step": 7667
|
78223 |
+
},
|
78224 |
+
{
|
78225 |
+
"epoch": 0.9927177395863676,
|
78226 |
+
"grad_norm": 2.025417941338564,
|
78227 |
+
"learning_rate": 2.0017418093598583e-10,
|
78228 |
+
"loss": 2.5481,
|
78229 |
+
"step": 7668
|
78230 |
+
},
|
78231 |
+
{
|
78232 |
+
"epoch": 0.9928472019937211,
|
78233 |
+
"grad_norm": 1.7189407225275821,
|
78234 |
+
"learning_rate": 1.9308929894372441e-10,
|
78235 |
+
"loss": 2.249,
|
78236 |
+
"step": 7669
|
78237 |
+
},
|
78238 |
+
{
|
78239 |
+
"epoch": 0.9929766644010746,
|
78240 |
+
"grad_norm": 2.6274370671024756,
|
78241 |
+
"learning_rate": 1.8613204642720096e-10,
|
78242 |
+
"loss": 2.5369,
|
78243 |
+
"step": 7670
|
78244 |
+
},
|
78245 |
+
{
|
78246 |
+
"epoch": 0.9929766644010746,
|
78247 |
+
"eval_PRM Accuracy": 0.2358490566037736,
|
78248 |
+
"eval_PRM F1": 0.047058823529411764,
|
78249 |
+
"eval_PRM F1 AUC": 0.5120481927710844,
|
78250 |
+
"eval_PRM F1 Neg": 0.36220472440944884,
|
78251 |
+
"eval_PRM NPV": 0.22115384615384615,
|
78252 |
+
"eval_PRM Precision": 1.0,
|
78253 |
+
"eval_PRM Recall": 0.024096385542168676,
|
78254 |
+
"eval_PRM Specificty": 1.0,
|
78255 |
+
"eval_loss": 1.4986461400985718,
|
78256 |
+
"eval_runtime": 14.0005,
|
78257 |
+
"eval_samples_per_second": 3.143,
|
78258 |
+
"eval_steps_per_second": 0.214,
|
78259 |
+
"step": 7670
|
78260 |
+
},
|
78261 |
+
{
|
78262 |
+
"epoch": 0.993106126808428,
|
78263 |
+
"grad_norm": 1.733854755515088,
|
78264 |
+
"learning_rate": 1.793024248076397e-10,
|
78265 |
+
"loss": 2.4133,
|
78266 |
+
"step": 7671
|
78267 |
+
},
|
78268 |
+
{
|
78269 |
+
"epoch": 0.9932355892157815,
|
78270 |
+
"grad_norm": 3.5557057390425304,
|
78271 |
+
"learning_rate": 1.726004354801053e-10,
|
78272 |
+
"loss": 2.5322,
|
78273 |
+
"step": 7672
|
78274 |
+
},
|
78275 |
+
{
|
78276 |
+
"epoch": 0.9933650516231349,
|
78277 |
+
"grad_norm": 2.5351565687700592,
|
78278 |
+
"learning_rate": 1.660260798136415e-10,
|
78279 |
+
"loss": 2.5747,
|
78280 |
+
"step": 7673
|
78281 |
+
},
|
78282 |
+
{
|
78283 |
+
"epoch": 0.9934945140304884,
|
78284 |
+
"grad_norm": 2.155793966215189,
|
78285 |
+
"learning_rate": 1.5957935915113242e-10,
|
78286 |
+
"loss": 2.3379,
|
78287 |
+
"step": 7674
|
78288 |
+
},
|
78289 |
+
{
|
78290 |
+
"epoch": 0.9936239764378418,
|
78291 |
+
"grad_norm": 2.561628615143274,
|
78292 |
+
"learning_rate": 1.532602748094414e-10,
|
78293 |
+
"loss": 2.3025,
|
78294 |
+
"step": 7675
|
78295 |
+
},
|
78296 |
+
{
|
78297 |
+
"epoch": 0.9936239764378418,
|
78298 |
+
"eval_PRM Accuracy": 0.2358490566037736,
|
78299 |
+
"eval_PRM F1": 0.047058823529411764,
|
78300 |
+
"eval_PRM F1 AUC": 0.5120481927710844,
|
78301 |
+
"eval_PRM F1 Neg": 0.36220472440944884,
|
78302 |
+
"eval_PRM NPV": 0.22115384615384615,
|
78303 |
+
"eval_PRM Precision": 1.0,
|
78304 |
+
"eval_PRM Recall": 0.024096385542168676,
|
78305 |
+
"eval_PRM Specificty": 1.0,
|
78306 |
+
"eval_loss": 1.4943625926971436,
|
78307 |
+
"eval_runtime": 14.2777,
|
78308 |
+
"eval_samples_per_second": 3.082,
|
78309 |
+
"eval_steps_per_second": 0.21,
|
78310 |
+
"step": 7675
|
78311 |
+
},
|
78312 |
+
{
|
78313 |
+
"epoch": 0.9937534388451953,
|
78314 |
+
"grad_norm": 1.4586702756577856,
|
78315 |
+
"learning_rate": 1.470688280794108e-10,
|
78316 |
+
"loss": 2.4033,
|
78317 |
+
"step": 7676
|
78318 |
+
},
|
78319 |
+
{
|
78320 |
+
"epoch": 0.9938829012525487,
|
78321 |
+
"grad_norm": 1.7218032271015382,
|
78322 |
+
"learning_rate": 1.410050202257929e-10,
|
78323 |
+
"loss": 2.3477,
|
78324 |
+
"step": 7677
|
78325 |
+
},
|
78326 |
+
{
|
78327 |
+
"epoch": 0.9940123636599023,
|
78328 |
+
"grad_norm": 2.066099568411916,
|
78329 |
+
"learning_rate": 1.3506885248718016e-10,
|
78330 |
+
"loss": 2.2959,
|
78331 |
+
"step": 7678
|
78332 |
+
},
|
78333 |
+
{
|
78334 |
+
"epoch": 0.9941418260672558,
|
78335 |
+
"grad_norm": 2.098283026570332,
|
78336 |
+
"learning_rate": 1.2926032607621375e-10,
|
78337 |
+
"loss": 2.5974,
|
78338 |
+
"step": 7679
|
78339 |
+
},
|
78340 |
+
{
|
78341 |
+
"epoch": 0.9942712884746092,
|
78342 |
+
"grad_norm": 1.6535605307389196,
|
78343 |
+
"learning_rate": 1.2357944217937513e-10,
|
78344 |
+
"loss": 2.1594,
|
78345 |
+
"step": 7680
|
78346 |
+
},
|
78347 |
+
{
|
78348 |
+
"epoch": 0.9942712884746092,
|
78349 |
+
"eval_PRM Accuracy": 0.2358490566037736,
|
78350 |
+
"eval_PRM F1": 0.047058823529411764,
|
78351 |
+
"eval_PRM F1 AUC": 0.5120481927710844,
|
78352 |
+
"eval_PRM F1 Neg": 0.36220472440944884,
|
78353 |
+
"eval_PRM NPV": 0.22115384615384615,
|
78354 |
+
"eval_PRM Precision": 1.0,
|
78355 |
+
"eval_PRM Recall": 0.024096385542168676,
|
78356 |
+
"eval_PRM Specificty": 1.0,
|
78357 |
+
"eval_loss": 1.49853515625,
|
78358 |
+
"eval_runtime": 13.6992,
|
78359 |
+
"eval_samples_per_second": 3.212,
|
78360 |
+
"eval_steps_per_second": 0.219,
|
78361 |
+
"step": 7680
|
78362 |
+
},
|
78363 |
+
{
|
78364 |
+
"epoch": 0.9944007508819627,
|
78365 |
+
"grad_norm": 1.6459518108342641,
|
78366 |
+
"learning_rate": 1.1802620195712487e-10,
|
78367 |
+
"loss": 2.2615,
|
78368 |
+
"step": 7681
|
78369 |
+
},
|
78370 |
+
{
|
78371 |
+
"epoch": 0.9945302132893161,
|
78372 |
+
"grad_norm": 2.1204057298785104,
|
78373 |
+
"learning_rate": 1.1260060654376403e-10,
|
78374 |
+
"loss": 2.3254,
|
78375 |
+
"step": 7682
|
78376 |
+
},
|
78377 |
+
{
|
78378 |
+
"epoch": 0.9946596756966696,
|
78379 |
+
"grad_norm": 1.4357120737136115,
|
78380 |
+
"learning_rate": 1.0730265704764208e-10,
|
78381 |
+
"loss": 2.2004,
|
78382 |
+
"step": 7683
|
78383 |
+
},
|
78384 |
+
{
|
78385 |
+
"epoch": 0.994789138104023,
|
78386 |
+
"grad_norm": 1.3728528906654596,
|
78387 |
+
"learning_rate": 1.0213235455094895e-10,
|
78388 |
+
"loss": 2.1951,
|
78389 |
+
"step": 7684
|
78390 |
+
},
|
78391 |
+
{
|
78392 |
+
"epoch": 0.9949186005113765,
|
78393 |
+
"grad_norm": 3.21900535857529,
|
78394 |
+
"learning_rate": 9.708970010978424e-11,
|
78395 |
+
"loss": 2.3359,
|
78396 |
+
"step": 7685
|
78397 |
+
},
|
78398 |
+
{
|
78399 |
+
"epoch": 0.9949186005113765,
|
78400 |
+
"eval_PRM Accuracy": 0.2358490566037736,
|
78401 |
+
"eval_PRM F1": 0.047058823529411764,
|
78402 |
+
"eval_PRM F1 AUC": 0.5120481927710844,
|
78403 |
+
"eval_PRM F1 Neg": 0.36220472440944884,
|
78404 |
+
"eval_PRM NPV": 0.22115384615384615,
|
78405 |
+
"eval_PRM Precision": 1.0,
|
78406 |
+
"eval_PRM Recall": 0.024096385542168676,
|
78407 |
+
"eval_PRM Specificty": 1.0,
|
78408 |
+
"eval_loss": 1.4950062036514282,
|
78409 |
+
"eval_runtime": 14.3312,
|
78410 |
+
"eval_samples_per_second": 3.07,
|
78411 |
+
"eval_steps_per_second": 0.209,
|
78412 |
+
"step": 7685
|
78413 |
+
},
|
78414 |
+
{
|
78415 |
+
"epoch": 0.9950480629187299,
|
78416 |
+
"grad_norm": 1.6686792950383713,
|
78417 |
+
"learning_rate": 9.217469475429619e-11,
|
78418 |
+
"loss": 2.2742,
|
78419 |
+
"step": 7686
|
78420 |
+
},
|
78421 |
+
{
|
78422 |
+
"epoch": 0.9951775253260834,
|
78423 |
+
"grad_norm": 2.2413072500408857,
|
78424 |
+
"learning_rate": 8.738733948840395e-11,
|
78425 |
+
"loss": 2.1741,
|
78426 |
+
"step": 7687
|
78427 |
+
},
|
78428 |
+
{
|
78429 |
+
"epoch": 0.995306987733437,
|
78430 |
+
"grad_norm": 2.9388080891161135,
|
78431 |
+
"learning_rate": 8.272763529007521e-11,
|
78432 |
+
"loss": 2.5015,
|
78433 |
+
"step": 7688
|
78434 |
+
},
|
78435 |
+
{
|
78436 |
+
"epoch": 0.9954364501407904,
|
78437 |
+
"grad_norm": 1.2448282419855086,
|
78438 |
+
"learning_rate": 7.819558311111808e-11,
|
78439 |
+
"loss": 2.158,
|
78440 |
+
"step": 7689
|
78441 |
+
},
|
78442 |
+
{
|
78443 |
+
"epoch": 0.9955659125481439,
|
78444 |
+
"grad_norm": 1.5770981587173023,
|
78445 |
+
"learning_rate": 7.379118387731977e-11,
|
78446 |
+
"loss": 2.605,
|
78447 |
+
"step": 7690
|
78448 |
+
},
|
78449 |
+
{
|
78450 |
+
"epoch": 0.9955659125481439,
|
78451 |
+
"eval_PRM Accuracy": 0.2358490566037736,
|
78452 |
+
"eval_PRM F1": 0.047058823529411764,
|
78453 |
+
"eval_PRM F1 AUC": 0.5120481927710844,
|
78454 |
+
"eval_PRM F1 Neg": 0.36220472440944884,
|
78455 |
+
"eval_PRM NPV": 0.22115384615384615,
|
78456 |
+
"eval_PRM Precision": 1.0,
|
78457 |
+
"eval_PRM Recall": 0.024096385542168676,
|
78458 |
+
"eval_PRM Specificty": 1.0,
|
78459 |
+
"eval_loss": 1.4972034692764282,
|
78460 |
+
"eval_runtime": 14.7955,
|
78461 |
+
"eval_samples_per_second": 2.974,
|
78462 |
+
"eval_steps_per_second": 0.203,
|
78463 |
+
"step": 7690
|
78464 |
+
},
|
78465 |
+
{
|
78466 |
+
"epoch": 0.9956953749554973,
|
78467 |
+
"grad_norm": 2.2313159470985133,
|
78468 |
+
"learning_rate": 6.951443848830785e-11,
|
78469 |
+
"loss": 2.2695,
|
78470 |
+
"step": 7691
|
78471 |
+
},
|
78472 |
+
{
|
78473 |
+
"epoch": 0.9958248373628508,
|
78474 |
+
"grad_norm": 1.583978575410211,
|
78475 |
+
"learning_rate": 6.53653478177585e-11,
|
78476 |
+
"loss": 2.5193,
|
78477 |
+
"step": 7692
|
78478 |
+
},
|
78479 |
+
{
|
78480 |
+
"epoch": 0.9959542997702042,
|
78481 |
+
"grad_norm": 2.6329864704078796,
|
78482 |
+
"learning_rate": 6.134391271318818e-11,
|
78483 |
+
"loss": 2.1763,
|
78484 |
+
"step": 7693
|
78485 |
+
},
|
78486 |
+
{
|
78487 |
+
"epoch": 0.9960837621775577,
|
78488 |
+
"grad_norm": 1.9048592700851108,
|
78489 |
+
"learning_rate": 5.745013399609256e-11,
|
78490 |
+
"loss": 2.2927,
|
78491 |
+
"step": 7694
|
78492 |
+
},
|
78493 |
+
{
|
78494 |
+
"epoch": 0.9962132245849111,
|
78495 |
+
"grad_norm": 1.2076390899334741,
|
78496 |
+
"learning_rate": 5.3684012461738265e-11,
|
78497 |
+
"loss": 2.2676,
|
78498 |
+
"step": 7695
|
78499 |
+
},
|
78500 |
+
{
|
78501 |
+
"epoch": 0.9962132245849111,
|
78502 |
+
"eval_PRM Accuracy": 0.2358490566037736,
|
78503 |
+
"eval_PRM F1": 0.047058823529411764,
|
78504 |
+
"eval_PRM F1 AUC": 0.5120481927710844,
|
78505 |
+
"eval_PRM F1 Neg": 0.36220472440944884,
|
78506 |
+
"eval_PRM NPV": 0.22115384615384615,
|
78507 |
+
"eval_PRM Precision": 1.0,
|
78508 |
+
"eval_PRM Recall": 0.024096385542168676,
|
78509 |
+
"eval_PRM Specificty": 1.0,
|
78510 |
+
"eval_loss": 1.4951171875,
|
78511 |
+
"eval_runtime": 15.159,
|
78512 |
+
"eval_samples_per_second": 2.903,
|
78513 |
+
"eval_steps_per_second": 0.198,
|
78514 |
+
"step": 7695
|
78515 |
+
},
|
78516 |
+
{
|
78517 |
+
"epoch": 0.9963426869922646,
|
78518 |
+
"grad_norm": 1.6253165998282486,
|
78519 |
+
"learning_rate": 5.004554887957924e-11,
|
78520 |
+
"loss": 2.2419,
|
78521 |
+
"step": 7696
|
78522 |
+
},
|
78523 |
+
{
|
78524 |
+
"epoch": 0.996472149399618,
|
78525 |
+
"grad_norm": 2.151738149711087,
|
78526 |
+
"learning_rate": 4.6534743992701635e-11,
|
78527 |
+
"loss": 2.6377,
|
78528 |
+
"step": 7697
|
78529 |
+
},
|
78530 |
+
{
|
78531 |
+
"epoch": 0.9966016118069716,
|
78532 |
+
"grad_norm": 1.9143121114234054,
|
78533 |
+
"learning_rate": 4.315159851837891e-11,
|
78534 |
+
"loss": 2.3135,
|
78535 |
+
"step": 7698
|
78536 |
+
},
|
78537 |
+
{
|
78538 |
+
"epoch": 0.996731074214325,
|
78539 |
+
"grad_norm": 1.5023196597769006,
|
78540 |
+
"learning_rate": 3.989611314765551e-11,
|
78541 |
+
"loss": 2.3765,
|
78542 |
+
"step": 7699
|
78543 |
+
},
|
78544 |
+
{
|
78545 |
+
"epoch": 0.9968605366216785,
|
78546 |
+
"grad_norm": 1.9325526285591823,
|
78547 |
+
"learning_rate": 3.6768288545485644e-11,
|
78548 |
+
"loss": 2.4561,
|
78549 |
+
"step": 7700
|
78550 |
+
},
|
78551 |
+
{
|
78552 |
+
"epoch": 0.9968605366216785,
|
78553 |
+
"eval_PRM Accuracy": 0.2358490566037736,
|
78554 |
+
"eval_PRM F1": 0.047058823529411764,
|
78555 |
+
"eval_PRM F1 AUC": 0.5120481927710844,
|
78556 |
+
"eval_PRM F1 Neg": 0.36220472440944884,
|
78557 |
+
"eval_PRM NPV": 0.22115384615384615,
|
78558 |
+
"eval_PRM Precision": 1.0,
|
78559 |
+
"eval_PRM Recall": 0.024096385542168676,
|
78560 |
+
"eval_PRM Specificty": 1.0,
|
78561 |
+
"eval_loss": 1.4986239671707153,
|
78562 |
+
"eval_runtime": 14.5617,
|
78563 |
+
"eval_samples_per_second": 3.022,
|
78564 |
+
"eval_steps_per_second": 0.206,
|
78565 |
+
"step": 7700
|
78566 |
+
},
|
78567 |
+
{
|
78568 |
+
"epoch": 0.996989999029032,
|
78569 |
+
"grad_norm": 1.4172984725227213,
|
78570 |
+
"learning_rate": 3.376812535080265e-11,
|
78571 |
+
"loss": 2.373,
|
78572 |
+
"step": 7701
|
78573 |
+
},
|
78574 |
+
{
|
78575 |
+
"epoch": 0.9971194614363854,
|
78576 |
+
"grad_norm": 1.3930326525241075,
|
78577 |
+
"learning_rate": 3.0895624176519036e-11,
|
78578 |
+
"loss": 2.3347,
|
78579 |
+
"step": 7702
|
78580 |
+
},
|
78581 |
+
{
|
78582 |
+
"epoch": 0.9972489238437389,
|
78583 |
+
"grad_norm": 1.3253972058225258,
|
78584 |
+
"learning_rate": 2.8150785609318276e-11,
|
78585 |
+
"loss": 2.4019,
|
78586 |
+
"step": 7703
|
78587 |
+
},
|
78588 |
+
{
|
78589 |
+
"epoch": 0.9973783862510923,
|
78590 |
+
"grad_norm": 1.5408348561147462,
|
78591 |
+
"learning_rate": 2.5533610209932392e-11,
|
78592 |
+
"loss": 2.3169,
|
78593 |
+
"step": 7704
|
78594 |
+
},
|
78595 |
+
{
|
78596 |
+
"epoch": 0.9975078486584458,
|
78597 |
+
"grad_norm": 2.3013034684937415,
|
78598 |
+
"learning_rate": 2.3044098512933766e-11,
|
78599 |
+
"loss": 2.5208,
|
78600 |
+
"step": 7705
|
78601 |
+
},
|
78602 |
+
{
|
78603 |
+
"epoch": 0.9975078486584458,
|
78604 |
+
"eval_PRM Accuracy": 0.2358490566037736,
|
78605 |
+
"eval_PRM F1": 0.047058823529411764,
|
78606 |
+
"eval_PRM F1 AUC": 0.5120481927710844,
|
78607 |
+
"eval_PRM F1 Neg": 0.36220472440944884,
|
78608 |
+
"eval_PRM NPV": 0.22115384615384615,
|
78609 |
+
"eval_PRM Precision": 1.0,
|
78610 |
+
"eval_PRM Recall": 0.024096385542168676,
|
78611 |
+
"eval_PRM Specificty": 1.0,
|
78612 |
+
"eval_loss": 1.4978249073028564,
|
78613 |
+
"eval_runtime": 13.1607,
|
78614 |
+
"eval_samples_per_second": 3.343,
|
78615 |
+
"eval_steps_per_second": 0.228,
|
78616 |
+
"step": 7705
|
78617 |
+
},
|
78618 |
+
{
|
78619 |
+
"epoch": 0.9976373110657992,
|
78620 |
+
"grad_norm": 2.259931800473903,
|
78621 |
+
"learning_rate": 2.068225102694332e-11,
|
78622 |
+
"loss": 2.1882,
|
78623 |
+
"step": 7706
|
78624 |
+
},
|
78625 |
+
{
|
78626 |
+
"epoch": 0.9977667734731527,
|
78627 |
+
"grad_norm": 1.7890624569047229,
|
78628 |
+
"learning_rate": 1.8448068234283578e-11,
|
78629 |
+
"loss": 2.4595,
|
78630 |
+
"step": 7707
|
78631 |
+
},
|
78632 |
+
{
|
78633 |
+
"epoch": 0.9978962358805062,
|
78634 |
+
"grad_norm": 1.2401958555293557,
|
78635 |
+
"learning_rate": 1.634155059146436e-11,
|
78636 |
+
"loss": 2.2754,
|
78637 |
+
"step": 7708
|
78638 |
+
},
|
78639 |
+
{
|
78640 |
+
"epoch": 0.9980256982878597,
|
78641 |
+
"grad_norm": 1.0822891791340588,
|
78642 |
+
"learning_rate": 1.4362698528697094e-11,
|
78643 |
+
"loss": 2.4175,
|
78644 |
+
"step": 7709
|
78645 |
+
},
|
78646 |
+
{
|
78647 |
+
"epoch": 0.9981551606952132,
|
78648 |
+
"grad_norm": 1.6127689542819217,
|
78649 |
+
"learning_rate": 1.2511512450241737e-11,
|
78650 |
+
"loss": 2.4133,
|
78651 |
+
"step": 7710
|
78652 |
+
},
|
78653 |
+
{
|
78654 |
+
"epoch": 0.9981551606952132,
|
78655 |
+
"eval_PRM Accuracy": 0.2358490566037736,
|
78656 |
+
"eval_PRM F1": 0.047058823529411764,
|
78657 |
+
"eval_PRM F1 AUC": 0.5120481927710844,
|
78658 |
+
"eval_PRM F1 Neg": 0.36220472440944884,
|
78659 |
+
"eval_PRM NPV": 0.22115384615384615,
|
78660 |
+
"eval_PRM Precision": 1.0,
|
78661 |
+
"eval_PRM Recall": 0.024096385542168676,
|
78662 |
+
"eval_PRM Specificty": 1.0,
|
78663 |
+
"eval_loss": 1.497114658355713,
|
78664 |
+
"eval_runtime": 14.9295,
|
78665 |
+
"eval_samples_per_second": 2.947,
|
78666 |
+
"eval_steps_per_second": 0.201,
|
78667 |
+
"step": 7710
|
78668 |
+
},
|
78669 |
+
{
|
78670 |
+
"epoch": 0.9982846231025666,
|
78671 |
+
"grad_norm": 1.7945901352090199,
|
78672 |
+
"learning_rate": 1.0787992734198616e-11,
|
78673 |
+
"loss": 2.4961,
|
78674 |
+
"step": 7711
|
78675 |
+
},
|
78676 |
+
{
|
78677 |
+
"epoch": 0.9984140855099201,
|
78678 |
+
"grad_norm": 1.3928437748878686,
|
78679 |
+
"learning_rate": 9.192139732716598e-12,
|
78680 |
+
"loss": 2.4438,
|
78681 |
+
"step": 7712
|
78682 |
+
},
|
78683 |
+
{
|
78684 |
+
"epoch": 0.9985435479172735,
|
78685 |
+
"grad_norm": 2.9852499683419422,
|
78686 |
+
"learning_rate": 7.723953771715531e-12,
|
78687 |
+
"loss": 2.3274,
|
78688 |
+
"step": 7713
|
78689 |
+
},
|
78690 |
+
{
|
78691 |
+
"epoch": 0.998673010324627,
|
78692 |
+
"grad_norm": 2.150419645821146,
|
78693 |
+
"learning_rate": 6.383435151094408e-12,
|
78694 |
+
"loss": 2.4382,
|
78695 |
+
"step": 7714
|
78696 |
+
},
|
78697 |
+
{
|
78698 |
+
"epoch": 0.9988024727319804,
|
78699 |
+
"grad_norm": 1.657145315275181,
|
78700 |
+
"learning_rate": 5.170584144661983e-12,
|
78701 |
+
"loss": 2.5605,
|
78702 |
+
"step": 7715
|
78703 |
+
},
|
78704 |
+
{
|
78705 |
+
"epoch": 0.9988024727319804,
|
78706 |
+
"eval_PRM Accuracy": 0.2358490566037736,
|
78707 |
+
"eval_PRM F1": 0.047058823529411764,
|
78708 |
+
"eval_PRM F1 AUC": 0.5120481927710844,
|
78709 |
+
"eval_PRM F1 Neg": 0.36220472440944884,
|
78710 |
+
"eval_PRM NPV": 0.22115384615384615,
|
78711 |
+
"eval_PRM Precision": 1.0,
|
78712 |
+
"eval_PRM Recall": 0.024096385542168676,
|
78713 |
+
"eval_PRM Specificty": 1.0,
|
78714 |
+
"eval_loss": 1.4971591234207153,
|
78715 |
+
"eval_runtime": 15.0228,
|
78716 |
+
"eval_samples_per_second": 2.929,
|
78717 |
+
"eval_steps_per_second": 0.2,
|
78718 |
+
"step": 7715
|
78719 |
+
},
|
78720 |
+
{
|
78721 |
+
"epoch": 0.9989319351393339,
|
78722 |
+
"grad_norm": 1.4614906798288771,
|
78723 |
+
"learning_rate": 4.0854010002755466e-12,
|
78724 |
+
"loss": 2.2092,
|
78725 |
+
"step": 7716
|
78726 |
+
},
|
78727 |
+
{
|
78728 |
+
"epoch": 0.9990613975466874,
|
78729 |
+
"grad_norm": 2.668030707241662,
|
78730 |
+
"learning_rate": 3.12788593949398e-12,
|
78731 |
+
"loss": 2.4106,
|
78732 |
+
"step": 7717
|
78733 |
+
},
|
78734 |
+
{
|
78735 |
+
"epoch": 0.9991908599540409,
|
78736 |
+
"grad_norm": 1.4888698890193617,
|
78737 |
+
"learning_rate": 2.298039157994092e-12,
|
78738 |
+
"loss": 2.3706,
|
78739 |
+
"step": 7718
|
78740 |
+
},
|
78741 |
+
{
|
78742 |
+
"epoch": 0.9993203223613943,
|
78743 |
+
"grad_norm": 1.558004894188135,
|
78744 |
+
"learning_rate": 1.5958608251542828e-12,
|
78745 |
+
"loss": 2.3206,
|
78746 |
+
"step": 7719
|
78747 |
+
},
|
78748 |
+
{
|
78749 |
+
"epoch": 0.9994497847687478,
|
78750 |
+
"grad_norm": 2.4306575905372134,
|
78751 |
+
"learning_rate": 1.0213510846096564e-12,
|
78752 |
+
"loss": 2.4265,
|
78753 |
+
"step": 7720
|
78754 |
+
},
|
78755 |
+
{
|
78756 |
+
"epoch": 0.9994497847687478,
|
78757 |
+
"eval_PRM Accuracy": 0.2358490566037736,
|
78758 |
+
"eval_PRM F1": 0.047058823529411764,
|
78759 |
+
"eval_PRM F1 AUC": 0.5120481927710844,
|
78760 |
+
"eval_PRM F1 Neg": 0.36220472440944884,
|
78761 |
+
"eval_PRM NPV": 0.22115384615384615,
|
78762 |
+
"eval_PRM Precision": 1.0,
|
78763 |
+
"eval_PRM Recall": 0.024096385542168676,
|
78764 |
+
"eval_PRM Specificty": 1.0,
|
78765 |
+
"eval_loss": 1.4971591234207153,
|
78766 |
+
"eval_runtime": 14.2046,
|
78767 |
+
"eval_samples_per_second": 3.098,
|
78768 |
+
"eval_steps_per_second": 0.211,
|
78769 |
+
"step": 7720
|
78770 |
+
},
|
78771 |
+
{
|
78772 |
+
"epoch": 0.9995792471761012,
|
78773 |
+
"grad_norm": 2.458064036563779,
|
78774 |
+
"learning_rate": 5.745100535581305e-13,
|
78775 |
+
"loss": 2.3542,
|
78776 |
+
"step": 7721
|
78777 |
+
},
|
78778 |
+
{
|
78779 |
+
"epoch": 0.9997087095834547,
|
78780 |
+
"grad_norm": 2.295762242918334,
|
78781 |
+
"learning_rate": 2.5533782331554903e-13,
|
78782 |
+
"loss": 2.5386,
|
78783 |
+
"step": 7722
|
78784 |
+
},
|
78785 |
+
{
|
78786 |
+
"epoch": 0.9998381719908082,
|
78787 |
+
"grad_norm": 2.5005497572826556,
|
78788 |
+
"learning_rate": 6.383445910751463e-14,
|
78789 |
+
"loss": 2.3679,
|
78790 |
+
"step": 7723
|
78791 |
+
},
|
78792 |
+
{
|
78793 |
+
"epoch": 0.9999676343981616,
|
78794 |
+
"grad_norm": 2.2599512852855326,
|
78795 |
+
"learning_rate": 0.0,
|
78796 |
+
"loss": 2.4456,
|
78797 |
+
"step": 7724
|
78798 |
}
|
78799 |
],
|
78800 |
"logging_steps": 1,
|
|
|
78809 |
"should_evaluate": false,
|
78810 |
"should_log": false,
|
78811 |
"should_save": true,
|
78812 |
+
"should_training_stop": true
|
78813 |
},
|
78814 |
"attributes": {}
|
78815 |
}
|
78816 |
},
|
78817 |
+
"total_flos": 6493605880496128.0,
|
78818 |
"train_batch_size": 4,
|
78819 |
"trial_name": null,
|
78820 |
"trial_params": null
|