Training in progress, step 3614, checkpoint
Browse files- last-checkpoint/optimizer_0/.metadata +1 -1
- last-checkpoint/optimizer_0/__0_0.distcp +1 -1
- last-checkpoint/optimizer_0/__1_0.distcp +1 -1
- last-checkpoint/optimizer_0/__2_0.distcp +1 -1
- last-checkpoint/optimizer_0/__3_0.distcp +1 -1
- last-checkpoint/optimizer_0/__4_0.distcp +1 -1
- last-checkpoint/optimizer_0/__5_0.distcp +1 -1
- last-checkpoint/optimizer_0/__6_0.distcp +1 -1
- last-checkpoint/optimizer_0/__7_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/.metadata +0 -0
- last-checkpoint/pytorch_model_fsdp_0/__0_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__1_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__2_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__3_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__4_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__5_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__6_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__7_0.distcp +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +814 -4
last-checkpoint/optimizer_0/.metadata
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1130174
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5b57402c9a04b6a83c55631793729abf4135e066e940007e5be32d1b580969c5
|
3 |
size 1130174
|
last-checkpoint/optimizer_0/__0_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 7242420036
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3b082226244870565d28873eba1d507851f0af49f4366f9a37b971141a812bac
|
3 |
size 7242420036
|
last-checkpoint/optimizer_0/__1_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 7242473280
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:94d305a995adc93c165aa366b5cfa7cea152c21931cf4b250d0e0f76cf977dc6
|
3 |
size 7242473280
|
last-checkpoint/optimizer_0/__2_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 7242469280
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ce18db0ad733a422365d20b5d06639b0c2e8c082890dcee42f3551c057730408
|
3 |
size 7242469280
|
last-checkpoint/optimizer_0/__3_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 7242469280
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2d1fd6f4a58d8be9b8f4ed9c00096312ac2a19be2b05a83ef344c3caa6fddfb0
|
3 |
size 7242469280
|
last-checkpoint/optimizer_0/__4_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 7242471556
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1dc057d11daf173591f444f3e42edce09b92d5f8fa6cf2ce68d3ec7654d90c72
|
3 |
size 7242471556
|
last-checkpoint/optimizer_0/__5_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 7242471556
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a6f87ef3065fc4b31182b03a4a17a669e4d62016bfcaf05113ea4793e1d37546
|
3 |
size 7242471556
|
last-checkpoint/optimizer_0/__6_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 7242471556
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3aec250e9248ff1407735a2e38d1f42ca07be095eb7d003ce955c101c4bf37f4
|
3 |
size 7242471556
|
last-checkpoint/optimizer_0/__7_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 7242481476
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2f74c7304e82f6b6ca55a4662ed1108321ff9c5805d048fa74b7eced022291ad
|
3 |
size 7242481476
|
last-checkpoint/pytorch_model_fsdp_0/.metadata
CHANGED
Binary files a/last-checkpoint/pytorch_model_fsdp_0/.metadata and b/last-checkpoint/pytorch_model_fsdp_0/.metadata differ
|
|
last-checkpoint/pytorch_model_fsdp_0/__0_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3621209428
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:96a514e755628059dd0e872f51e9e9c03303f32c8bb807e60ede865d5074b952
|
3 |
size 3621209428
|
last-checkpoint/pytorch_model_fsdp_0/__1_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3621209428
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4dabe2309a6a1a18a27265cbb83de5cbf207f35f6ef8f3af19f22dcb76735a00
|
3 |
size 3621209428
|
last-checkpoint/pytorch_model_fsdp_0/__2_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3621209428
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4a6c9137d8b880c8044701a18add7422c303241d293c1b566c8d7be63bbda599
|
3 |
size 3621209428
|
last-checkpoint/pytorch_model_fsdp_0/__3_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3621209428
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6ec44558c788ed40d962a7a7ac7d47f797d88247ed258682966e1699e97fb500
|
3 |
size 3621209428
|
last-checkpoint/pytorch_model_fsdp_0/__4_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3621209428
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bda62e5d3c9978ebc867b9a5f6e80ddeeb08c36f0e4c0f708e3b4650f841d014
|
3 |
size 3621209428
|
last-checkpoint/pytorch_model_fsdp_0/__5_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3621209428
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:79b845162dd458f08b347feb21b321cc26bd49f8a90ec6692679f7cc11b5843f
|
3 |
size 3621209428
|
last-checkpoint/pytorch_model_fsdp_0/__6_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3621209428
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:293577d7261da8e39c01fb14ec56c5de9511e969763ae8a2c7e45eae07bb561c
|
3 |
size 3621209428
|
last-checkpoint/pytorch_model_fsdp_0/__7_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3621209428
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b52705274f60d2ba6542de0450be411273d3dedeffe53a0afb4913227822ee7a
|
3 |
size 3621209428
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d567092e95857ec2cef0d42902ce0b7b850534369c357b926f52de75e6483b0c
|
3 |
size 15984
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a43106d8f7ea7f1b60a91f9c8dae3af8f4578d4a265510b906d37cd15ce8b30e
|
3 |
size 15984
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a2aebd5c68731a8f11af25059034403042750f2005fa4358349f244ccdff4a5d
|
3 |
size 15984
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d6d2d85ac2d4aae4a01ac2269d0aad572db524a435d3c1422869e17cc061f2f8
|
3 |
size 15984
|
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:148f3aa738ce382dd8aa49c18096e0644495a5e19f8e8545441dda2bef5afd1c
|
3 |
size 15984
|
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6bfb030b9b6b13727bb580725f96bd18f81f6aa676eb9c08437ece83091d55a2
|
3 |
size 15984
|
last-checkpoint/rng_state_6.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0ae2f442a63b70c5786d8b8bc2857bc39fb849023f943cc0d7c5d4a9ba3108f4
|
3 |
size 15984
|
last-checkpoint/rng_state_7.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6333d466bbde25a38a25343ca06136fd75295a54967090d2fe4c85ebb3b769ee
|
3 |
size 15984
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:21662799391bd7e52a3f2ab8ae98718c240562ca5104178c8186801ae79acc59
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -24939,6 +24939,816 @@
|
|
24939 |
"eval_samples_per_second": 6.662,
|
24940 |
"eval_steps_per_second": 0.236,
|
24941 |
"step": 3500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24942 |
}
|
24943 |
],
|
24944 |
"logging_steps": 1,
|
@@ -24953,12 +25763,12 @@
|
|
24953 |
"should_evaluate": false,
|
24954 |
"should_log": false,
|
24955 |
"should_save": true,
|
24956 |
-
"should_training_stop":
|
24957 |
},
|
24958 |
"attributes": {}
|
24959 |
}
|
24960 |
},
|
24961 |
-
"total_flos": 8.
|
24962 |
"train_batch_size": 2,
|
24963 |
"trial_name": null,
|
24964 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.9999308293560213,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 3614,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
24939 |
"eval_samples_per_second": 6.662,
|
24940 |
"eval_steps_per_second": 0.236,
|
24941 |
"step": 3500
|
24942 |
+
},
|
24943 |
+
{
|
24944 |
+
"epoch": 0.9686656982776509,
|
24945 |
+
"grad_norm": 5.235546112060547,
|
24946 |
+
"learning_rate": 4.9179685389096896e-08,
|
24947 |
+
"loss": 0.3635,
|
24948 |
+
"step": 3501
|
24949 |
+
},
|
24950 |
+
{
|
24951 |
+
"epoch": 0.9689423808535658,
|
24952 |
+
"grad_norm": 6.3581223487854,
|
24953 |
+
"learning_rate": 4.831449832053525e-08,
|
24954 |
+
"loss": 0.443,
|
24955 |
+
"step": 3502
|
24956 |
+
},
|
24957 |
+
{
|
24958 |
+
"epoch": 0.9692190634294805,
|
24959 |
+
"grad_norm": 8.401195526123047,
|
24960 |
+
"learning_rate": 4.74569523379137e-08,
|
24961 |
+
"loss": 0.4533,
|
24962 |
+
"step": 3503
|
24963 |
+
},
|
24964 |
+
{
|
24965 |
+
"epoch": 0.9694957460053953,
|
24966 |
+
"grad_norm": 10.938070297241211,
|
24967 |
+
"learning_rate": 4.6607048764534814e-08,
|
24968 |
+
"loss": 0.4868,
|
24969 |
+
"step": 3504
|
24970 |
+
},
|
24971 |
+
{
|
24972 |
+
"epoch": 0.9697724285813101,
|
24973 |
+
"grad_norm": 5.621330261230469,
|
24974 |
+
"learning_rate": 4.5764788911908384e-08,
|
24975 |
+
"loss": 0.4099,
|
24976 |
+
"step": 3505
|
24977 |
+
},
|
24978 |
+
{
|
24979 |
+
"epoch": 0.9700491111572249,
|
24980 |
+
"grad_norm": 8.117724418640137,
|
24981 |
+
"learning_rate": 4.493017407975087e-08,
|
24982 |
+
"loss": 0.419,
|
24983 |
+
"step": 3506
|
24984 |
+
},
|
24985 |
+
{
|
24986 |
+
"epoch": 0.9703257937331397,
|
24987 |
+
"grad_norm": 6.391302108764648,
|
24988 |
+
"learning_rate": 4.410320555597869e-08,
|
24989 |
+
"loss": 0.4502,
|
24990 |
+
"step": 3507
|
24991 |
+
},
|
24992 |
+
{
|
24993 |
+
"epoch": 0.9706024763090544,
|
24994 |
+
"grad_norm": 12.152544975280762,
|
24995 |
+
"learning_rate": 4.328388461671107e-08,
|
24996 |
+
"loss": 0.5602,
|
24997 |
+
"step": 3508
|
24998 |
+
},
|
24999 |
+
{
|
25000 |
+
"epoch": 0.9708791588849692,
|
25001 |
+
"grad_norm": 11.282763481140137,
|
25002 |
+
"learning_rate": 4.247221252626499e-08,
|
25003 |
+
"loss": 0.5866,
|
25004 |
+
"step": 3509
|
25005 |
+
},
|
25006 |
+
{
|
25007 |
+
"epoch": 0.971155841460884,
|
25008 |
+
"grad_norm": 6.5668253898620605,
|
25009 |
+
"learning_rate": 4.166819053715521e-08,
|
25010 |
+
"loss": 0.397,
|
25011 |
+
"step": 3510
|
25012 |
+
},
|
25013 |
+
{
|
25014 |
+
"epoch": 0.9714325240367988,
|
25015 |
+
"grad_norm": 6.569908618927002,
|
25016 |
+
"learning_rate": 4.087181989008926e-08,
|
25017 |
+
"loss": 0.4887,
|
25018 |
+
"step": 3511
|
25019 |
+
},
|
25020 |
+
{
|
25021 |
+
"epoch": 0.9717092066127135,
|
25022 |
+
"grad_norm": 9.562713623046875,
|
25023 |
+
"learning_rate": 4.0083101813970794e-08,
|
25024 |
+
"loss": 0.3468,
|
25025 |
+
"step": 3512
|
25026 |
+
},
|
25027 |
+
{
|
25028 |
+
"epoch": 0.9719858891886284,
|
25029 |
+
"grad_norm": 9.6434907913208,
|
25030 |
+
"learning_rate": 3.93020375258929e-08,
|
25031 |
+
"loss": 0.36,
|
25032 |
+
"step": 3513
|
25033 |
+
},
|
25034 |
+
{
|
25035 |
+
"epoch": 0.9722625717645431,
|
25036 |
+
"grad_norm": 11.750981330871582,
|
25037 |
+
"learning_rate": 3.852862823113701e-08,
|
25038 |
+
"loss": 0.4,
|
25039 |
+
"step": 3514
|
25040 |
+
},
|
25041 |
+
{
|
25042 |
+
"epoch": 0.972539254340458,
|
25043 |
+
"grad_norm": 6.20719575881958,
|
25044 |
+
"learning_rate": 3.776287512317345e-08,
|
25045 |
+
"loss": 0.3826,
|
25046 |
+
"step": 3515
|
25047 |
+
},
|
25048 |
+
{
|
25049 |
+
"epoch": 0.9728159369163727,
|
25050 |
+
"grad_norm": 5.036905288696289,
|
25051 |
+
"learning_rate": 3.7004779383657543e-08,
|
25052 |
+
"loss": 0.3524,
|
25053 |
+
"step": 3516
|
25054 |
+
},
|
25055 |
+
{
|
25056 |
+
"epoch": 0.9730926194922874,
|
25057 |
+
"grad_norm": 10.913909912109375,
|
25058 |
+
"learning_rate": 3.6254342182428515e-08,
|
25059 |
+
"loss": 0.3848,
|
25060 |
+
"step": 3517
|
25061 |
+
},
|
25062 |
+
{
|
25063 |
+
"epoch": 0.9733693020682023,
|
25064 |
+
"grad_norm": 5.970122814178467,
|
25065 |
+
"learning_rate": 3.5511564677506715e-08,
|
25066 |
+
"loss": 0.3321,
|
25067 |
+
"step": 3518
|
25068 |
+
},
|
25069 |
+
{
|
25070 |
+
"epoch": 0.973645984644117,
|
25071 |
+
"grad_norm": 11.325397491455078,
|
25072 |
+
"learning_rate": 3.477644801509306e-08,
|
25073 |
+
"loss": 0.4813,
|
25074 |
+
"step": 3519
|
25075 |
+
},
|
25076 |
+
{
|
25077 |
+
"epoch": 0.9739226672200318,
|
25078 |
+
"grad_norm": 6.1236066818237305,
|
25079 |
+
"learning_rate": 3.404899332956735e-08,
|
25080 |
+
"loss": 0.3705,
|
25081 |
+
"step": 3520
|
25082 |
+
},
|
25083 |
+
{
|
25084 |
+
"epoch": 0.9741993497959466,
|
25085 |
+
"grad_norm": 7.407733917236328,
|
25086 |
+
"learning_rate": 3.332920174348497e-08,
|
25087 |
+
"loss": 0.4437,
|
25088 |
+
"step": 3521
|
25089 |
+
},
|
25090 |
+
{
|
25091 |
+
"epoch": 0.9744760323718614,
|
25092 |
+
"grad_norm": 6.992598533630371,
|
25093 |
+
"learning_rate": 3.2617074367576886e-08,
|
25094 |
+
"loss": 0.4798,
|
25095 |
+
"step": 3522
|
25096 |
+
},
|
25097 |
+
{
|
25098 |
+
"epoch": 0.9747527149477762,
|
25099 |
+
"grad_norm": 8.392826080322266,
|
25100 |
+
"learning_rate": 3.1912612300747384e-08,
|
25101 |
+
"loss": 0.3594,
|
25102 |
+
"step": 3523
|
25103 |
+
},
|
25104 |
+
{
|
25105 |
+
"epoch": 0.975029397523691,
|
25106 |
+
"grad_norm": 6.5278639793396,
|
25107 |
+
"learning_rate": 3.121581663007134e-08,
|
25108 |
+
"loss": 0.4254,
|
25109 |
+
"step": 3524
|
25110 |
+
},
|
25111 |
+
{
|
25112 |
+
"epoch": 0.9753060800996057,
|
25113 |
+
"grad_norm": 6.965699195861816,
|
25114 |
+
"learning_rate": 3.052668843079365e-08,
|
25115 |
+
"loss": 0.3341,
|
25116 |
+
"step": 3525
|
25117 |
+
},
|
25118 |
+
{
|
25119 |
+
"epoch": 0.9755827626755205,
|
25120 |
+
"grad_norm": 6.369235992431641,
|
25121 |
+
"learning_rate": 2.984522876632812e-08,
|
25122 |
+
"loss": 0.4223,
|
25123 |
+
"step": 3526
|
25124 |
+
},
|
25125 |
+
{
|
25126 |
+
"epoch": 0.9758594452514353,
|
25127 |
+
"grad_norm": 5.943597793579102,
|
25128 |
+
"learning_rate": 2.9171438688254118e-08,
|
25129 |
+
"loss": 0.4617,
|
25130 |
+
"step": 3527
|
25131 |
+
},
|
25132 |
+
{
|
25133 |
+
"epoch": 0.97613612782735,
|
25134 |
+
"grad_norm": 8.69240665435791,
|
25135 |
+
"learning_rate": 2.850531923631661e-08,
|
25136 |
+
"loss": 0.4274,
|
25137 |
+
"step": 3528
|
25138 |
+
},
|
25139 |
+
{
|
25140 |
+
"epoch": 0.9764128104032649,
|
25141 |
+
"grad_norm": 6.936566352844238,
|
25142 |
+
"learning_rate": 2.784687143842224e-08,
|
25143 |
+
"loss": 0.3652,
|
25144 |
+
"step": 3529
|
25145 |
+
},
|
25146 |
+
{
|
25147 |
+
"epoch": 0.9766894929791796,
|
25148 |
+
"grad_norm": 6.269064426422119,
|
25149 |
+
"learning_rate": 2.7196096310641573e-08,
|
25150 |
+
"loss": 0.4139,
|
25151 |
+
"step": 3530
|
25152 |
+
},
|
25153 |
+
{
|
25154 |
+
"epoch": 0.9769661755550945,
|
25155 |
+
"grad_norm": 7.397728443145752,
|
25156 |
+
"learning_rate": 2.6552994857204083e-08,
|
25157 |
+
"loss": 0.4978,
|
25158 |
+
"step": 3531
|
25159 |
+
},
|
25160 |
+
{
|
25161 |
+
"epoch": 0.9772428581310092,
|
25162 |
+
"grad_norm": 6.353926181793213,
|
25163 |
+
"learning_rate": 2.5917568070496503e-08,
|
25164 |
+
"loss": 0.4468,
|
25165 |
+
"step": 3532
|
25166 |
+
},
|
25167 |
+
{
|
25168 |
+
"epoch": 0.9775195407069239,
|
25169 |
+
"grad_norm": 6.348305702209473,
|
25170 |
+
"learning_rate": 2.528981693106558e-08,
|
25171 |
+
"loss": 0.4326,
|
25172 |
+
"step": 3533
|
25173 |
+
},
|
25174 |
+
{
|
25175 |
+
"epoch": 0.9777962232828388,
|
25176 |
+
"grad_norm": 7.442327499389648,
|
25177 |
+
"learning_rate": 2.4669742407610332e-08,
|
25178 |
+
"loss": 0.4463,
|
25179 |
+
"step": 3534
|
25180 |
+
},
|
25181 |
+
{
|
25182 |
+
"epoch": 0.9780729058587535,
|
25183 |
+
"grad_norm": 6.542540073394775,
|
25184 |
+
"learning_rate": 2.4057345456987013e-08,
|
25185 |
+
"loss": 0.4501,
|
25186 |
+
"step": 3535
|
25187 |
+
},
|
25188 |
+
{
|
25189 |
+
"epoch": 0.9783495884346683,
|
25190 |
+
"grad_norm": 6.655422210693359,
|
25191 |
+
"learning_rate": 2.3452627024200815e-08,
|
25192 |
+
"loss": 0.4371,
|
25193 |
+
"step": 3536
|
25194 |
+
},
|
25195 |
+
{
|
25196 |
+
"epoch": 0.9786262710105831,
|
25197 |
+
"grad_norm": 6.123124599456787,
|
25198 |
+
"learning_rate": 2.2855588042410838e-08,
|
25199 |
+
"loss": 0.4332,
|
25200 |
+
"step": 3537
|
25201 |
+
},
|
25202 |
+
{
|
25203 |
+
"epoch": 0.9789029535864979,
|
25204 |
+
"grad_norm": 6.827947616577148,
|
25205 |
+
"learning_rate": 2.226622943292567e-08,
|
25206 |
+
"loss": 0.4926,
|
25207 |
+
"step": 3538
|
25208 |
+
},
|
25209 |
+
{
|
25210 |
+
"epoch": 0.9791796361624127,
|
25211 |
+
"grad_norm": 4.61018705368042,
|
25212 |
+
"learning_rate": 2.1684552105199485e-08,
|
25213 |
+
"loss": 0.4004,
|
25214 |
+
"step": 3539
|
25215 |
+
},
|
25216 |
+
{
|
25217 |
+
"epoch": 0.9794563187383275,
|
25218 |
+
"grad_norm": 8.030633926391602,
|
25219 |
+
"learning_rate": 2.1110556956835394e-08,
|
25220 |
+
"loss": 0.3661,
|
25221 |
+
"step": 3540
|
25222 |
+
},
|
25223 |
+
{
|
25224 |
+
"epoch": 0.9797330013142422,
|
25225 |
+
"grad_norm": 5.738858222961426,
|
25226 |
+
"learning_rate": 2.0544244873582643e-08,
|
25227 |
+
"loss": 0.5086,
|
25228 |
+
"step": 3541
|
25229 |
+
},
|
25230 |
+
{
|
25231 |
+
"epoch": 0.980009683890157,
|
25232 |
+
"grad_norm": 6.600244998931885,
|
25233 |
+
"learning_rate": 1.9985616729332747e-08,
|
25234 |
+
"loss": 0.391,
|
25235 |
+
"step": 3542
|
25236 |
+
},
|
25237 |
+
{
|
25238 |
+
"epoch": 0.9802863664660718,
|
25239 |
+
"grad_norm": 5.6170806884765625,
|
25240 |
+
"learning_rate": 1.9434673386120594e-08,
|
25241 |
+
"loss": 0.4199,
|
25242 |
+
"step": 3543
|
25243 |
+
},
|
25244 |
+
{
|
25245 |
+
"epoch": 0.9805630490419865,
|
25246 |
+
"grad_norm": 6.685068607330322,
|
25247 |
+
"learning_rate": 1.889141569412223e-08,
|
25248 |
+
"loss": 0.4244,
|
25249 |
+
"step": 3544
|
25250 |
+
},
|
25251 |
+
{
|
25252 |
+
"epoch": 0.9808397316179014,
|
25253 |
+
"grad_norm": 8.005044937133789,
|
25254 |
+
"learning_rate": 1.8355844491654284e-08,
|
25255 |
+
"loss": 0.4274,
|
25256 |
+
"step": 3545
|
25257 |
+
},
|
25258 |
+
{
|
25259 |
+
"epoch": 0.9811164141938161,
|
25260 |
+
"grad_norm": 9.169407844543457,
|
25261 |
+
"learning_rate": 1.7827960605171778e-08,
|
25262 |
+
"loss": 0.4674,
|
25263 |
+
"step": 3546
|
25264 |
+
},
|
25265 |
+
{
|
25266 |
+
"epoch": 0.981393096769731,
|
25267 |
+
"grad_norm": 8.508087158203125,
|
25268 |
+
"learning_rate": 1.7307764849266996e-08,
|
25269 |
+
"loss": 0.4041,
|
25270 |
+
"step": 3547
|
25271 |
+
},
|
25272 |
+
{
|
25273 |
+
"epoch": 0.9816697793456457,
|
25274 |
+
"grad_norm": 7.333049774169922,
|
25275 |
+
"learning_rate": 1.679525802666948e-08,
|
25276 |
+
"loss": 0.3855,
|
25277 |
+
"step": 3548
|
25278 |
+
},
|
25279 |
+
{
|
25280 |
+
"epoch": 0.9819464619215605,
|
25281 |
+
"grad_norm": 5.400395393371582,
|
25282 |
+
"learning_rate": 1.6290440928241613e-08,
|
25283 |
+
"loss": 0.4063,
|
25284 |
+
"step": 3549
|
25285 |
+
},
|
25286 |
+
{
|
25287 |
+
"epoch": 0.9822231444974753,
|
25288 |
+
"grad_norm": 8.128486633300781,
|
25289 |
+
"learning_rate": 1.5793314332982477e-08,
|
25290 |
+
"loss": 0.3734,
|
25291 |
+
"step": 3550
|
25292 |
+
},
|
25293 |
+
{
|
25294 |
+
"epoch": 0.98249982707339,
|
25295 |
+
"grad_norm": 7.244846343994141,
|
25296 |
+
"learning_rate": 1.5303879008021773e-08,
|
25297 |
+
"loss": 0.3945,
|
25298 |
+
"step": 3551
|
25299 |
+
},
|
25300 |
+
{
|
25301 |
+
"epoch": 0.9827765096493049,
|
25302 |
+
"grad_norm": 7.506839275360107,
|
25303 |
+
"learning_rate": 1.482213570861979e-08,
|
25304 |
+
"loss": 0.4032,
|
25305 |
+
"step": 3552
|
25306 |
+
},
|
25307 |
+
{
|
25308 |
+
"epoch": 0.9830531922252196,
|
25309 |
+
"grad_norm": 6.991761207580566,
|
25310 |
+
"learning_rate": 1.4348085178169658e-08,
|
25311 |
+
"loss": 0.358,
|
25312 |
+
"step": 3553
|
25313 |
+
},
|
25314 |
+
{
|
25315 |
+
"epoch": 0.9833298748011344,
|
25316 |
+
"grad_norm": 6.618139266967773,
|
25317 |
+
"learning_rate": 1.3881728148191775e-08,
|
25318 |
+
"loss": 0.4842,
|
25319 |
+
"step": 3554
|
25320 |
+
},
|
25321 |
+
{
|
25322 |
+
"epoch": 0.9836065573770492,
|
25323 |
+
"grad_norm": 5.5989274978637695,
|
25324 |
+
"learning_rate": 1.3423065338334373e-08,
|
25325 |
+
"loss": 0.3639,
|
25326 |
+
"step": 3555
|
25327 |
+
},
|
25328 |
+
{
|
25329 |
+
"epoch": 0.983883239952964,
|
25330 |
+
"grad_norm": 10.364370346069336,
|
25331 |
+
"learning_rate": 1.2972097456373512e-08,
|
25332 |
+
"loss": 0.3677,
|
25333 |
+
"step": 3556
|
25334 |
+
},
|
25335 |
+
{
|
25336 |
+
"epoch": 0.9841599225288787,
|
25337 |
+
"grad_norm": 6.9868340492248535,
|
25338 |
+
"learning_rate": 1.2528825198210304e-08,
|
25339 |
+
"loss": 0.4279,
|
25340 |
+
"step": 3557
|
25341 |
+
},
|
25342 |
+
{
|
25343 |
+
"epoch": 0.9844366051047935,
|
25344 |
+
"grad_norm": 7.0398125648498535,
|
25345 |
+
"learning_rate": 1.209324924787092e-08,
|
25346 |
+
"loss": 0.4194,
|
25347 |
+
"step": 3558
|
25348 |
+
},
|
25349 |
+
{
|
25350 |
+
"epoch": 0.9847132876807083,
|
25351 |
+
"grad_norm": 4.859399795532227,
|
25352 |
+
"learning_rate": 1.1665370277504917e-08,
|
25353 |
+
"loss": 0.4016,
|
25354 |
+
"step": 3559
|
25355 |
+
},
|
25356 |
+
{
|
25357 |
+
"epoch": 0.984989970256623,
|
25358 |
+
"grad_norm": 6.274821758270264,
|
25359 |
+
"learning_rate": 1.1245188947384133e-08,
|
25360 |
+
"loss": 0.4823,
|
25361 |
+
"step": 3560
|
25362 |
+
},
|
25363 |
+
{
|
25364 |
+
"epoch": 0.9852666528325379,
|
25365 |
+
"grad_norm": 7.054055213928223,
|
25366 |
+
"learning_rate": 1.083270590590213e-08,
|
25367 |
+
"loss": 0.4286,
|
25368 |
+
"step": 3561
|
25369 |
+
},
|
25370 |
+
{
|
25371 |
+
"epoch": 0.9855433354084526,
|
25372 |
+
"grad_norm": 4.351117134094238,
|
25373 |
+
"learning_rate": 1.0427921789573636e-08,
|
25374 |
+
"loss": 0.39,
|
25375 |
+
"step": 3562
|
25376 |
+
},
|
25377 |
+
{
|
25378 |
+
"epoch": 0.9858200179843675,
|
25379 |
+
"grad_norm": 8.033738136291504,
|
25380 |
+
"learning_rate": 1.003083722303233e-08,
|
25381 |
+
"loss": 0.4421,
|
25382 |
+
"step": 3563
|
25383 |
+
},
|
25384 |
+
{
|
25385 |
+
"epoch": 0.9860967005602822,
|
25386 |
+
"grad_norm": 4.982435703277588,
|
25387 |
+
"learning_rate": 9.641452819030283e-09,
|
25388 |
+
"loss": 0.332,
|
25389 |
+
"step": 3564
|
25390 |
+
},
|
25391 |
+
{
|
25392 |
+
"epoch": 0.986373383136197,
|
25393 |
+
"grad_norm": 15.608251571655273,
|
25394 |
+
"learning_rate": 9.259769178438516e-09,
|
25395 |
+
"loss": 0.3892,
|
25396 |
+
"step": 3565
|
25397 |
+
},
|
25398 |
+
{
|
25399 |
+
"epoch": 0.9866500657121118,
|
25400 |
+
"grad_norm": 7.015858173370361,
|
25401 |
+
"learning_rate": 8.885786890242554e-09,
|
25402 |
+
"loss": 0.3854,
|
25403 |
+
"step": 3566
|
25404 |
+
},
|
25405 |
+
{
|
25406 |
+
"epoch": 0.9869267482880265,
|
25407 |
+
"grad_norm": 10.827554702758789,
|
25408 |
+
"learning_rate": 8.519506531545763e-09,
|
25409 |
+
"loss": 0.5023,
|
25410 |
+
"step": 3567
|
25411 |
+
},
|
25412 |
+
{
|
25413 |
+
"epoch": 0.9872034308639414,
|
25414 |
+
"grad_norm": 5.95730447769165,
|
25415 |
+
"learning_rate": 8.160928667566015e-09,
|
25416 |
+
"loss": 0.4019,
|
25417 |
+
"step": 3568
|
25418 |
+
},
|
25419 |
+
{
|
25420 |
+
"epoch": 0.9874801134398561,
|
25421 |
+
"grad_norm": 10.602973937988281,
|
25422 |
+
"learning_rate": 7.81005385163458e-09,
|
25423 |
+
"loss": 0.4717,
|
25424 |
+
"step": 3569
|
25425 |
+
},
|
25426 |
+
{
|
25427 |
+
"epoch": 0.9877567960157709,
|
25428 |
+
"grad_norm": 6.7663164138793945,
|
25429 |
+
"learning_rate": 7.466882625196126e-09,
|
25430 |
+
"loss": 0.3858,
|
25431 |
+
"step": 3570
|
25432 |
+
},
|
25433 |
+
{
|
25434 |
+
"epoch": 0.9880334785916857,
|
25435 |
+
"grad_norm": 14.224672317504883,
|
25436 |
+
"learning_rate": 7.13141551780816e-09,
|
25437 |
+
"loss": 0.587,
|
25438 |
+
"step": 3571
|
25439 |
+
},
|
25440 |
+
{
|
25441 |
+
"epoch": 0.9883101611676005,
|
25442 |
+
"grad_norm": 10.713641166687012,
|
25443 |
+
"learning_rate": 6.803653047138814e-09,
|
25444 |
+
"loss": 0.379,
|
25445 |
+
"step": 3572
|
25446 |
+
},
|
25447 |
+
{
|
25448 |
+
"epoch": 0.9885868437435152,
|
25449 |
+
"grad_norm": 6.506649494171143,
|
25450 |
+
"learning_rate": 6.48359571896906e-09,
|
25451 |
+
"loss": 0.3868,
|
25452 |
+
"step": 3573
|
25453 |
+
},
|
25454 |
+
{
|
25455 |
+
"epoch": 0.9888635263194301,
|
25456 |
+
"grad_norm": 5.888006210327148,
|
25457 |
+
"learning_rate": 6.171244027187162e-09,
|
25458 |
+
"loss": 0.4599,
|
25459 |
+
"step": 3574
|
25460 |
+
},
|
25461 |
+
{
|
25462 |
+
"epoch": 0.9891402088953448,
|
25463 |
+
"grad_norm": 4.954348087310791,
|
25464 |
+
"learning_rate": 5.866598453792005e-09,
|
25465 |
+
"loss": 0.4112,
|
25466 |
+
"step": 3575
|
25467 |
+
},
|
25468 |
+
{
|
25469 |
+
"epoch": 0.9894168914712596,
|
25470 |
+
"grad_norm": 6.747551441192627,
|
25471 |
+
"learning_rate": 5.569659468891431e-09,
|
25472 |
+
"loss": 0.3918,
|
25473 |
+
"step": 3576
|
25474 |
+
},
|
25475 |
+
{
|
25476 |
+
"epoch": 0.9896935740471744,
|
25477 |
+
"grad_norm": 6.1678972244262695,
|
25478 |
+
"learning_rate": 5.2804275306994615e-09,
|
25479 |
+
"loss": 0.49,
|
25480 |
+
"step": 3577
|
25481 |
+
},
|
25482 |
+
{
|
25483 |
+
"epoch": 0.9899702566230891,
|
25484 |
+
"grad_norm": 9.625385284423828,
|
25485 |
+
"learning_rate": 4.998903085539075e-09,
|
25486 |
+
"loss": 0.4239,
|
25487 |
+
"step": 3578
|
25488 |
+
},
|
25489 |
+
{
|
25490 |
+
"epoch": 0.990246939199004,
|
25491 |
+
"grad_norm": 12.304625511169434,
|
25492 |
+
"learning_rate": 4.7250865678377665e-09,
|
25493 |
+
"loss": 0.4733,
|
25494 |
+
"step": 3579
|
25495 |
+
},
|
25496 |
+
{
|
25497 |
+
"epoch": 0.9905236217749187,
|
25498 |
+
"grad_norm": 7.027407646179199,
|
25499 |
+
"learning_rate": 4.458978400130321e-09,
|
25500 |
+
"loss": 0.5059,
|
25501 |
+
"step": 3580
|
25502 |
+
},
|
25503 |
+
{
|
25504 |
+
"epoch": 0.9908003043508335,
|
25505 |
+
"grad_norm": 7.810775279998779,
|
25506 |
+
"learning_rate": 4.200578993054927e-09,
|
25507 |
+
"loss": 0.5052,
|
25508 |
+
"step": 3581
|
25509 |
+
},
|
25510 |
+
{
|
25511 |
+
"epoch": 0.9910769869267483,
|
25512 |
+
"grad_norm": 8.920899391174316,
|
25513 |
+
"learning_rate": 3.9498887453559565e-09,
|
25514 |
+
"loss": 0.4596,
|
25515 |
+
"step": 3582
|
25516 |
+
},
|
25517 |
+
{
|
25518 |
+
"epoch": 0.991353669502663,
|
25519 |
+
"grad_norm": 5.510407447814941,
|
25520 |
+
"learning_rate": 3.70690804387952e-09,
|
25521 |
+
"loss": 0.3717,
|
25522 |
+
"step": 3583
|
25523 |
+
},
|
25524 |
+
{
|
25525 |
+
"epoch": 0.9916303520785779,
|
25526 |
+
"grad_norm": 10.58802604675293,
|
25527 |
+
"learning_rate": 3.4716372635767993e-09,
|
25528 |
+
"loss": 0.3158,
|
25529 |
+
"step": 3584
|
25530 |
+
},
|
25531 |
+
{
|
25532 |
+
"epoch": 0.9919070346544926,
|
25533 |
+
"grad_norm": 6.024365425109863,
|
25534 |
+
"learning_rate": 3.2440767675007144e-09,
|
25535 |
+
"loss": 0.4396,
|
25536 |
+
"step": 3585
|
25537 |
+
},
|
25538 |
+
{
|
25539 |
+
"epoch": 0.9921837172304074,
|
25540 |
+
"grad_norm": 4.717132091522217,
|
25541 |
+
"learning_rate": 3.024226906805927e-09,
|
25542 |
+
"loss": 0.3467,
|
25543 |
+
"step": 3586
|
25544 |
+
},
|
25545 |
+
{
|
25546 |
+
"epoch": 0.9924603998063222,
|
25547 |
+
"grad_norm": 6.302797794342041,
|
25548 |
+
"learning_rate": 2.8120880207493928e-09,
|
25549 |
+
"loss": 0.4866,
|
25550 |
+
"step": 3587
|
25551 |
+
},
|
25552 |
+
{
|
25553 |
+
"epoch": 0.992737082382237,
|
25554 |
+
"grad_norm": 5.059561252593994,
|
25555 |
+
"learning_rate": 2.607660436688697e-09,
|
25556 |
+
"loss": 0.4122,
|
25557 |
+
"step": 3588
|
25558 |
+
},
|
25559 |
+
{
|
25560 |
+
"epoch": 0.9930137649581517,
|
25561 |
+
"grad_norm": 7.106605052947998,
|
25562 |
+
"learning_rate": 2.4109444700815e-09,
|
25563 |
+
"loss": 0.4918,
|
25564 |
+
"step": 3589
|
25565 |
+
},
|
25566 |
+
{
|
25567 |
+
"epoch": 0.9932904475340666,
|
25568 |
+
"grad_norm": 5.022238731384277,
|
25569 |
+
"learning_rate": 2.221940424485536e-09,
|
25570 |
+
"loss": 0.3677,
|
25571 |
+
"step": 3590
|
25572 |
+
},
|
25573 |
+
{
|
25574 |
+
"epoch": 0.9935671301099813,
|
25575 |
+
"grad_norm": 6.2519330978393555,
|
25576 |
+
"learning_rate": 2.040648591559169e-09,
|
25577 |
+
"loss": 0.482,
|
25578 |
+
"step": 3591
|
25579 |
+
},
|
25580 |
+
{
|
25581 |
+
"epoch": 0.993843812685896,
|
25582 |
+
"grad_norm": 7.059159278869629,
|
25583 |
+
"learning_rate": 1.8670692510580625e-09,
|
25584 |
+
"loss": 0.44,
|
25585 |
+
"step": 3592
|
25586 |
+
},
|
25587 |
+
{
|
25588 |
+
"epoch": 0.9941204952618109,
|
25589 |
+
"grad_norm": 7.916522026062012,
|
25590 |
+
"learning_rate": 1.7012026708373985e-09,
|
25591 |
+
"loss": 0.5168,
|
25592 |
+
"step": 3593
|
25593 |
+
},
|
25594 |
+
{
|
25595 |
+
"epoch": 0.9943971778377256,
|
25596 |
+
"grad_norm": 8.189726829528809,
|
25597 |
+
"learning_rate": 1.5430491068513243e-09,
|
25598 |
+
"loss": 0.4161,
|
25599 |
+
"step": 3594
|
25600 |
+
},
|
25601 |
+
{
|
25602 |
+
"epoch": 0.9946738604136405,
|
25603 |
+
"grad_norm": 10.01470947265625,
|
25604 |
+
"learning_rate": 1.3926088031507302e-09,
|
25605 |
+
"loss": 0.4104,
|
25606 |
+
"step": 3595
|
25607 |
+
},
|
25608 |
+
{
|
25609 |
+
"epoch": 0.9949505429895552,
|
25610 |
+
"grad_norm": 6.116405010223389,
|
25611 |
+
"learning_rate": 1.2498819918843609e-09,
|
25612 |
+
"loss": 0.4398,
|
25613 |
+
"step": 3596
|
25614 |
+
},
|
25615 |
+
{
|
25616 |
+
"epoch": 0.99522722556547,
|
25617 |
+
"grad_norm": 5.669225692749023,
|
25618 |
+
"learning_rate": 1.1148688932977047e-09,
|
25619 |
+
"loss": 0.4811,
|
25620 |
+
"step": 3597
|
25621 |
+
},
|
25622 |
+
{
|
25623 |
+
"epoch": 0.9955039081413848,
|
25624 |
+
"grad_norm": 7.954200267791748,
|
25625 |
+
"learning_rate": 9.875697157329945e-10,
|
25626 |
+
"loss": 0.5057,
|
25627 |
+
"step": 3598
|
25628 |
+
},
|
25629 |
+
{
|
25630 |
+
"epoch": 0.9957805907172996,
|
25631 |
+
"grad_norm": 12.605840682983398,
|
25632 |
+
"learning_rate": 8.679846556303162e-10,
|
25633 |
+
"loss": 0.4993,
|
25634 |
+
"step": 3599
|
25635 |
+
},
|
25636 |
+
{
|
25637 |
+
"epoch": 0.9960572732932144,
|
25638 |
+
"grad_norm": 9.73613166809082,
|
25639 |
+
"learning_rate": 7.561138975242798e-10,
|
25640 |
+
"loss": 0.4514,
|
25641 |
+
"step": 3600
|
25642 |
+
},
|
25643 |
+
{
|
25644 |
+
"epoch": 0.9960572732932144,
|
25645 |
+
"eval_accuracy": 0.7272727272727273,
|
25646 |
+
"eval_f1": 0.3137254901960784,
|
25647 |
+
"eval_loss": 0.6381392478942871,
|
25648 |
+
"eval_precision": 0.5454545454545454,
|
25649 |
+
"eval_recall": 0.22018348623853212,
|
25650 |
+
"eval_runtime": 118.7108,
|
25651 |
+
"eval_samples_per_second": 1.904,
|
25652 |
+
"eval_steps_per_second": 0.067,
|
25653 |
+
"step": 3600
|
25654 |
+
},
|
25655 |
+
{
|
25656 |
+
"epoch": 0.9963339558691291,
|
25657 |
+
"grad_norm": 5.538408279418945,
|
25658 |
+
"learning_rate": 6.519576140451289e-10,
|
25659 |
+
"loss": 0.4153,
|
25660 |
+
"step": 3601
|
25661 |
+
},
|
25662 |
+
{
|
25663 |
+
"epoch": 0.9966106384450439,
|
25664 |
+
"grad_norm": 5.18894624710083,
|
25665 |
+
"learning_rate": 5.555159659204057e-10,
|
25666 |
+
"loss": 0.4861,
|
25667 |
+
"step": 3602
|
25668 |
+
},
|
25669 |
+
{
|
25670 |
+
"epoch": 0.9968873210209587,
|
25671 |
+
"grad_norm": 7.266717910766602,
|
25672 |
+
"learning_rate": 4.667891019710657e-10,
|
25673 |
+
"loss": 0.4072,
|
25674 |
+
"step": 3603
|
25675 |
+
},
|
25676 |
+
{
|
25677 |
+
"epoch": 0.9971640035968735,
|
25678 |
+
"grad_norm": 8.036751747131348,
|
25679 |
+
"learning_rate": 3.857771591142534e-10,
|
25680 |
+
"loss": 0.4333,
|
25681 |
+
"step": 3604
|
25682 |
+
},
|
25683 |
+
{
|
25684 |
+
"epoch": 0.9974406861727882,
|
25685 |
+
"grad_norm": 9.265700340270996,
|
25686 |
+
"learning_rate": 3.124802623627465e-10,
|
25687 |
+
"loss": 0.4393,
|
25688 |
+
"step": 3605
|
25689 |
+
},
|
25690 |
+
{
|
25691 |
+
"epoch": 0.9977173687487031,
|
25692 |
+
"grad_norm": 5.40745210647583,
|
25693 |
+
"learning_rate": 2.4689852482162604e-10,
|
25694 |
+
"loss": 0.3861,
|
25695 |
+
"step": 3606
|
25696 |
+
},
|
25697 |
+
{
|
25698 |
+
"epoch": 0.9979940513246178,
|
25699 |
+
"grad_norm": 5.892364025115967,
|
25700 |
+
"learning_rate": 1.8903204769271655e-10,
|
25701 |
+
"loss": 0.4592,
|
25702 |
+
"step": 3607
|
25703 |
+
},
|
25704 |
+
{
|
25705 |
+
"epoch": 0.9982707339005326,
|
25706 |
+
"grad_norm": 8.214534759521484,
|
25707 |
+
"learning_rate": 1.38880920271256e-10,
|
25708 |
+
"loss": 0.4237,
|
25709 |
+
"step": 3608
|
25710 |
+
},
|
25711 |
+
{
|
25712 |
+
"epoch": 0.9985474164764474,
|
25713 |
+
"grad_norm": 7.651143550872803,
|
25714 |
+
"learning_rate": 9.64452199464505e-11,
|
25715 |
+
"loss": 0.3782,
|
25716 |
+
"step": 3609
|
25717 |
+
},
|
25718 |
+
{
|
25719 |
+
"epoch": 0.9988240990523621,
|
25720 |
+
"grad_norm": 6.732639312744141,
|
25721 |
+
"learning_rate": 6.172501220313986e-11,
|
25722 |
+
"loss": 0.4235,
|
25723 |
+
"step": 3610
|
25724 |
+
},
|
25725 |
+
{
|
25726 |
+
"epoch": 0.999100781628277,
|
25727 |
+
"grad_norm": 11.96877670288086,
|
25728 |
+
"learning_rate": 3.472035061791168e-11,
|
25729 |
+
"loss": 0.484,
|
25730 |
+
"step": 3611
|
25731 |
+
},
|
25732 |
+
{
|
25733 |
+
"epoch": 0.9993774642041917,
|
25734 |
+
"grad_norm": 6.621433258056641,
|
25735 |
+
"learning_rate": 1.5431276862987176e-11,
|
25736 |
+
"loss": 0.4221,
|
25737 |
+
"step": 3612
|
25738 |
+
},
|
25739 |
+
{
|
25740 |
+
"epoch": 0.9996541467801066,
|
25741 |
+
"grad_norm": 9.467500686645508,
|
25742 |
+
"learning_rate": 3.857820704000759e-12,
|
25743 |
+
"loss": 0.4852,
|
25744 |
+
"step": 3613
|
25745 |
+
},
|
25746 |
+
{
|
25747 |
+
"epoch": 0.9999308293560213,
|
25748 |
+
"grad_norm": 6.072620391845703,
|
25749 |
+
"learning_rate": 0.0,
|
25750 |
+
"loss": 0.401,
|
25751 |
+
"step": 3614
|
25752 |
}
|
25753 |
],
|
25754 |
"logging_steps": 1,
|
|
|
25763 |
"should_evaluate": false,
|
25764 |
"should_log": false,
|
25765 |
"should_save": true,
|
25766 |
+
"should_training_stop": true
|
25767 |
},
|
25768 |
"attributes": {}
|
25769 |
}
|
25770 |
},
|
25771 |
+
"total_flos": 8.582232535864443e+17,
|
25772 |
"train_batch_size": 2,
|
25773 |
"trial_name": null,
|
25774 |
"trial_params": null
|