Training in progress, step 8000, checkpoint
Browse files- last-checkpoint/model.safetensors +1 -1
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +766 -2
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 738367848
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7f6fca18b3d6839cfa4f9b00cec6f979a279d6161ccf0e227ea2f0e6664d6d3e
|
3 |
size 738367848
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1476823354
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:138d6cf3c8fe05fea07df883537101df6a3d38e7d05cbcc03796a983de350576
|
3 |
size 1476823354
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:28cdaddb959868042b846248e699766aefc2fadab97732661ad902989f1034df
|
3 |
size 15984
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f01a643a1ae2b83dd1c19bc6b73325f7e12cc5322058a11111e293dc5b31ae9d
|
3 |
size 15984
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1a90f4546ff0a4d9c836b2695bc4b1ddad6eb64e578565dd4c83c3a0c3672df7
|
3 |
size 15984
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:649b5fa0e92e74982a79e3759794b1cfec60cf9441738902668d54e2ffe1767b
|
3 |
size 15984
|
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d9e8b7d006141b3943e31b1b95143c70d5c410839f60e8892c3ebb5474fa5b82
|
3 |
size 15984
|
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ab8e9d82889b9d58c21adc3199b61dc25e089ed0456cd04a5834b8213920db8d
|
3 |
size 15984
|
last-checkpoint/rng_state_6.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d61bbe5a4669c770dea677fdd22d95a5f9a1874c146a203a6de6b923066699e2
|
3 |
size 15984
|
last-checkpoint/rng_state_7.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15984
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:208e36b51f1fe5107b8000b99406d4ff1bd7e95578591bc1f581b4593f80e4c6
|
3 |
size 15984
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1000
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:81e0e2c967dab9f9c48f59c1d3cd0a40f676964ec54c91035ecabb3e1c2f4b45
|
3 |
size 1000
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
"eval_steps": 250,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -5355,6 +5355,770 @@
|
|
5355 |
"eval_spearman_manhattan": 0.7397995971405482,
|
5356 |
"eval_steps_per_second": 8.263,
|
5357 |
"step": 7000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5358 |
}
|
5359 |
],
|
5360 |
"logging_steps": 10,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 7.497656982193065,
|
5 |
"eval_steps": 250,
|
6 |
+
"global_step": 8000,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
5355 |
"eval_spearman_manhattan": 0.7397995971405482,
|
5356 |
"eval_steps_per_second": 8.263,
|
5357 |
"step": 7000
|
5358 |
+
},
|
5359 |
+
{
|
5360 |
+
"epoch": 6.569821930646673,
|
5361 |
+
"grad_norm": 1.1269482374191284,
|
5362 |
+
"learning_rate": 9.948674017777388e-06,
|
5363 |
+
"loss": 0.0395,
|
5364 |
+
"step": 7010
|
5365 |
+
},
|
5366 |
+
{
|
5367 |
+
"epoch": 6.579194001874415,
|
5368 |
+
"grad_norm": 0.8978859782218933,
|
5369 |
+
"learning_rate": 9.948600799543118e-06,
|
5370 |
+
"loss": 0.0438,
|
5371 |
+
"step": 7020
|
5372 |
+
},
|
5373 |
+
{
|
5374 |
+
"epoch": 6.588566073102156,
|
5375 |
+
"grad_norm": 1.3999450206756592,
|
5376 |
+
"learning_rate": 9.94852758130885e-06,
|
5377 |
+
"loss": 0.0466,
|
5378 |
+
"step": 7030
|
5379 |
+
},
|
5380 |
+
{
|
5381 |
+
"epoch": 6.597938144329897,
|
5382 |
+
"grad_norm": 0.985998272895813,
|
5383 |
+
"learning_rate": 9.948454363074582e-06,
|
5384 |
+
"loss": 0.0474,
|
5385 |
+
"step": 7040
|
5386 |
+
},
|
5387 |
+
{
|
5388 |
+
"epoch": 6.607310215557638,
|
5389 |
+
"grad_norm": 0.7843828797340393,
|
5390 |
+
"learning_rate": 9.948381144840312e-06,
|
5391 |
+
"loss": 0.0417,
|
5392 |
+
"step": 7050
|
5393 |
+
},
|
5394 |
+
{
|
5395 |
+
"epoch": 6.616682286785379,
|
5396 |
+
"grad_norm": 1.64656400680542,
|
5397 |
+
"learning_rate": 9.948307926606043e-06,
|
5398 |
+
"loss": 0.045,
|
5399 |
+
"step": 7060
|
5400 |
+
},
|
5401 |
+
{
|
5402 |
+
"epoch": 6.626054358013121,
|
5403 |
+
"grad_norm": 0.6348075866699219,
|
5404 |
+
"learning_rate": 9.948234708371774e-06,
|
5405 |
+
"loss": 0.0501,
|
5406 |
+
"step": 7070
|
5407 |
+
},
|
5408 |
+
{
|
5409 |
+
"epoch": 6.635426429240862,
|
5410 |
+
"grad_norm": 1.8781590461730957,
|
5411 |
+
"learning_rate": 9.948161490137505e-06,
|
5412 |
+
"loss": 0.0445,
|
5413 |
+
"step": 7080
|
5414 |
+
},
|
5415 |
+
{
|
5416 |
+
"epoch": 6.644798500468603,
|
5417 |
+
"grad_norm": 1.0441402196884155,
|
5418 |
+
"learning_rate": 9.948088271903235e-06,
|
5419 |
+
"loss": 0.0457,
|
5420 |
+
"step": 7090
|
5421 |
+
},
|
5422 |
+
{
|
5423 |
+
"epoch": 6.654170571696345,
|
5424 |
+
"grad_norm": 1.2460689544677734,
|
5425 |
+
"learning_rate": 9.948015053668966e-06,
|
5426 |
+
"loss": 0.0471,
|
5427 |
+
"step": 7100
|
5428 |
+
},
|
5429 |
+
{
|
5430 |
+
"epoch": 6.663542642924086,
|
5431 |
+
"grad_norm": 0.993414580821991,
|
5432 |
+
"learning_rate": 9.947941835434698e-06,
|
5433 |
+
"loss": 0.0423,
|
5434 |
+
"step": 7110
|
5435 |
+
},
|
5436 |
+
{
|
5437 |
+
"epoch": 6.672914714151828,
|
5438 |
+
"grad_norm": 1.2848552465438843,
|
5439 |
+
"learning_rate": 9.947868617200428e-06,
|
5440 |
+
"loss": 0.0414,
|
5441 |
+
"step": 7120
|
5442 |
+
},
|
5443 |
+
{
|
5444 |
+
"epoch": 6.682286785379569,
|
5445 |
+
"grad_norm": 1.2903103828430176,
|
5446 |
+
"learning_rate": 9.947795398966158e-06,
|
5447 |
+
"loss": 0.0402,
|
5448 |
+
"step": 7130
|
5449 |
+
},
|
5450 |
+
{
|
5451 |
+
"epoch": 6.69165885660731,
|
5452 |
+
"grad_norm": 1.2319235801696777,
|
5453 |
+
"learning_rate": 9.94772218073189e-06,
|
5454 |
+
"loss": 0.0504,
|
5455 |
+
"step": 7140
|
5456 |
+
},
|
5457 |
+
{
|
5458 |
+
"epoch": 6.701030927835052,
|
5459 |
+
"grad_norm": 0.8465273976325989,
|
5460 |
+
"learning_rate": 9.947648962497621e-06,
|
5461 |
+
"loss": 0.0409,
|
5462 |
+
"step": 7150
|
5463 |
+
},
|
5464 |
+
{
|
5465 |
+
"epoch": 6.710402999062793,
|
5466 |
+
"grad_norm": 1.186928153038025,
|
5467 |
+
"learning_rate": 9.947575744263352e-06,
|
5468 |
+
"loss": 0.0458,
|
5469 |
+
"step": 7160
|
5470 |
+
},
|
5471 |
+
{
|
5472 |
+
"epoch": 6.719775070290535,
|
5473 |
+
"grad_norm": 1.3528752326965332,
|
5474 |
+
"learning_rate": 9.947502526029083e-06,
|
5475 |
+
"loss": 0.0433,
|
5476 |
+
"step": 7170
|
5477 |
+
},
|
5478 |
+
{
|
5479 |
+
"epoch": 6.7291471415182755,
|
5480 |
+
"grad_norm": 0.8908892273902893,
|
5481 |
+
"learning_rate": 9.947429307794814e-06,
|
5482 |
+
"loss": 0.0456,
|
5483 |
+
"step": 7180
|
5484 |
+
},
|
5485 |
+
{
|
5486 |
+
"epoch": 6.7385192127460165,
|
5487 |
+
"grad_norm": 1.1235069036483765,
|
5488 |
+
"learning_rate": 9.947356089560544e-06,
|
5489 |
+
"loss": 0.0481,
|
5490 |
+
"step": 7190
|
5491 |
+
},
|
5492 |
+
{
|
5493 |
+
"epoch": 6.747891283973758,
|
5494 |
+
"grad_norm": 1.6809895038604736,
|
5495 |
+
"learning_rate": 9.947282871326275e-06,
|
5496 |
+
"loss": 0.0454,
|
5497 |
+
"step": 7200
|
5498 |
+
},
|
5499 |
+
{
|
5500 |
+
"epoch": 6.757263355201499,
|
5501 |
+
"grad_norm": 0.8632039427757263,
|
5502 |
+
"learning_rate": 9.947209653092008e-06,
|
5503 |
+
"loss": 0.0481,
|
5504 |
+
"step": 7210
|
5505 |
+
},
|
5506 |
+
{
|
5507 |
+
"epoch": 6.766635426429241,
|
5508 |
+
"grad_norm": 1.2185996770858765,
|
5509 |
+
"learning_rate": 9.947136434857738e-06,
|
5510 |
+
"loss": 0.0383,
|
5511 |
+
"step": 7220
|
5512 |
+
},
|
5513 |
+
{
|
5514 |
+
"epoch": 6.776007497656982,
|
5515 |
+
"grad_norm": 0.6979696154594421,
|
5516 |
+
"learning_rate": 9.947063216623467e-06,
|
5517 |
+
"loss": 0.0435,
|
5518 |
+
"step": 7230
|
5519 |
+
},
|
5520 |
+
{
|
5521 |
+
"epoch": 6.785379568884723,
|
5522 |
+
"grad_norm": 1.459441065788269,
|
5523 |
+
"learning_rate": 9.9469899983892e-06,
|
5524 |
+
"loss": 0.0449,
|
5525 |
+
"step": 7240
|
5526 |
+
},
|
5527 |
+
{
|
5528 |
+
"epoch": 6.794751640112465,
|
5529 |
+
"grad_norm": 1.0957977771759033,
|
5530 |
+
"learning_rate": 9.94691678015493e-06,
|
5531 |
+
"loss": 0.032,
|
5532 |
+
"step": 7250
|
5533 |
+
},
|
5534 |
+
{
|
5535 |
+
"epoch": 6.794751640112465,
|
5536 |
+
"eval_loss": 0.03765299916267395,
|
5537 |
+
"eval_pearson_cosine": 0.7692482471466064,
|
5538 |
+
"eval_pearson_dot": 0.722366452217102,
|
5539 |
+
"eval_pearson_euclidean": 0.7316011190414429,
|
5540 |
+
"eval_pearson_manhattan": 0.7333144545555115,
|
5541 |
+
"eval_runtime": 22.5438,
|
5542 |
+
"eval_samples_per_second": 66.537,
|
5543 |
+
"eval_spearman_cosine": 0.7695046405395065,
|
5544 |
+
"eval_spearman_dot": 0.7242050912795406,
|
5545 |
+
"eval_spearman_euclidean": 0.7356828429817377,
|
5546 |
+
"eval_spearman_manhattan": 0.737487116385034,
|
5547 |
+
"eval_steps_per_second": 8.339,
|
5548 |
+
"step": 7250
|
5549 |
+
},
|
5550 |
+
{
|
5551 |
+
"epoch": 6.804123711340206,
|
5552 |
+
"grad_norm": 1.377066731452942,
|
5553 |
+
"learning_rate": 9.946843561920661e-06,
|
5554 |
+
"loss": 0.0529,
|
5555 |
+
"step": 7260
|
5556 |
+
},
|
5557 |
+
{
|
5558 |
+
"epoch": 6.813495782567948,
|
5559 |
+
"grad_norm": 0.714728057384491,
|
5560 |
+
"learning_rate": 9.946770343686392e-06,
|
5561 |
+
"loss": 0.0432,
|
5562 |
+
"step": 7270
|
5563 |
+
},
|
5564 |
+
{
|
5565 |
+
"epoch": 6.822867853795689,
|
5566 |
+
"grad_norm": 1.4324384927749634,
|
5567 |
+
"learning_rate": 9.946697125452125e-06,
|
5568 |
+
"loss": 0.046,
|
5569 |
+
"step": 7280
|
5570 |
+
},
|
5571 |
+
{
|
5572 |
+
"epoch": 6.83223992502343,
|
5573 |
+
"grad_norm": 1.2564704418182373,
|
5574 |
+
"learning_rate": 9.946623907217854e-06,
|
5575 |
+
"loss": 0.046,
|
5576 |
+
"step": 7290
|
5577 |
+
},
|
5578 |
+
{
|
5579 |
+
"epoch": 6.841611996251172,
|
5580 |
+
"grad_norm": 0.8522197008132935,
|
5581 |
+
"learning_rate": 9.946550688983584e-06,
|
5582 |
+
"loss": 0.0393,
|
5583 |
+
"step": 7300
|
5584 |
+
},
|
5585 |
+
{
|
5586 |
+
"epoch": 6.850984067478913,
|
5587 |
+
"grad_norm": 0.8751912117004395,
|
5588 |
+
"learning_rate": 9.946477470749317e-06,
|
5589 |
+
"loss": 0.0426,
|
5590 |
+
"step": 7310
|
5591 |
+
},
|
5592 |
+
{
|
5593 |
+
"epoch": 6.8603561387066545,
|
5594 |
+
"grad_norm": 0.8960391879081726,
|
5595 |
+
"learning_rate": 9.946404252515048e-06,
|
5596 |
+
"loss": 0.0445,
|
5597 |
+
"step": 7320
|
5598 |
+
},
|
5599 |
+
{
|
5600 |
+
"epoch": 6.8697282099343955,
|
5601 |
+
"grad_norm": 1.092128872871399,
|
5602 |
+
"learning_rate": 9.946331034280778e-06,
|
5603 |
+
"loss": 0.0459,
|
5604 |
+
"step": 7330
|
5605 |
+
},
|
5606 |
+
{
|
5607 |
+
"epoch": 6.8791002811621365,
|
5608 |
+
"grad_norm": 1.1840777397155762,
|
5609 |
+
"learning_rate": 9.946257816046509e-06,
|
5610 |
+
"loss": 0.0387,
|
5611 |
+
"step": 7340
|
5612 |
+
},
|
5613 |
+
{
|
5614 |
+
"epoch": 6.888472352389878,
|
5615 |
+
"grad_norm": 1.0283764600753784,
|
5616 |
+
"learning_rate": 9.94618459781224e-06,
|
5617 |
+
"loss": 0.0577,
|
5618 |
+
"step": 7350
|
5619 |
+
},
|
5620 |
+
{
|
5621 |
+
"epoch": 6.897844423617619,
|
5622 |
+
"grad_norm": 0.749761164188385,
|
5623 |
+
"learning_rate": 9.94611137957797e-06,
|
5624 |
+
"loss": 0.0414,
|
5625 |
+
"step": 7360
|
5626 |
+
},
|
5627 |
+
{
|
5628 |
+
"epoch": 6.907216494845361,
|
5629 |
+
"grad_norm": 0.8442000150680542,
|
5630 |
+
"learning_rate": 9.946038161343701e-06,
|
5631 |
+
"loss": 0.046,
|
5632 |
+
"step": 7370
|
5633 |
+
},
|
5634 |
+
{
|
5635 |
+
"epoch": 6.916588566073102,
|
5636 |
+
"grad_norm": 1.2296583652496338,
|
5637 |
+
"learning_rate": 9.945964943109432e-06,
|
5638 |
+
"loss": 0.0412,
|
5639 |
+
"step": 7380
|
5640 |
+
},
|
5641 |
+
{
|
5642 |
+
"epoch": 6.925960637300843,
|
5643 |
+
"grad_norm": 0.6515626311302185,
|
5644 |
+
"learning_rate": 9.945891724875165e-06,
|
5645 |
+
"loss": 0.0481,
|
5646 |
+
"step": 7390
|
5647 |
+
},
|
5648 |
+
{
|
5649 |
+
"epoch": 6.935332708528585,
|
5650 |
+
"grad_norm": 1.8992091417312622,
|
5651 |
+
"learning_rate": 9.945818506640895e-06,
|
5652 |
+
"loss": 0.0431,
|
5653 |
+
"step": 7400
|
5654 |
+
},
|
5655 |
+
{
|
5656 |
+
"epoch": 6.944704779756326,
|
5657 |
+
"grad_norm": 1.1663875579833984,
|
5658 |
+
"learning_rate": 9.945745288406624e-06,
|
5659 |
+
"loss": 0.0459,
|
5660 |
+
"step": 7410
|
5661 |
+
},
|
5662 |
+
{
|
5663 |
+
"epoch": 6.954076850984068,
|
5664 |
+
"grad_norm": 0.6695976853370667,
|
5665 |
+
"learning_rate": 9.945672070172357e-06,
|
5666 |
+
"loss": 0.0448,
|
5667 |
+
"step": 7420
|
5668 |
+
},
|
5669 |
+
{
|
5670 |
+
"epoch": 6.963448922211809,
|
5671 |
+
"grad_norm": 1.158563494682312,
|
5672 |
+
"learning_rate": 9.945598851938088e-06,
|
5673 |
+
"loss": 0.0398,
|
5674 |
+
"step": 7430
|
5675 |
+
},
|
5676 |
+
{
|
5677 |
+
"epoch": 6.97282099343955,
|
5678 |
+
"grad_norm": 1.2068713903427124,
|
5679 |
+
"learning_rate": 9.945525633703818e-06,
|
5680 |
+
"loss": 0.0443,
|
5681 |
+
"step": 7440
|
5682 |
+
},
|
5683 |
+
{
|
5684 |
+
"epoch": 6.982193064667292,
|
5685 |
+
"grad_norm": 0.9688456654548645,
|
5686 |
+
"learning_rate": 9.945452415469549e-06,
|
5687 |
+
"loss": 0.0452,
|
5688 |
+
"step": 7450
|
5689 |
+
},
|
5690 |
+
{
|
5691 |
+
"epoch": 6.991565135895033,
|
5692 |
+
"grad_norm": 1.5483156442642212,
|
5693 |
+
"learning_rate": 9.94537919723528e-06,
|
5694 |
+
"loss": 0.0498,
|
5695 |
+
"step": 7460
|
5696 |
+
},
|
5697 |
+
{
|
5698 |
+
"epoch": 7.0009372071227745,
|
5699 |
+
"grad_norm": 1.18287193775177,
|
5700 |
+
"learning_rate": 9.94530597900101e-06,
|
5701 |
+
"loss": 0.0445,
|
5702 |
+
"step": 7470
|
5703 |
+
},
|
5704 |
+
{
|
5705 |
+
"epoch": 7.010309278350515,
|
5706 |
+
"grad_norm": 0.7765620946884155,
|
5707 |
+
"learning_rate": 9.945232760766741e-06,
|
5708 |
+
"loss": 0.0346,
|
5709 |
+
"step": 7480
|
5710 |
+
},
|
5711 |
+
{
|
5712 |
+
"epoch": 7.019681349578256,
|
5713 |
+
"grad_norm": 0.948760986328125,
|
5714 |
+
"learning_rate": 9.945159542532474e-06,
|
5715 |
+
"loss": 0.0348,
|
5716 |
+
"step": 7490
|
5717 |
+
},
|
5718 |
+
{
|
5719 |
+
"epoch": 7.029053420805998,
|
5720 |
+
"grad_norm": 0.9965664744377136,
|
5721 |
+
"learning_rate": 9.945086324298205e-06,
|
5722 |
+
"loss": 0.0342,
|
5723 |
+
"step": 7500
|
5724 |
+
},
|
5725 |
+
{
|
5726 |
+
"epoch": 7.029053420805998,
|
5727 |
+
"eval_loss": 0.03782695531845093,
|
5728 |
+
"eval_pearson_cosine": 0.768491804599762,
|
5729 |
+
"eval_pearson_dot": 0.7183945775032043,
|
5730 |
+
"eval_pearson_euclidean": 0.7320147752761841,
|
5731 |
+
"eval_pearson_manhattan": 0.7333334684371948,
|
5732 |
+
"eval_runtime": 21.6515,
|
5733 |
+
"eval_samples_per_second": 69.279,
|
5734 |
+
"eval_spearman_cosine": 0.7677979499645443,
|
5735 |
+
"eval_spearman_dot": 0.7186610110098233,
|
5736 |
+
"eval_spearman_euclidean": 0.7364530110375347,
|
5737 |
+
"eval_spearman_manhattan": 0.737620665225201,
|
5738 |
+
"eval_steps_per_second": 8.683,
|
5739 |
+
"step": 7500
|
5740 |
+
},
|
5741 |
+
{
|
5742 |
+
"epoch": 7.038425492033739,
|
5743 |
+
"grad_norm": 0.8594346046447754,
|
5744 |
+
"learning_rate": 9.945013106063935e-06,
|
5745 |
+
"loss": 0.0318,
|
5746 |
+
"step": 7510
|
5747 |
+
},
|
5748 |
+
{
|
5749 |
+
"epoch": 7.047797563261481,
|
5750 |
+
"grad_norm": 1.62812340259552,
|
5751 |
+
"learning_rate": 9.944939887829666e-06,
|
5752 |
+
"loss": 0.0414,
|
5753 |
+
"step": 7520
|
5754 |
+
},
|
5755 |
+
{
|
5756 |
+
"epoch": 7.057169634489222,
|
5757 |
+
"grad_norm": 1.1017098426818848,
|
5758 |
+
"learning_rate": 9.944866669595397e-06,
|
5759 |
+
"loss": 0.0327,
|
5760 |
+
"step": 7530
|
5761 |
+
},
|
5762 |
+
{
|
5763 |
+
"epoch": 7.066541705716963,
|
5764 |
+
"grad_norm": 0.8536505699157715,
|
5765 |
+
"learning_rate": 9.944793451361128e-06,
|
5766 |
+
"loss": 0.0286,
|
5767 |
+
"step": 7540
|
5768 |
+
},
|
5769 |
+
{
|
5770 |
+
"epoch": 7.075913776944705,
|
5771 |
+
"grad_norm": 1.0389901399612427,
|
5772 |
+
"learning_rate": 9.944720233126858e-06,
|
5773 |
+
"loss": 0.0365,
|
5774 |
+
"step": 7550
|
5775 |
+
},
|
5776 |
+
{
|
5777 |
+
"epoch": 7.085285848172446,
|
5778 |
+
"grad_norm": 1.0682491064071655,
|
5779 |
+
"learning_rate": 9.94464701489259e-06,
|
5780 |
+
"loss": 0.034,
|
5781 |
+
"step": 7560
|
5782 |
+
},
|
5783 |
+
{
|
5784 |
+
"epoch": 7.094657919400188,
|
5785 |
+
"grad_norm": 0.8786489963531494,
|
5786 |
+
"learning_rate": 9.944573796658321e-06,
|
5787 |
+
"loss": 0.0373,
|
5788 |
+
"step": 7570
|
5789 |
+
},
|
5790 |
+
{
|
5791 |
+
"epoch": 7.104029990627929,
|
5792 |
+
"grad_norm": 1.3642008304595947,
|
5793 |
+
"learning_rate": 9.94450057842405e-06,
|
5794 |
+
"loss": 0.0314,
|
5795 |
+
"step": 7580
|
5796 |
+
},
|
5797 |
+
{
|
5798 |
+
"epoch": 7.11340206185567,
|
5799 |
+
"grad_norm": 0.7243325114250183,
|
5800 |
+
"learning_rate": 9.944427360189783e-06,
|
5801 |
+
"loss": 0.0299,
|
5802 |
+
"step": 7590
|
5803 |
+
},
|
5804 |
+
{
|
5805 |
+
"epoch": 7.122774133083412,
|
5806 |
+
"grad_norm": 0.6696385145187378,
|
5807 |
+
"learning_rate": 9.944354141955514e-06,
|
5808 |
+
"loss": 0.0311,
|
5809 |
+
"step": 7600
|
5810 |
+
},
|
5811 |
+
{
|
5812 |
+
"epoch": 7.1321462043111525,
|
5813 |
+
"grad_norm": 1.03152334690094,
|
5814 |
+
"learning_rate": 9.944280923721244e-06,
|
5815 |
+
"loss": 0.0355,
|
5816 |
+
"step": 7610
|
5817 |
+
},
|
5818 |
+
{
|
5819 |
+
"epoch": 7.141518275538894,
|
5820 |
+
"grad_norm": 0.8586616516113281,
|
5821 |
+
"learning_rate": 9.944207705486975e-06,
|
5822 |
+
"loss": 0.0394,
|
5823 |
+
"step": 7620
|
5824 |
+
},
|
5825 |
+
{
|
5826 |
+
"epoch": 7.150890346766635,
|
5827 |
+
"grad_norm": 0.9514285922050476,
|
5828 |
+
"learning_rate": 9.944134487252706e-06,
|
5829 |
+
"loss": 0.035,
|
5830 |
+
"step": 7630
|
5831 |
+
},
|
5832 |
+
{
|
5833 |
+
"epoch": 7.160262417994376,
|
5834 |
+
"grad_norm": 0.8053460717201233,
|
5835 |
+
"learning_rate": 9.944061269018437e-06,
|
5836 |
+
"loss": 0.0312,
|
5837 |
+
"step": 7640
|
5838 |
+
},
|
5839 |
+
{
|
5840 |
+
"epoch": 7.169634489222118,
|
5841 |
+
"grad_norm": 1.0056674480438232,
|
5842 |
+
"learning_rate": 9.943988050784167e-06,
|
5843 |
+
"loss": 0.0371,
|
5844 |
+
"step": 7650
|
5845 |
+
},
|
5846 |
+
{
|
5847 |
+
"epoch": 7.179006560449859,
|
5848 |
+
"grad_norm": 0.7738359570503235,
|
5849 |
+
"learning_rate": 9.943914832549898e-06,
|
5850 |
+
"loss": 0.0302,
|
5851 |
+
"step": 7660
|
5852 |
+
},
|
5853 |
+
{
|
5854 |
+
"epoch": 7.188378631677601,
|
5855 |
+
"grad_norm": 1.039197325706482,
|
5856 |
+
"learning_rate": 9.94384161431563e-06,
|
5857 |
+
"loss": 0.0316,
|
5858 |
+
"step": 7670
|
5859 |
+
},
|
5860 |
+
{
|
5861 |
+
"epoch": 7.197750702905342,
|
5862 |
+
"grad_norm": 1.578165888786316,
|
5863 |
+
"learning_rate": 9.943768396081361e-06,
|
5864 |
+
"loss": 0.0388,
|
5865 |
+
"step": 7680
|
5866 |
+
},
|
5867 |
+
{
|
5868 |
+
"epoch": 7.207122774133083,
|
5869 |
+
"grad_norm": 1.1753205060958862,
|
5870 |
+
"learning_rate": 9.943695177847092e-06,
|
5871 |
+
"loss": 0.0387,
|
5872 |
+
"step": 7690
|
5873 |
+
},
|
5874 |
+
{
|
5875 |
+
"epoch": 7.216494845360825,
|
5876 |
+
"grad_norm": 1.295299768447876,
|
5877 |
+
"learning_rate": 9.943621959612823e-06,
|
5878 |
+
"loss": 0.0417,
|
5879 |
+
"step": 7700
|
5880 |
+
},
|
5881 |
+
{
|
5882 |
+
"epoch": 7.225866916588566,
|
5883 |
+
"grad_norm": 0.9477363228797913,
|
5884 |
+
"learning_rate": 9.943548741378554e-06,
|
5885 |
+
"loss": 0.0305,
|
5886 |
+
"step": 7710
|
5887 |
+
},
|
5888 |
+
{
|
5889 |
+
"epoch": 7.235238987816308,
|
5890 |
+
"grad_norm": 1.0547223091125488,
|
5891 |
+
"learning_rate": 9.943475523144284e-06,
|
5892 |
+
"loss": 0.0314,
|
5893 |
+
"step": 7720
|
5894 |
+
},
|
5895 |
+
{
|
5896 |
+
"epoch": 7.244611059044049,
|
5897 |
+
"grad_norm": 1.4873117208480835,
|
5898 |
+
"learning_rate": 9.943402304910015e-06,
|
5899 |
+
"loss": 0.0302,
|
5900 |
+
"step": 7730
|
5901 |
+
},
|
5902 |
+
{
|
5903 |
+
"epoch": 7.25398313027179,
|
5904 |
+
"grad_norm": 0.9882778525352478,
|
5905 |
+
"learning_rate": 9.943329086675748e-06,
|
5906 |
+
"loss": 0.0328,
|
5907 |
+
"step": 7740
|
5908 |
+
},
|
5909 |
+
{
|
5910 |
+
"epoch": 7.2633552014995315,
|
5911 |
+
"grad_norm": 1.3187719583511353,
|
5912 |
+
"learning_rate": 9.943255868441477e-06,
|
5913 |
+
"loss": 0.0341,
|
5914 |
+
"step": 7750
|
5915 |
+
},
|
5916 |
+
{
|
5917 |
+
"epoch": 7.2633552014995315,
|
5918 |
+
"eval_loss": 0.03773624449968338,
|
5919 |
+
"eval_pearson_cosine": 0.7699387073516846,
|
5920 |
+
"eval_pearson_dot": 0.7237234115600586,
|
5921 |
+
"eval_pearson_euclidean": 0.7316513061523438,
|
5922 |
+
"eval_pearson_manhattan": 0.7335678339004517,
|
5923 |
+
"eval_runtime": 22.1612,
|
5924 |
+
"eval_samples_per_second": 67.686,
|
5925 |
+
"eval_spearman_cosine": 0.7694615753118931,
|
5926 |
+
"eval_spearman_dot": 0.7243788947148158,
|
5927 |
+
"eval_spearman_euclidean": 0.7361849268567764,
|
5928 |
+
"eval_spearman_manhattan": 0.7377945356892571,
|
5929 |
+
"eval_steps_per_second": 8.483,
|
5930 |
+
"step": 7750
|
5931 |
+
},
|
5932 |
+
{
|
5933 |
+
"epoch": 7.2727272727272725,
|
5934 |
+
"grad_norm": 1.0984870195388794,
|
5935 |
+
"learning_rate": 9.943182650207207e-06,
|
5936 |
+
"loss": 0.0329,
|
5937 |
+
"step": 7760
|
5938 |
+
},
|
5939 |
+
{
|
5940 |
+
"epoch": 7.282099343955014,
|
5941 |
+
"grad_norm": 0.7666100263595581,
|
5942 |
+
"learning_rate": 9.94310943197294e-06,
|
5943 |
+
"loss": 0.0358,
|
5944 |
+
"step": 7770
|
5945 |
+
},
|
5946 |
+
{
|
5947 |
+
"epoch": 7.291471415182755,
|
5948 |
+
"grad_norm": 0.9941838383674622,
|
5949 |
+
"learning_rate": 9.94303621373867e-06,
|
5950 |
+
"loss": 0.0351,
|
5951 |
+
"step": 7780
|
5952 |
+
},
|
5953 |
+
{
|
5954 |
+
"epoch": 7.300843486410496,
|
5955 |
+
"grad_norm": 1.3012335300445557,
|
5956 |
+
"learning_rate": 9.942962995504401e-06,
|
5957 |
+
"loss": 0.0296,
|
5958 |
+
"step": 7790
|
5959 |
+
},
|
5960 |
+
{
|
5961 |
+
"epoch": 7.310215557638238,
|
5962 |
+
"grad_norm": 1.1914719343185425,
|
5963 |
+
"learning_rate": 9.942889777270132e-06,
|
5964 |
+
"loss": 0.0333,
|
5965 |
+
"step": 7800
|
5966 |
+
},
|
5967 |
+
{
|
5968 |
+
"epoch": 7.319587628865979,
|
5969 |
+
"grad_norm": 1.1405929327011108,
|
5970 |
+
"learning_rate": 9.942816559035863e-06,
|
5971 |
+
"loss": 0.0408,
|
5972 |
+
"step": 7810
|
5973 |
+
},
|
5974 |
+
{
|
5975 |
+
"epoch": 7.328959700093721,
|
5976 |
+
"grad_norm": 0.665600061416626,
|
5977 |
+
"learning_rate": 9.942743340801594e-06,
|
5978 |
+
"loss": 0.0314,
|
5979 |
+
"step": 7820
|
5980 |
+
},
|
5981 |
+
{
|
5982 |
+
"epoch": 7.338331771321462,
|
5983 |
+
"grad_norm": 1.2029966115951538,
|
5984 |
+
"learning_rate": 9.942670122567324e-06,
|
5985 |
+
"loss": 0.041,
|
5986 |
+
"step": 7830
|
5987 |
+
},
|
5988 |
+
{
|
5989 |
+
"epoch": 7.347703842549203,
|
5990 |
+
"grad_norm": 0.44810751080513,
|
5991 |
+
"learning_rate": 9.942596904333057e-06,
|
5992 |
+
"loss": 0.0317,
|
5993 |
+
"step": 7840
|
5994 |
+
},
|
5995 |
+
{
|
5996 |
+
"epoch": 7.357075913776945,
|
5997 |
+
"grad_norm": 1.565082311630249,
|
5998 |
+
"learning_rate": 9.942523686098788e-06,
|
5999 |
+
"loss": 0.035,
|
6000 |
+
"step": 7850
|
6001 |
+
},
|
6002 |
+
{
|
6003 |
+
"epoch": 7.366447985004686,
|
6004 |
+
"grad_norm": 1.6850316524505615,
|
6005 |
+
"learning_rate": 9.942450467864517e-06,
|
6006 |
+
"loss": 0.0365,
|
6007 |
+
"step": 7860
|
6008 |
+
},
|
6009 |
+
{
|
6010 |
+
"epoch": 7.375820056232428,
|
6011 |
+
"grad_norm": 1.0027261972427368,
|
6012 |
+
"learning_rate": 9.942377249630249e-06,
|
6013 |
+
"loss": 0.0309,
|
6014 |
+
"step": 7870
|
6015 |
+
},
|
6016 |
+
{
|
6017 |
+
"epoch": 7.385192127460169,
|
6018 |
+
"grad_norm": 0.51674485206604,
|
6019 |
+
"learning_rate": 9.94230403139598e-06,
|
6020 |
+
"loss": 0.0321,
|
6021 |
+
"step": 7880
|
6022 |
+
},
|
6023 |
+
{
|
6024 |
+
"epoch": 7.39456419868791,
|
6025 |
+
"grad_norm": 1.0429599285125732,
|
6026 |
+
"learning_rate": 9.94223081316171e-06,
|
6027 |
+
"loss": 0.033,
|
6028 |
+
"step": 7890
|
6029 |
+
},
|
6030 |
+
{
|
6031 |
+
"epoch": 7.4039362699156515,
|
6032 |
+
"grad_norm": 0.618232250213623,
|
6033 |
+
"learning_rate": 9.942157594927441e-06,
|
6034 |
+
"loss": 0.0353,
|
6035 |
+
"step": 7900
|
6036 |
+
},
|
6037 |
+
{
|
6038 |
+
"epoch": 7.413308341143392,
|
6039 |
+
"grad_norm": 0.9780518412590027,
|
6040 |
+
"learning_rate": 9.942084376693174e-06,
|
6041 |
+
"loss": 0.0354,
|
6042 |
+
"step": 7910
|
6043 |
+
},
|
6044 |
+
{
|
6045 |
+
"epoch": 7.422680412371134,
|
6046 |
+
"grad_norm": 1.214362621307373,
|
6047 |
+
"learning_rate": 9.942011158458903e-06,
|
6048 |
+
"loss": 0.0338,
|
6049 |
+
"step": 7920
|
6050 |
+
},
|
6051 |
+
{
|
6052 |
+
"epoch": 7.432052483598875,
|
6053 |
+
"grad_norm": 1.202986240386963,
|
6054 |
+
"learning_rate": 9.941937940224634e-06,
|
6055 |
+
"loss": 0.0387,
|
6056 |
+
"step": 7930
|
6057 |
+
},
|
6058 |
+
{
|
6059 |
+
"epoch": 7.441424554826616,
|
6060 |
+
"grad_norm": 1.4128488302230835,
|
6061 |
+
"learning_rate": 9.941864721990366e-06,
|
6062 |
+
"loss": 0.0315,
|
6063 |
+
"step": 7940
|
6064 |
+
},
|
6065 |
+
{
|
6066 |
+
"epoch": 7.450796626054358,
|
6067 |
+
"grad_norm": 0.7198026180267334,
|
6068 |
+
"learning_rate": 9.941791503756097e-06,
|
6069 |
+
"loss": 0.0338,
|
6070 |
+
"step": 7950
|
6071 |
+
},
|
6072 |
+
{
|
6073 |
+
"epoch": 7.460168697282099,
|
6074 |
+
"grad_norm": 1.1124250888824463,
|
6075 |
+
"learning_rate": 9.941718285521828e-06,
|
6076 |
+
"loss": 0.0352,
|
6077 |
+
"step": 7960
|
6078 |
+
},
|
6079 |
+
{
|
6080 |
+
"epoch": 7.469540768509841,
|
6081 |
+
"grad_norm": 1.0420817136764526,
|
6082 |
+
"learning_rate": 9.941645067287558e-06,
|
6083 |
+
"loss": 0.0338,
|
6084 |
+
"step": 7970
|
6085 |
+
},
|
6086 |
+
{
|
6087 |
+
"epoch": 7.478912839737582,
|
6088 |
+
"grad_norm": 0.9638373255729675,
|
6089 |
+
"learning_rate": 9.941571849053289e-06,
|
6090 |
+
"loss": 0.0356,
|
6091 |
+
"step": 7980
|
6092 |
+
},
|
6093 |
+
{
|
6094 |
+
"epoch": 7.488284910965323,
|
6095 |
+
"grad_norm": 0.8584896922111511,
|
6096 |
+
"learning_rate": 9.94149863081902e-06,
|
6097 |
+
"loss": 0.0353,
|
6098 |
+
"step": 7990
|
6099 |
+
},
|
6100 |
+
{
|
6101 |
+
"epoch": 7.497656982193065,
|
6102 |
+
"grad_norm": 0.7161556482315063,
|
6103 |
+
"learning_rate": 9.94142541258475e-06,
|
6104 |
+
"loss": 0.0329,
|
6105 |
+
"step": 8000
|
6106 |
+
},
|
6107 |
+
{
|
6108 |
+
"epoch": 7.497656982193065,
|
6109 |
+
"eval_loss": 0.03753030672669411,
|
6110 |
+
"eval_pearson_cosine": 0.7705868482589722,
|
6111 |
+
"eval_pearson_dot": 0.7248358726501465,
|
6112 |
+
"eval_pearson_euclidean": 0.734631359577179,
|
6113 |
+
"eval_pearson_manhattan": 0.7363988161087036,
|
6114 |
+
"eval_runtime": 22.3628,
|
6115 |
+
"eval_samples_per_second": 67.076,
|
6116 |
+
"eval_spearman_cosine": 0.769708288306187,
|
6117 |
+
"eval_spearman_dot": 0.7249767839130733,
|
6118 |
+
"eval_spearman_euclidean": 0.7394619718544255,
|
6119 |
+
"eval_spearman_manhattan": 0.7409361299302836,
|
6120 |
+
"eval_steps_per_second": 8.407,
|
6121 |
+
"step": 8000
|
6122 |
}
|
6123 |
],
|
6124 |
"logging_steps": 10,
|