Upload checkpoint 3150
Browse files- model-00001-of-00002.safetensors +1 -1
- model-00002-of-00002.safetensors +1 -1
- optimizer.pt +1 -1
- scheduler.pt +1 -1
- trainer_state.json +122 -3
model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4957560304
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:57dd35bc2f6ffb39aa8b1f29e5f5a136575feb2cfea6fb25c20608557674098f
|
3 |
size 4957560304
|
model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3989163248
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e624a7a0172e021f4952abfbdca579a62820eee0e2b3e77971e4d3e381845328
|
3 |
size 3989163248
|
optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 17893874312
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6cbc9c739918e3728ff92771938e8d94644dc34caafd6417d182451fadc9bb8f
|
3 |
size 17893874312
|
scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d016b6de510b811bd11c3b655ffc83267ff09c3e71671a7fad681fb0f1d2317d
|
3 |
size 1064
|
trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -7238,6 +7238,125 @@
|
|
7238 |
"learning_rate": 3.236257876723725e-06,
|
7239 |
"loss": 0.5991,
|
7240 |
"step": 3099
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7241 |
}
|
7242 |
],
|
7243 |
"logging_steps": 3,
|
@@ -7257,7 +7376,7 @@
|
|
7257 |
"attributes": {}
|
7258 |
}
|
7259 |
},
|
7260 |
-
"total_flos": 2.
|
7261 |
"train_batch_size": 8,
|
7262 |
"trial_name": null,
|
7263 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.9341637010676157,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 3150,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
7238 |
"learning_rate": 3.236257876723725e-06,
|
7239 |
"loss": 0.5991,
|
7240 |
"step": 3099
|
7241 |
+
},
|
7242 |
+
{
|
7243 |
+
"epoch": 0.9199288256227758,
|
7244 |
+
"grad_norm": 0.259765625,
|
7245 |
+
"learning_rate": 3.165897783676275e-06,
|
7246 |
+
"loss": 0.5901,
|
7247 |
+
"step": 3102
|
7248 |
+
},
|
7249 |
+
{
|
7250 |
+
"epoch": 0.9208185053380783,
|
7251 |
+
"grad_norm": 0.27734375,
|
7252 |
+
"learning_rate": 3.0962986735020738e-06,
|
7253 |
+
"loss": 0.6183,
|
7254 |
+
"step": 3105
|
7255 |
+
},
|
7256 |
+
{
|
7257 |
+
"epoch": 0.9217081850533808,
|
7258 |
+
"grad_norm": 0.255859375,
|
7259 |
+
"learning_rate": 3.027461093154449e-06,
|
7260 |
+
"loss": 0.5892,
|
7261 |
+
"step": 3108
|
7262 |
+
},
|
7263 |
+
{
|
7264 |
+
"epoch": 0.9225978647686833,
|
7265 |
+
"grad_norm": 0.2578125,
|
7266 |
+
"learning_rate": 2.959385583602081e-06,
|
7267 |
+
"loss": 0.6269,
|
7268 |
+
"step": 3111
|
7269 |
+
},
|
7270 |
+
{
|
7271 |
+
"epoch": 0.9234875444839857,
|
7272 |
+
"grad_norm": 0.26171875,
|
7273 |
+
"learning_rate": 2.8920726798248643e-06,
|
7274 |
+
"loss": 0.5946,
|
7275 |
+
"step": 3114
|
7276 |
+
},
|
7277 |
+
{
|
7278 |
+
"epoch": 0.9243772241992882,
|
7279 |
+
"grad_norm": 0.259765625,
|
7280 |
+
"learning_rate": 2.8255229108096527e-06,
|
7281 |
+
"loss": 0.6192,
|
7282 |
+
"step": 3117
|
7283 |
+
},
|
7284 |
+
{
|
7285 |
+
"epoch": 0.9252669039145908,
|
7286 |
+
"grad_norm": 0.255859375,
|
7287 |
+
"learning_rate": 2.7597367995461086e-06,
|
7288 |
+
"loss": 0.6153,
|
7289 |
+
"step": 3120
|
7290 |
+
},
|
7291 |
+
{
|
7292 |
+
"epoch": 0.9261565836298933,
|
7293 |
+
"grad_norm": 0.265625,
|
7294 |
+
"learning_rate": 2.694714863022585e-06,
|
7295 |
+
"loss": 0.5831,
|
7296 |
+
"step": 3123
|
7297 |
+
},
|
7298 |
+
{
|
7299 |
+
"epoch": 0.9270462633451957,
|
7300 |
+
"grad_norm": 0.267578125,
|
7301 |
+
"learning_rate": 2.6304576122221035e-06,
|
7302 |
+
"loss": 0.5898,
|
7303 |
+
"step": 3126
|
7304 |
+
},
|
7305 |
+
{
|
7306 |
+
"epoch": 0.9279359430604982,
|
7307 |
+
"grad_norm": 0.2490234375,
|
7308 |
+
"learning_rate": 2.566965552118272e-06,
|
7309 |
+
"loss": 0.6098,
|
7310 |
+
"step": 3129
|
7311 |
+
},
|
7312 |
+
{
|
7313 |
+
"epoch": 0.9288256227758007,
|
7314 |
+
"grad_norm": 0.251953125,
|
7315 |
+
"learning_rate": 2.504239181671353e-06,
|
7316 |
+
"loss": 0.5932,
|
7317 |
+
"step": 3132
|
7318 |
+
},
|
7319 |
+
{
|
7320 |
+
"epoch": 0.9297153024911032,
|
7321 |
+
"grad_norm": 0.259765625,
|
7322 |
+
"learning_rate": 2.4422789938243763e-06,
|
7323 |
+
"loss": 0.5877,
|
7324 |
+
"step": 3135
|
7325 |
+
},
|
7326 |
+
{
|
7327 |
+
"epoch": 0.9306049822064056,
|
7328 |
+
"grad_norm": 0.279296875,
|
7329 |
+
"learning_rate": 2.381085475499201e-06,
|
7330 |
+
"loss": 0.5755,
|
7331 |
+
"step": 3138
|
7332 |
+
},
|
7333 |
+
{
|
7334 |
+
"epoch": 0.9314946619217082,
|
7335 |
+
"grad_norm": 0.263671875,
|
7336 |
+
"learning_rate": 2.3206591075927376e-06,
|
7337 |
+
"loss": 0.5875,
|
7338 |
+
"step": 3141
|
7339 |
+
},
|
7340 |
+
{
|
7341 |
+
"epoch": 0.9323843416370107,
|
7342 |
+
"grad_norm": 0.265625,
|
7343 |
+
"learning_rate": 2.2610003649731092e-06,
|
7344 |
+
"loss": 0.6113,
|
7345 |
+
"step": 3144
|
7346 |
+
},
|
7347 |
+
{
|
7348 |
+
"epoch": 0.9332740213523132,
|
7349 |
+
"grad_norm": 0.2578125,
|
7350 |
+
"learning_rate": 2.2021097164760085e-06,
|
7351 |
+
"loss": 0.6035,
|
7352 |
+
"step": 3147
|
7353 |
+
},
|
7354 |
+
{
|
7355 |
+
"epoch": 0.9341637010676157,
|
7356 |
+
"grad_norm": 0.26171875,
|
7357 |
+
"learning_rate": 2.143987624900945e-06,
|
7358 |
+
"loss": 0.5813,
|
7359 |
+
"step": 3150
|
7360 |
}
|
7361 |
],
|
7362 |
"logging_steps": 3,
|
|
|
7376 |
"attributes": {}
|
7377 |
}
|
7378 |
},
|
7379 |
+
"total_flos": 2.0621509178953826e+19,
|
7380 |
"train_batch_size": 8,
|
7381 |
"trial_name": null,
|
7382 |
"trial_params": null
|