Upload checkpoint 2650
Browse files- model-00001-of-00002.safetensors +1 -1
- model-00002-of-00002.safetensors +1 -1
- optimizer.pt +1 -1
- scheduler.pt +1 -1
- trainer_state.json +122 -3
- training_args.bin +1 -1
model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4957560304
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4e47ae497fc6376c1570e0b513bf9eba564cc3dc63409c7e352d60abc4b33dfb
|
3 |
size 4957560304
|
model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3989163248
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:53f4a1f81f197d242f479aec2824d8027d9ad76442cadd887c5d26f6f06c2443
|
3 |
size 3989163248
|
optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 17893874312
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f470f55bb1f8a4a87c704bba419f91130e62a0423baf87cabcf0ccecebccdf40
|
3 |
size 17893874312
|
scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4fcc82d44958fdcee26546444839e8daa60c49af9c4214203fed27662062db81
|
3 |
size 1064
|
trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -6069,6 +6069,125 @@
|
|
6069 |
"learning_rate": 2.5034639808154114e-05,
|
6070 |
"loss": 0.6276,
|
6071 |
"step": 2598
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6072 |
}
|
6073 |
],
|
6074 |
"logging_steps": 3,
|
@@ -6088,7 +6207,7 @@
|
|
6088 |
"attributes": {}
|
6089 |
}
|
6090 |
},
|
6091 |
-
"total_flos": 1.
|
6092 |
"train_batch_size": 8,
|
6093 |
"trial_name": null,
|
6094 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.7858837485172004,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 2650,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
6069 |
"learning_rate": 2.5034639808154114e-05,
|
6070 |
"loss": 0.6276,
|
6071 |
"step": 2598
|
6072 |
+
},
|
6073 |
+
{
|
6074 |
+
"epoch": 0.7713523131672598,
|
6075 |
+
"grad_norm": 0.2578125,
|
6076 |
+
"learning_rate": 2.4849402056698334e-05,
|
6077 |
+
"loss": 0.6062,
|
6078 |
+
"step": 2601
|
6079 |
+
},
|
6080 |
+
{
|
6081 |
+
"epoch": 0.7722419928825622,
|
6082 |
+
"grad_norm": 0.263671875,
|
6083 |
+
"learning_rate": 2.4664754885592268e-05,
|
6084 |
+
"loss": 0.5881,
|
6085 |
+
"step": 2604
|
6086 |
+
},
|
6087 |
+
{
|
6088 |
+
"epoch": 0.7731316725978647,
|
6089 |
+
"grad_norm": 0.2578125,
|
6090 |
+
"learning_rate": 2.4480699745908707e-05,
|
6091 |
+
"loss": 0.6124,
|
6092 |
+
"step": 2607
|
6093 |
+
},
|
6094 |
+
{
|
6095 |
+
"epoch": 0.7740213523131673,
|
6096 |
+
"grad_norm": 0.259765625,
|
6097 |
+
"learning_rate": 2.4297238084067985e-05,
|
6098 |
+
"loss": 0.5779,
|
6099 |
+
"step": 2610
|
6100 |
+
},
|
6101 |
+
{
|
6102 |
+
"epoch": 0.7749110320284698,
|
6103 |
+
"grad_norm": 0.263671875,
|
6104 |
+
"learning_rate": 2.4114371341826415e-05,
|
6105 |
+
"loss": 0.6019,
|
6106 |
+
"step": 2613
|
6107 |
+
},
|
6108 |
+
{
|
6109 |
+
"epoch": 0.7758007117437722,
|
6110 |
+
"grad_norm": 0.259765625,
|
6111 |
+
"learning_rate": 2.3932100956265148e-05,
|
6112 |
+
"loss": 0.6087,
|
6113 |
+
"step": 2616
|
6114 |
+
},
|
6115 |
+
{
|
6116 |
+
"epoch": 0.7766903914590747,
|
6117 |
+
"grad_norm": 0.265625,
|
6118 |
+
"learning_rate": 2.375042835977872e-05,
|
6119 |
+
"loss": 0.5983,
|
6120 |
+
"step": 2619
|
6121 |
+
},
|
6122 |
+
{
|
6123 |
+
"epoch": 0.7775800711743772,
|
6124 |
+
"grad_norm": 0.283203125,
|
6125 |
+
"learning_rate": 2.3569354980063906e-05,
|
6126 |
+
"loss": 0.6024,
|
6127 |
+
"step": 2622
|
6128 |
+
},
|
6129 |
+
{
|
6130 |
+
"epoch": 0.7784697508896797,
|
6131 |
+
"grad_norm": 0.25,
|
6132 |
+
"learning_rate": 2.3388882240108423e-05,
|
6133 |
+
"loss": 0.6039,
|
6134 |
+
"step": 2625
|
6135 |
+
},
|
6136 |
+
{
|
6137 |
+
"epoch": 0.7793594306049823,
|
6138 |
+
"grad_norm": 0.26953125,
|
6139 |
+
"learning_rate": 2.3209011558179826e-05,
|
6140 |
+
"loss": 0.5958,
|
6141 |
+
"step": 2628
|
6142 |
+
},
|
6143 |
+
{
|
6144 |
+
"epoch": 0.7802491103202847,
|
6145 |
+
"grad_norm": 0.26171875,
|
6146 |
+
"learning_rate": 2.3029744347814365e-05,
|
6147 |
+
"loss": 0.5979,
|
6148 |
+
"step": 2631
|
6149 |
+
},
|
6150 |
+
{
|
6151 |
+
"epoch": 0.7811387900355872,
|
6152 |
+
"grad_norm": 0.25390625,
|
6153 |
+
"learning_rate": 2.2851082017805703e-05,
|
6154 |
+
"loss": 0.5918,
|
6155 |
+
"step": 2634
|
6156 |
+
},
|
6157 |
+
{
|
6158 |
+
"epoch": 0.7820284697508897,
|
6159 |
+
"grad_norm": 0.26171875,
|
6160 |
+
"learning_rate": 2.2673025972194106e-05,
|
6161 |
+
"loss": 0.5906,
|
6162 |
+
"step": 2637
|
6163 |
+
},
|
6164 |
+
{
|
6165 |
+
"epoch": 0.7829181494661922,
|
6166 |
+
"grad_norm": 0.255859375,
|
6167 |
+
"learning_rate": 2.2495577610255203e-05,
|
6168 |
+
"loss": 0.5857,
|
6169 |
+
"step": 2640
|
6170 |
+
},
|
6171 |
+
{
|
6172 |
+
"epoch": 0.7838078291814946,
|
6173 |
+
"grad_norm": 0.26171875,
|
6174 |
+
"learning_rate": 2.2318738326489074e-05,
|
6175 |
+
"loss": 0.602,
|
6176 |
+
"step": 2643
|
6177 |
+
},
|
6178 |
+
{
|
6179 |
+
"epoch": 0.7846975088967971,
|
6180 |
+
"grad_norm": 0.26171875,
|
6181 |
+
"learning_rate": 2.2142509510609277e-05,
|
6182 |
+
"loss": 0.5846,
|
6183 |
+
"step": 2646
|
6184 |
+
},
|
6185 |
+
{
|
6186 |
+
"epoch": 0.7855871886120996,
|
6187 |
+
"grad_norm": 0.265625,
|
6188 |
+
"learning_rate": 2.196689254753196e-05,
|
6189 |
+
"loss": 0.5983,
|
6190 |
+
"step": 2649
|
6191 |
}
|
6192 |
],
|
6193 |
"logging_steps": 3,
|
|
|
6207 |
"attributes": {}
|
6208 |
}
|
6209 |
},
|
6210 |
+
"total_flos": 1.734825375372306e+19,
|
6211 |
"train_batch_size": 8,
|
6212 |
"trial_name": null,
|
6213 |
"trial_params": null
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5368
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:81361d744621af8d01826a244bb989030bb99e3808495725a86acec31100d138
|
3 |
size 5368
|