jeffrey03 committed on
Commit
516730d
·
verified ·
1 Parent(s): 8dc3667

Training in progress, step 750, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fe1268771560fe4054985ba533e0d8e1265cb68e494e6838eb647fc9edb702fe
3
  size 109086416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ba78013670a5e8754e3e538080ba8af3888f00be0641ecd712b9a49e93870c1
3
  size 109086416
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:528c8e3d78412132b11bbbba1ab1ff9a9185cd6b58e563de0069b7b6eb0374bc
3
  size 218260730
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b192ffa3c570887b69f25ba42412c7594e70565f65b802562fded9f8bbdb794
3
  size 218260730
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:54d1167478c538418dde090909f0f0a092d6eae3b53af5e2553aef643b517032
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d72ed5d4a41f5335cd07414271c9444686910c8d74653e496881abb75037bae2
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:52173b10baaccdab50345561c45a5209e7b1946a31fefd1529f3955468effd15
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d5e804c4c0039d75698286d90173db37204c5c7e59b000f6920a7c9798abfca
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 1.6987907886505127,
3
- "best_model_checkpoint": "Pricer-FineTune-OpenSource-2024-10-23_08.48.15/checkpoint-500",
4
- "epoch": 0.8,
5
  "eval_steps": 50,
6
- "global_step": 500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -157,6 +157,81 @@
157
  "eval_samples_per_second": 20.063,
158
  "eval_steps_per_second": 5.016,
159
  "step": 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
160
  }
161
  ],
162
  "logging_steps": 50,
@@ -176,7 +251,7 @@
176
  "attributes": {}
177
  }
178
  },
179
- "total_flos": 1.2852129603531571e+17,
180
  "train_batch_size": 16,
181
  "trial_name": null,
182
  "trial_params": null
 
1
  {
2
+ "best_metric": 1.6910121440887451,
3
+ "best_model_checkpoint": "Pricer-FineTune-OpenSource-2024-10-23_08.48.15/checkpoint-750",
4
+ "epoch": 1.2,
5
  "eval_steps": 50,
6
+ "global_step": 750,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
157
  "eval_samples_per_second": 20.063,
158
  "eval_steps_per_second": 5.016,
159
  "step": 500
160
+ },
161
+ {
162
+ "epoch": 0.88,
163
+ "grad_norm": 1.2332689762115479,
164
+ "learning_rate": 8.292667103996738e-05,
165
+ "loss": 1.7313,
166
+ "step": 550
167
+ },
168
+ {
169
+ "epoch": 0.88,
170
+ "eval_loss": 1.7032952308654785,
171
+ "eval_runtime": 4.9781,
172
+ "eval_samples_per_second": 20.088,
173
+ "eval_steps_per_second": 5.022,
174
+ "step": 550
175
+ },
176
+ {
177
+ "epoch": 0.96,
178
+ "grad_norm": 1.2375348806381226,
179
+ "learning_rate": 7.955677188099235e-05,
180
+ "loss": 1.6986,
181
+ "step": 600
182
+ },
183
+ {
184
+ "epoch": 0.96,
185
+ "eval_loss": 1.6838198900222778,
186
+ "eval_runtime": 4.9789,
187
+ "eval_samples_per_second": 20.085,
188
+ "eval_steps_per_second": 5.021,
189
+ "step": 600
190
+ },
191
+ {
192
+ "epoch": 1.04,
193
+ "grad_norm": 1.6269843578338623,
194
+ "learning_rate": 7.59663573861888e-05,
195
+ "loss": 1.6478,
196
+ "step": 650
197
+ },
198
+ {
199
+ "epoch": 1.04,
200
+ "eval_loss": 1.717869520187378,
201
+ "eval_runtime": 4.9837,
202
+ "eval_samples_per_second": 20.065,
203
+ "eval_steps_per_second": 5.016,
204
+ "step": 650
205
+ },
206
+ {
207
+ "epoch": 1.12,
208
+ "grad_norm": 1.6488033533096313,
209
+ "learning_rate": 7.218221469798465e-05,
210
+ "loss": 1.6154,
211
+ "step": 700
212
+ },
213
+ {
214
+ "epoch": 1.12,
215
+ "eval_loss": 1.671476125717163,
216
+ "eval_runtime": 5.0061,
217
+ "eval_samples_per_second": 19.976,
218
+ "eval_steps_per_second": 4.994,
219
+ "step": 700
220
+ },
221
+ {
222
+ "epoch": 1.2,
223
+ "grad_norm": 2.118487596511841,
224
+ "learning_rate": 6.823257631413276e-05,
225
+ "loss": 1.5951,
226
+ "step": 750
227
+ },
228
+ {
229
+ "epoch": 1.2,
230
+ "eval_loss": 1.6910121440887451,
231
+ "eval_runtime": 4.9788,
232
+ "eval_samples_per_second": 20.085,
233
+ "eval_steps_per_second": 5.021,
234
+ "step": 750
235
  }
236
  ],
237
  "logging_steps": 50,
 
251
  "attributes": {}
252
  }
253
  },
254
+ "total_flos": 1.927851979580375e+17,
255
  "train_batch_size": 16,
256
  "trial_name": null,
257
  "trial_params": null