diagonalge committed (verified)
Commit 457c9ed · Parent(s): 7c6a059

Training in progress, step 40, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b768e3a4d249cd9bb3425b75641f3e3d66c2119e027046042a354d0900705a9c
+oid sha256:4b011a4c60534b932db042fd736d34e87af082176c079c6271cdbf3136a38ac8
 size 101752088
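Each binary file in this commit is stored through Git LFS, so its diff only swaps the pointer's "oid sha256" line, which per the LFS spec is the SHA-256 digest of the actual object; the recorded size is unchanged because the checkpoint tensors keep the same shapes. A minimal sketch, assuming the checkpoint files have been pulled locally (the path below is illustrative), for checking that a downloaded file matches its pointer:

import hashlib

def lfs_sha256(path, chunk_size=1 << 20):
    # Compute the SHA-256 digest that a Git LFS pointer records as "oid sha256:...".
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

# Compare against the new oid in the pointer diff above.
digest = lfs_sha256("last-checkpoint/adapter_model.safetensors")
print(digest == "4b011a4c60534b932db042fd736d34e87af082176c079c6271cdbf3136a38ac8")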
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d8bc270d065791693bfe545dd959cf593a80072a2fe6e4cb2f28f6520970ef91
+oid sha256:6bb1754679627bc12d4f937b9e502f31a2534b4e172c3cc865c7c598bca6d2e4
 size 52046596
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f4c6a3932b0c6757b2a554606edacf63dde2370212156fc61645da06ea61feaa
+oid sha256:15979b93c5d9f4120bbd488b10f50e9f5e39387c20523984d041d80836523182
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:624cde959d3a917007c76687e7ed04f5f5ce5a570abfa20dd466a4e55f6684fa
+oid sha256:288483c81220cbf22f51f35045c4ef691c80220055a03dc4880f485f05c71ede
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.003776197369249166,
+  "epoch": 0.005034929825665555,
   "eval_steps": 25,
-  "global_step": 30,
+  "global_step": 40,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -233,6 +233,76 @@
       "learning_rate": 0.0001766044443118978,
       "loss": 0.0424,
       "step": 30
+    },
+    {
+      "epoch": 0.003902070614890805,
+      "grad_norm": 0.22993697226047516,
+      "learning_rate": 0.00017431448254773944,
+      "loss": 0.0041,
+      "step": 31
+    },
+    {
+      "epoch": 0.004027943860532444,
+      "grad_norm": 1.7465827465057373,
+      "learning_rate": 0.0001719339800338651,
+      "loss": 0.0228,
+      "step": 32
+    },
+    {
+      "epoch": 0.004153817106174082,
+      "grad_norm": 0.28992122411727905,
+      "learning_rate": 0.00016946583704589973,
+      "loss": 0.0067,
+      "step": 33
+    },
+    {
+      "epoch": 0.004279690351815722,
+      "grad_norm": 0.9798206090927124,
+      "learning_rate": 0.00016691306063588583,
+      "loss": 0.0262,
+      "step": 34
+    },
+    {
+      "epoch": 0.004405563597457361,
+      "grad_norm": 0.009784302674233913,
+      "learning_rate": 0.00016427876096865394,
+      "loss": 0.0007,
+      "step": 35
+    },
+    {
+      "epoch": 0.004531436843098999,
+      "grad_norm": 2.536167860031128,
+      "learning_rate": 0.0001615661475325658,
+      "loss": 1.0487,
+      "step": 36
+    },
+    {
+      "epoch": 0.0046573100887406385,
+      "grad_norm": 0.8000788688659668,
+      "learning_rate": 0.00015877852522924732,
+      "loss": 0.0149,
+      "step": 37
+    },
+    {
+      "epoch": 0.004783183334382277,
+      "grad_norm": 0.0,
+      "learning_rate": 0.0001559192903470747,
+      "loss": 0.0,
+      "step": 38
+    },
+    {
+      "epoch": 0.004909056580023916,
+      "grad_norm": 0.015558776445686817,
+      "learning_rate": 0.0001529919264233205,
+      "loss": 0.0014,
+      "step": 39
+    },
+    {
+      "epoch": 0.005034929825665555,
+      "grad_norm": 3.2701315879821777,
+      "learning_rate": 0.00015000000000000001,
+      "loss": 0.2162,
+      "step": 40
+    }
     }
   ],
   "logging_steps": 1,
@@ -252,7 +322,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.97561520488448e+16,
+  "total_flos": 2.63415360651264e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null