iamnguyen committed
Commit bf1593c · verified · 1 Parent(s): 03d7759

Training in progress, step 2472, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7f0114b81da15b6e0edb34df7bec916aec9ae13a9b8de447966c00c46642455a
+oid sha256:e011c421a60c7810ee24b763468071afa26d0387457e13695871b896b3574643
 size 147770496
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7e4b82b14943b2783fab220f436567f79afa2f07a9450944f80bd10e44ee4d56
+oid sha256:2eb75a806bcf4890f062200854fe7f6686e84cf4c12a91ba62120df4c2eb5adf
 size 75455810
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8ed5e4e4db4550f7bdc953fe7ff92dc2e4e1604d975edafb5fd4e3d2ea5b4c14
+oid sha256:8d921c4c665dbdab2f06d441e60a0ada3a95bd5abf389b9ce1b6155ead82a6b9
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:73a898b953c5a06d67dc9a202d5e9a9060db1598ab27acaafc60405a146042dd
+oid sha256:dbe24ee8253c280cdee86e896039139243254be40cb03c9cd81a3d19b30dd4a2
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.9562865432904135,
+  "epoch": 0.9578364404432342,
   "eval_steps": 500,
-  "global_step": 2468,
+  "global_step": 2472,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -17283,6 +17283,34 @@
       "learning_rate": 4.8352667637490694e-08,
       "loss": 1.3931,
       "step": 2468
+    },
+    {
+      "epoch": 0.9566740175786187,
+      "grad_norm": 0.17757678031921387,
+      "learning_rate": 4.749444525663338e-08,
+      "loss": 1.3889,
+      "step": 2469
+    },
+    {
+      "epoch": 0.9570614918668239,
+      "grad_norm": 0.19035592675209045,
+      "learning_rate": 4.6643871274521525e-08,
+      "loss": 1.4116,
+      "step": 2470
+    },
+    {
+      "epoch": 0.9574489661550291,
+      "grad_norm": 0.18089327216148376,
+      "learning_rate": 4.5800947004738806e-08,
+      "loss": 1.4567,
+      "step": 2471
+    },
+    {
+      "epoch": 0.9578364404432342,
+      "grad_norm": 0.16921097040176392,
+      "learning_rate": 4.4965673749054474e-08,
+      "loss": 1.3548,
+      "step": 2472
     }
   ],
   "logging_steps": 1.0,
@@ -17302,7 +17330,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.440818876968626e+18,
+  "total_flos": 2.444950205418074e+18,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null