Rakhman16 commited on
Commit
09c7cb8
·
verified ·
1 Parent(s): 3be10f4

Training in progress, step 8000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b8a61fd73bc588dbdc24db480c467fa85865340704d30acbb701842098e2e69f
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:96363d74eb8e505e30e70175f38cf365c3d95b9352b85ccbcdda14b0b97b0604
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7b1a7761b758c7fd9d022ced7c255b59c0684b9396756996b22a9e7e9bd5c35b
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9032c06bd1a8e5aba237fba062e9b942284f080fef0916342f268300ad8d5730
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5c102029d2f3d2ad8c4cbe0e9eb23a6db91408d1239150f6a5fe227f52911673
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a76e94d15e6f47128689bef83934eee553e34dee3554bde049b53753e0e7480
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1aa8492157bf2c1d0061046d13454f95f14ea95d23b20e0a054ab57ecde42b5d
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c3f10b5ea24e9191e915e4456d16cc3b17d3916946799417c52e3e474e43201
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 3.117206982543641,
5
  "eval_steps": 500,
6
- "global_step": 7500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -136,6 +136,13 @@
136
  "learning_rate": 7.536159600997507e-06,
137
  "loss": 0.3313,
138
  "step": 7500
 
 
 
 
 
 
 
139
  }
140
  ],
141
  "logging_steps": 500,
@@ -155,7 +162,7 @@
155
  "attributes": {}
156
  }
157
  },
158
- "total_flos": 1.826690984312832e+16,
159
  "train_batch_size": 4,
160
  "trial_name": null,
161
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 3.3250207813798838,
5
  "eval_steps": 500,
6
+ "global_step": 8000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
136
  "learning_rate": 7.536159600997507e-06,
137
  "loss": 0.3313,
138
  "step": 7500
139
+ },
140
+ {
141
+ "epoch": 3.3250207813798838,
142
+ "grad_norm": 0.9092524647712708,
143
+ "learning_rate": 6.704904405652536e-06,
144
+ "loss": 0.3265,
145
+ "step": 8000
146
  }
147
  ],
148
  "logging_steps": 500,
 
162
  "attributes": {}
163
  }
164
  },
165
+ "total_flos": 1.948482562424832e+16,
166
  "train_batch_size": 4,
167
  "trial_name": null,
168
  "trial_params": null