Rakhman16 committed on
Commit
3440cfd
·
verified ·
1 Parent(s): cdf8fe4

Training in progress, step 7500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4ee3e8177e6fadd097833e7afe6be3da19174e3ec791ceabfd5e2aef8276dfb7
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8a61fd73bc588dbdc24db480c467fa85865340704d30acbb701842098e2e69f
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a3b2b702f5cef156784c924666ba6f0516aba8a128ff1c8252b58be7c17dd2df
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b1a7761b758c7fd9d022ced7c255b59c0684b9396756996b22a9e7e9bd5c35b
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:05dd11bd3960ad0fa940da6d1dc661f5e30b05ee951391d57c064dbbfbc68d4f
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c102029d2f3d2ad8c4cbe0e9eb23a6db91408d1239150f6a5fe227f52911673
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f8b02bc33df1b7c20f0491626d0e42bd29d99a710da7df8f3162d39edb7196e8
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1aa8492157bf2c1d0061046d13454f95f14ea95d23b20e0a054ab57ecde42b5d
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.9093931837073983,
5
  "eval_steps": 500,
6
- "global_step": 7000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -121,6 +121,21 @@
121
  "learning_rate": 8.367414796342478e-06,
122
  "loss": 0.3333,
123
  "step": 7000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
  }
125
  ],
126
  "logging_steps": 500,
@@ -140,7 +155,7 @@
140
  "attributes": {}
141
  }
142
  },
143
- "total_flos": 1.704960301989888e+16,
144
  "train_batch_size": 4,
145
  "trial_name": null,
146
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 3.117206982543641,
5
  "eval_steps": 500,
6
+ "global_step": 7500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
121
  "learning_rate": 8.367414796342478e-06,
122
  "loss": 0.3333,
123
  "step": 7000
124
+ },
125
+ {
126
+ "epoch": 3.0,
127
+ "eval_loss": 0.2934778034687042,
128
+ "eval_runtime": 26.7608,
129
+ "eval_samples_per_second": 18.46,
130
+ "eval_steps_per_second": 4.634,
131
+ "step": 7218
132
+ },
133
+ {
134
+ "epoch": 3.117206982543641,
135
+ "grad_norm": 0.4342059791088104,
136
+ "learning_rate": 7.536159600997507e-06,
137
+ "loss": 0.3313,
138
+ "step": 7500
139
  }
140
  ],
141
  "logging_steps": 500,
 
155
  "attributes": {}
156
  }
157
  },
158
+ "total_flos": 1.826690984312832e+16,
159
  "train_batch_size": 4,
160
  "trial_name": null,
161
  "trial_params": null