MeedoSam committed on
Commit
6cc3f95
1 Parent(s): de9f7dc

Uploaded checkpoint-15000

Browse files
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a15eb9315483b3e56b05091f1531b26cadbe36a6e736e6f70bda3f75221e6b8c
3
  size 119975656
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:948fb1e98580e330d4d842e33a37f9084da9e25cc7f48a4d02a36faaddab5b4e
3
  size 119975656
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b1066f24e58a5547030a7f83888490c431db8d182fcf127c93d1f95d5c5063ec
3
  size 60477396
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:099dcb4361bb2049aa6db3b6e81a9f34b1202083b425b25c3af7c20f9259fe22
3
  size 60477396
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:21341ea1e3d0ca174eca172ae8f5dd909607c3fbf76f8554ed9f6a5b644ebf0d
3
- size 14308
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:099feb85b2265fa46cec748072b982183c4dbb871e47eb59c01b10707a2a6958
3
+ size 14244
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e92d596f376f9a515800068cef0d91e543a25c37a7a7bdea3611bf7c16f7dcb0
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c59c86cac1045db21cd2eb48ce1a34a6e8c676f8d6333a24ac6d07a63dbb2c3
3
  size 1064
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.3125,
5
  "eval_steps": 2500,
6
- "global_step": 12500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -131,6 +131,35 @@
131
  "eval_samples_per_second": 4.94,
132
  "eval_steps_per_second": 4.94,
133
  "step": 12500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
134
  }
135
  ],
136
  "logging_steps": 1000,
@@ -138,7 +167,7 @@
138
  "num_input_tokens_seen": 0,
139
  "num_train_epochs": 1,
140
  "save_steps": 2500,
141
- "total_flos": 2.012765749248e+17,
142
  "train_batch_size": 1,
143
  "trial_name": null,
144
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.375,
5
  "eval_steps": 2500,
6
+ "global_step": 15000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
131
  "eval_samples_per_second": 4.94,
132
  "eval_steps_per_second": 4.94,
133
  "step": 12500
134
+ },
135
+ {
136
+ "epoch": 0.33,
137
+ "grad_norm": 3.805086612701416,
138
+ "learning_rate": 1.2756521739130436e-05,
139
+ "loss": 1.4003,
140
+ "step": 13000
141
+ },
142
+ {
143
+ "epoch": 0.35,
144
+ "grad_norm": 6.0652241706848145,
145
+ "learning_rate": 1.21768115942029e-05,
146
+ "loss": 1.3952,
147
+ "step": 14000
148
+ },
149
+ {
150
+ "epoch": 0.38,
151
+ "grad_norm": 8.923527717590332,
152
+ "learning_rate": 1.1597681159420292e-05,
153
+ "loss": 1.3933,
154
+ "step": 15000
155
+ },
156
+ {
157
+ "epoch": 0.38,
158
+ "eval_loss": 1.3821483850479126,
159
+ "eval_runtime": 201.8609,
160
+ "eval_samples_per_second": 4.954,
161
+ "eval_steps_per_second": 4.954,
162
+ "step": 15000
163
  }
164
  ],
165
  "logging_steps": 1000,
 
167
  "num_input_tokens_seen": 0,
168
  "num_train_epochs": 1,
169
  "save_steps": 2500,
170
+ "total_flos": 2.4153188990976e+17,
171
  "train_batch_size": 1,
172
  "trial_name": null,
173
  "trial_params": null