TinyPixel committed on
Commit
07a6444
1 Parent(s): f6cbb1f

Upload folder using huggingface_hub

Browse files
adapter_config.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "auto_mapping": null,
3
- "base_model_name_or_path": "stabilityai/stablelm-3b-4e1t",
4
  "bias": "none",
5
  "fan_in_fan_out": false,
6
  "inference_mode": true,
 
1
  {
2
  "auto_mapping": null,
3
+ "base_model_name_or_path": "TinyPixel/stablelm",
4
  "bias": "none",
5
  "fan_in_fan_out": false,
6
  "inference_mode": true,
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8d692eef8783a5809c34d18a8eacf4d678fa37b56040ea26bf5f7a6a2a90cba4
3
  size 100299853
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c690e8b052a98abeb39872cb4668cf2cefd6ccb4ecc8881e14e1acb661e46e43
3
  size 100299853
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:122f1440bd279ce0452acc44cab5fd917d52919430140e96e0a1afa92a279274
3
  size 200654493
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:093f385b478c92bb4a100b5c24def9ad4b30990aca4931f46f9609ec3a38ddaf
3
  size 200654493
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5442a4645f109cec1748c1106425e6f87cb24f7c99ef94111bc9a1bef94005ce
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ad9ff80e395cf76c3eda3ae3a2c0eabca36c3b44b08450afed7ef200f0c1395
3
  size 14575
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:46e55040aa1af00a83d3cfc4742494f603f85096bc1e1f10cdb1175783f87cd0
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c529344100b83f0cbd85b750e8f59c4a3e7416e0ad1ad22eaaefcac9b50fef9d
3
  size 627
special_tokens_map.json CHANGED
@@ -1,6 +1,34 @@
1
  {
2
- "bos_token": "<|endoftext|>",
3
- "eos_token": "<|endoftext|>",
4
- "pad_token": "<|endoftext|>",
5
- "unk_token": "<|endoftext|>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  }
 
1
  {
2
+ "additional_special_tokens": [
3
+ "<|im_end|>",
4
+ "<|im_start|>"
5
+ ],
6
+ "bos_token": {
7
+ "content": "<|im_start|>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false
12
+ },
13
+ "eos_token": {
14
+ "content": "<|im_end|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false
19
+ },
20
+ "pad_token": {
21
+ "content": "<|endoftext|>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false
26
+ },
27
+ "unk_token": {
28
+ "content": "<|endoftext|>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false
33
+ }
34
  }
tokenizer.json CHANGED
@@ -227,6 +227,24 @@
227
  "rstrip": false,
228
  "normalized": true,
229
  "special": false
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
230
  }
231
  ],
232
  "normalizer": {
 
227
  "rstrip": false,
228
  "normalized": true,
229
  "special": false
230
+ },
231
+ {
232
+ "id": 50277,
233
+ "content": "<|im_end|>",
234
+ "single_word": false,
235
+ "lstrip": false,
236
+ "rstrip": false,
237
+ "normalized": false,
238
+ "special": true
239
+ },
240
+ {
241
+ "id": 50278,
242
+ "content": "<|im_start|>",
243
+ "single_word": false,
244
+ "lstrip": false,
245
+ "rstrip": false,
246
+ "normalized": false,
247
+ "special": true
248
  }
249
  ],
250
  "normalizer": {
tokenizer_config.json CHANGED
@@ -200,11 +200,31 @@
200
  "rstrip": false,
201
  "single_word": false,
202
  "special": false
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
203
  }
204
  },
205
- "bos_token": "<|endoftext|>",
 
 
 
 
206
  "clean_up_tokenization_spaces": true,
207
- "eos_token": "<|endoftext|>",
208
  "model_max_length": 1024,
209
  "pad_token": "<|endoftext|>",
210
  "tokenizer_class": "GPTNeoXTokenizer",
 
200
  "rstrip": false,
201
  "single_word": false,
202
  "special": false
203
+ },
204
+ "50277": {
205
+ "content": "<|im_end|>",
206
+ "lstrip": false,
207
+ "normalized": false,
208
+ "rstrip": false,
209
+ "single_word": false,
210
+ "special": true
211
+ },
212
+ "50278": {
213
+ "content": "<|im_start|>",
214
+ "lstrip": false,
215
+ "normalized": false,
216
+ "rstrip": false,
217
+ "single_word": false,
218
+ "special": true
219
  }
220
  },
221
+ "additional_special_tokens": [
222
+ "<|im_end|>",
223
+ "<|im_start|>"
224
+ ],
225
+ "bos_token": "<|im_start|>",
226
  "clean_up_tokenization_spaces": true,
227
+ "eos_token": "<|im_end|>",
228
  "model_max_length": 1024,
229
  "pad_token": "<|endoftext|>",
230
  "tokenizer_class": "GPTNeoXTokenizer",
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9939527212754261,
5
  "eval_steps": 500,
6
- "global_step": 113,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -11,345 +11,345 @@
11
  {
12
  "epoch": 0.02,
13
  "learning_rate": 3.3333333333333335e-05,
14
- "loss": 2.2167,
15
  "step": 2
16
  },
17
  {
18
  "epoch": 0.04,
19
  "learning_rate": 6.666666666666667e-05,
20
- "loss": 2.2388,
21
  "step": 4
22
  },
23
  {
24
  "epoch": 0.05,
25
  "learning_rate": 0.0001,
26
- "loss": 2.0807,
27
  "step": 6
28
  },
29
  {
30
  "epoch": 0.07,
31
  "learning_rate": 0.00013333333333333334,
32
- "loss": 2.0822,
33
  "step": 8
34
  },
35
  {
36
  "epoch": 0.09,
37
  "learning_rate": 0.0001666666666666667,
38
- "loss": 1.9579,
39
  "step": 10
40
  },
41
  {
42
  "epoch": 0.11,
43
  "learning_rate": 0.0002,
44
- "loss": 1.959,
45
  "step": 12
46
  },
47
  {
48
  "epoch": 0.12,
49
- "learning_rate": 0.00019603960396039606,
50
- "loss": 1.9575,
51
  "step": 14
52
  },
53
  {
54
  "epoch": 0.14,
55
- "learning_rate": 0.00019207920792079208,
56
- "loss": 1.8013,
57
  "step": 16
58
  },
59
  {
60
  "epoch": 0.16,
61
- "learning_rate": 0.00018811881188118812,
62
- "loss": 1.8639,
63
  "step": 18
64
  },
65
  {
66
  "epoch": 0.18,
67
- "learning_rate": 0.00018415841584158417,
68
- "loss": 1.9356,
69
  "step": 20
70
  },
71
  {
72
- "epoch": 0.19,
73
- "learning_rate": 0.00018019801980198022,
74
- "loss": 1.7797,
75
  "step": 22
76
  },
77
  {
78
  "epoch": 0.21,
79
- "learning_rate": 0.00017623762376237624,
80
- "loss": 1.8457,
81
  "step": 24
82
  },
83
  {
84
  "epoch": 0.23,
85
- "learning_rate": 0.00017227722772277228,
86
- "loss": 1.8558,
87
  "step": 26
88
  },
89
  {
90
  "epoch": 0.25,
91
- "learning_rate": 0.00016831683168316833,
92
- "loss": 1.774,
93
  "step": 28
94
  },
95
  {
96
- "epoch": 0.26,
97
- "learning_rate": 0.00016435643564356435,
98
- "loss": 1.784,
99
  "step": 30
100
  },
101
  {
102
- "epoch": 0.28,
103
- "learning_rate": 0.00016039603960396042,
104
- "loss": 1.7394,
105
  "step": 32
106
  },
107
  {
108
  "epoch": 0.3,
109
- "learning_rate": 0.00015643564356435644,
110
- "loss": 1.7558,
111
  "step": 34
112
  },
113
  {
114
  "epoch": 0.32,
115
- "learning_rate": 0.0001524752475247525,
116
- "loss": 1.7461,
117
  "step": 36
118
  },
119
  {
120
- "epoch": 0.33,
121
- "learning_rate": 0.0001485148514851485,
122
- "loss": 1.7995,
123
  "step": 38
124
  },
125
  {
126
- "epoch": 0.35,
127
- "learning_rate": 0.00014455445544554456,
128
- "loss": 1.7515,
129
  "step": 40
130
  },
131
  {
132
- "epoch": 0.37,
133
- "learning_rate": 0.0001405940594059406,
134
- "loss": 1.8347,
135
  "step": 42
136
  },
137
  {
138
  "epoch": 0.39,
139
- "learning_rate": 0.00013663366336633665,
140
- "loss": 1.8173,
141
  "step": 44
142
  },
143
  {
144
- "epoch": 0.4,
145
- "learning_rate": 0.0001326732673267327,
146
- "loss": 1.593,
147
  "step": 46
148
  },
149
  {
150
- "epoch": 0.42,
151
- "learning_rate": 0.00012871287128712872,
152
- "loss": 1.667,
153
  "step": 48
154
  },
155
  {
156
- "epoch": 0.44,
157
- "learning_rate": 0.00012475247524752477,
158
- "loss": 1.8398,
159
  "step": 50
160
  },
161
  {
162
  "epoch": 0.46,
163
- "learning_rate": 0.0001207920792079208,
164
- "loss": 1.7201,
165
  "step": 52
166
  },
167
  {
168
- "epoch": 0.47,
169
- "learning_rate": 0.00011683168316831683,
170
- "loss": 1.7293,
171
  "step": 54
172
  },
173
  {
174
- "epoch": 0.49,
175
- "learning_rate": 0.00011287128712871287,
176
- "loss": 1.7615,
177
  "step": 56
178
  },
179
  {
180
- "epoch": 0.51,
181
- "learning_rate": 0.00010891089108910893,
182
- "loss": 1.7714,
183
  "step": 58
184
  },
185
  {
186
- "epoch": 0.53,
187
- "learning_rate": 0.00010495049504950496,
188
- "loss": 1.7434,
189
  "step": 60
190
  },
191
  {
192
  "epoch": 0.55,
193
- "learning_rate": 0.00010099009900990099,
194
- "loss": 1.7252,
195
  "step": 62
196
  },
197
  {
198
- "epoch": 0.56,
199
- "learning_rate": 9.702970297029703e-05,
200
- "loss": 1.7319,
201
  "step": 64
202
  },
203
  {
204
- "epoch": 0.58,
205
- "learning_rate": 9.306930693069307e-05,
206
- "loss": 1.7242,
207
  "step": 66
208
  },
209
  {
210
- "epoch": 0.6,
211
- "learning_rate": 8.910891089108912e-05,
212
- "loss": 1.7719,
213
  "step": 68
214
  },
215
  {
216
  "epoch": 0.62,
217
- "learning_rate": 8.514851485148515e-05,
218
- "loss": 1.7428,
219
  "step": 70
220
  },
221
  {
222
- "epoch": 0.63,
223
- "learning_rate": 8.11881188118812e-05,
224
- "loss": 1.7447,
225
  "step": 72
226
  },
227
  {
228
- "epoch": 0.65,
229
- "learning_rate": 7.722772277227723e-05,
230
- "loss": 1.7924,
231
  "step": 74
232
  },
233
  {
234
- "epoch": 0.67,
235
- "learning_rate": 7.326732673267327e-05,
236
- "loss": 1.7244,
237
  "step": 76
238
  },
239
  {
240
- "epoch": 0.69,
241
- "learning_rate": 6.93069306930693e-05,
242
- "loss": 1.7086,
243
  "step": 78
244
  },
245
  {
246
- "epoch": 0.7,
247
- "learning_rate": 6.534653465346535e-05,
248
- "loss": 1.693,
249
  "step": 80
250
  },
251
  {
252
- "epoch": 0.72,
253
- "learning_rate": 6.13861386138614e-05,
254
- "loss": 1.7037,
255
  "step": 82
256
  },
257
  {
258
- "epoch": 0.74,
259
- "learning_rate": 5.742574257425742e-05,
260
- "loss": 1.7431,
261
  "step": 84
262
  },
263
  {
264
- "epoch": 0.76,
265
- "learning_rate": 5.346534653465347e-05,
266
- "loss": 1.8218,
267
  "step": 86
268
  },
269
  {
270
- "epoch": 0.77,
271
- "learning_rate": 4.950495049504951e-05,
272
- "loss": 1.7956,
273
  "step": 88
274
  },
275
  {
276
- "epoch": 0.79,
277
- "learning_rate": 4.554455445544555e-05,
278
- "loss": 1.7614,
279
  "step": 90
280
  },
281
  {
282
- "epoch": 0.81,
283
- "learning_rate": 4.158415841584158e-05,
284
- "loss": 1.7205,
285
  "step": 92
286
  },
287
  {
288
- "epoch": 0.83,
289
- "learning_rate": 3.762376237623763e-05,
290
- "loss": 1.753,
291
  "step": 94
292
  },
293
  {
294
- "epoch": 0.84,
295
- "learning_rate": 3.366336633663367e-05,
296
- "loss": 1.6786,
297
  "step": 96
298
  },
299
  {
300
- "epoch": 0.86,
301
- "learning_rate": 2.9702970297029702e-05,
302
- "loss": 1.849,
303
  "step": 98
304
  },
305
  {
306
- "epoch": 0.88,
307
- "learning_rate": 2.5742574257425746e-05,
308
- "loss": 1.7078,
309
  "step": 100
310
  },
311
  {
312
- "epoch": 0.9,
313
- "learning_rate": 2.1782178217821783e-05,
314
- "loss": 1.6983,
315
  "step": 102
316
  },
317
  {
318
- "epoch": 0.91,
319
- "learning_rate": 1.7821782178217823e-05,
320
- "loss": 1.729,
321
  "step": 104
322
  },
323
  {
324
- "epoch": 0.93,
325
- "learning_rate": 1.3861386138613863e-05,
326
- "loss": 1.6216,
327
  "step": 106
328
  },
329
  {
330
- "epoch": 0.95,
331
- "learning_rate": 9.900990099009901e-06,
332
- "loss": 1.6221,
333
  "step": 108
334
  },
335
  {
336
- "epoch": 0.97,
337
- "learning_rate": 5.940594059405941e-06,
338
- "loss": 1.7756,
339
  "step": 110
340
  },
341
  {
342
- "epoch": 0.99,
343
- "learning_rate": 1.9801980198019803e-06,
344
- "loss": 1.679,
345
  "step": 112
346
  }
347
  ],
348
  "logging_steps": 2,
349
- "max_steps": 113,
350
  "num_train_epochs": 1,
351
  "save_steps": 500,
352
- "total_flos": 2.9899580069904384e+16,
353
  "trial_name": null,
354
  "trial_params": null
355
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
  "eval_steps": 500,
6
+ "global_step": 112,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
11
  {
12
  "epoch": 0.02,
13
  "learning_rate": 3.3333333333333335e-05,
14
+ "loss": 2.2162,
15
  "step": 2
16
  },
17
  {
18
  "epoch": 0.04,
19
  "learning_rate": 6.666666666666667e-05,
20
+ "loss": 2.3762,
21
  "step": 4
22
  },
23
  {
24
  "epoch": 0.05,
25
  "learning_rate": 0.0001,
26
+ "loss": 2.2032,
27
  "step": 6
28
  },
29
  {
30
  "epoch": 0.07,
31
  "learning_rate": 0.00013333333333333334,
32
+ "loss": 2.194,
33
  "step": 8
34
  },
35
  {
36
  "epoch": 0.09,
37
  "learning_rate": 0.0001666666666666667,
38
+ "loss": 2.1206,
39
  "step": 10
40
  },
41
  {
42
  "epoch": 0.11,
43
  "learning_rate": 0.0002,
44
+ "loss": 2.0957,
45
  "step": 12
46
  },
47
  {
48
  "epoch": 0.12,
49
+ "learning_rate": 0.000196,
50
+ "loss": 1.9569,
51
  "step": 14
52
  },
53
  {
54
  "epoch": 0.14,
55
+ "learning_rate": 0.000192,
56
+ "loss": 1.8578,
57
  "step": 16
58
  },
59
  {
60
  "epoch": 0.16,
61
+ "learning_rate": 0.000188,
62
+ "loss": 1.9662,
63
  "step": 18
64
  },
65
  {
66
  "epoch": 0.18,
67
+ "learning_rate": 0.00018400000000000003,
68
+ "loss": 1.9337,
69
  "step": 20
70
  },
71
  {
72
+ "epoch": 0.2,
73
+ "learning_rate": 0.00018,
74
+ "loss": 1.9318,
75
  "step": 22
76
  },
77
  {
78
  "epoch": 0.21,
79
+ "learning_rate": 0.00017600000000000002,
80
+ "loss": 1.9165,
81
  "step": 24
82
  },
83
  {
84
  "epoch": 0.23,
85
+ "learning_rate": 0.000172,
86
+ "loss": 1.8915,
87
  "step": 26
88
  },
89
  {
90
  "epoch": 0.25,
91
+ "learning_rate": 0.000168,
92
+ "loss": 1.9173,
93
  "step": 28
94
  },
95
  {
96
+ "epoch": 0.27,
97
+ "learning_rate": 0.000164,
98
+ "loss": 1.8664,
99
  "step": 30
100
  },
101
  {
102
+ "epoch": 0.29,
103
+ "learning_rate": 0.00016,
104
+ "loss": 1.889,
105
  "step": 32
106
  },
107
  {
108
  "epoch": 0.3,
109
+ "learning_rate": 0.00015600000000000002,
110
+ "loss": 1.8596,
111
  "step": 34
112
  },
113
  {
114
  "epoch": 0.32,
115
+ "learning_rate": 0.000152,
116
+ "loss": 1.8794,
117
  "step": 36
118
  },
119
  {
120
+ "epoch": 0.34,
121
+ "learning_rate": 0.000148,
122
+ "loss": 1.7943,
123
  "step": 38
124
  },
125
  {
126
+ "epoch": 0.36,
127
+ "learning_rate": 0.000144,
128
+ "loss": 1.845,
129
  "step": 40
130
  },
131
  {
132
+ "epoch": 0.38,
133
+ "learning_rate": 0.00014,
134
+ "loss": 1.8562,
135
  "step": 42
136
  },
137
  {
138
  "epoch": 0.39,
139
+ "learning_rate": 0.00013600000000000003,
140
+ "loss": 1.8272,
141
  "step": 44
142
  },
143
  {
144
+ "epoch": 0.41,
145
+ "learning_rate": 0.000132,
146
+ "loss": 1.7889,
147
  "step": 46
148
  },
149
  {
150
+ "epoch": 0.43,
151
+ "learning_rate": 0.00012800000000000002,
152
+ "loss": 1.9758,
153
  "step": 48
154
  },
155
  {
156
+ "epoch": 0.45,
157
+ "learning_rate": 0.000124,
158
+ "loss": 1.8208,
159
  "step": 50
160
  },
161
  {
162
  "epoch": 0.46,
163
+ "learning_rate": 0.00012,
164
+ "loss": 1.8818,
165
  "step": 52
166
  },
167
  {
168
+ "epoch": 0.48,
169
+ "learning_rate": 0.000116,
170
+ "loss": 1.787,
171
  "step": 54
172
  },
173
  {
174
+ "epoch": 0.5,
175
+ "learning_rate": 0.00011200000000000001,
176
+ "loss": 1.772,
177
  "step": 56
178
  },
179
  {
180
+ "epoch": 0.52,
181
+ "learning_rate": 0.00010800000000000001,
182
+ "loss": 1.9647,
183
  "step": 58
184
  },
185
  {
186
+ "epoch": 0.54,
187
+ "learning_rate": 0.00010400000000000001,
188
+ "loss": 1.8582,
189
  "step": 60
190
  },
191
  {
192
  "epoch": 0.55,
193
+ "learning_rate": 0.0001,
194
+ "loss": 1.7461,
195
  "step": 62
196
  },
197
  {
198
+ "epoch": 0.57,
199
+ "learning_rate": 9.6e-05,
200
+ "loss": 1.9042,
201
  "step": 64
202
  },
203
  {
204
+ "epoch": 0.59,
205
+ "learning_rate": 9.200000000000001e-05,
206
+ "loss": 1.8402,
207
  "step": 66
208
  },
209
  {
210
+ "epoch": 0.61,
211
+ "learning_rate": 8.800000000000001e-05,
212
+ "loss": 1.8639,
213
  "step": 68
214
  },
215
  {
216
  "epoch": 0.62,
217
+ "learning_rate": 8.4e-05,
218
+ "loss": 1.8673,
219
  "step": 70
220
  },
221
  {
222
+ "epoch": 0.64,
223
+ "learning_rate": 8e-05,
224
+ "loss": 1.9321,
225
  "step": 72
226
  },
227
  {
228
+ "epoch": 0.66,
229
+ "learning_rate": 7.6e-05,
230
+ "loss": 1.836,
231
  "step": 74
232
  },
233
  {
234
+ "epoch": 0.68,
235
+ "learning_rate": 7.2e-05,
236
+ "loss": 1.8173,
237
  "step": 76
238
  },
239
  {
240
+ "epoch": 0.7,
241
+ "learning_rate": 6.800000000000001e-05,
242
+ "loss": 1.7188,
243
  "step": 78
244
  },
245
  {
246
+ "epoch": 0.71,
247
+ "learning_rate": 6.400000000000001e-05,
248
+ "loss": 1.7957,
249
  "step": 80
250
  },
251
  {
252
+ "epoch": 0.73,
253
+ "learning_rate": 6e-05,
254
+ "loss": 1.8139,
255
  "step": 82
256
  },
257
  {
258
+ "epoch": 0.75,
259
+ "learning_rate": 5.6000000000000006e-05,
260
+ "loss": 1.811,
261
  "step": 84
262
  },
263
  {
264
+ "epoch": 0.77,
265
+ "learning_rate": 5.2000000000000004e-05,
266
+ "loss": 1.7955,
267
  "step": 86
268
  },
269
  {
270
+ "epoch": 0.79,
271
+ "learning_rate": 4.8e-05,
272
+ "loss": 1.9184,
273
  "step": 88
274
  },
275
  {
276
+ "epoch": 0.8,
277
+ "learning_rate": 4.4000000000000006e-05,
278
+ "loss": 1.8547,
279
  "step": 90
280
  },
281
  {
282
+ "epoch": 0.82,
283
+ "learning_rate": 4e-05,
284
+ "loss": 1.7657,
285
  "step": 92
286
  },
287
  {
288
+ "epoch": 0.84,
289
+ "learning_rate": 3.6e-05,
290
+ "loss": 1.9175,
291
  "step": 94
292
  },
293
  {
294
+ "epoch": 0.86,
295
+ "learning_rate": 3.2000000000000005e-05,
296
+ "loss": 1.7922,
297
  "step": 96
298
  },
299
  {
300
+ "epoch": 0.88,
301
+ "learning_rate": 2.8000000000000003e-05,
302
+ "loss": 1.896,
303
  "step": 98
304
  },
305
  {
306
+ "epoch": 0.89,
307
+ "learning_rate": 2.4e-05,
308
+ "loss": 1.8632,
309
  "step": 100
310
  },
311
  {
312
+ "epoch": 0.91,
313
+ "learning_rate": 2e-05,
314
+ "loss": 1.881,
315
  "step": 102
316
  },
317
  {
318
+ "epoch": 0.93,
319
+ "learning_rate": 1.6000000000000003e-05,
320
+ "loss": 1.7914,
321
  "step": 104
322
  },
323
  {
324
+ "epoch": 0.95,
325
+ "learning_rate": 1.2e-05,
326
+ "loss": 1.7627,
327
  "step": 106
328
  },
329
  {
330
+ "epoch": 0.96,
331
+ "learning_rate": 8.000000000000001e-06,
332
+ "loss": 1.7977,
333
  "step": 108
334
  },
335
  {
336
+ "epoch": 0.98,
337
+ "learning_rate": 4.000000000000001e-06,
338
+ "loss": 1.8781,
339
  "step": 110
340
  },
341
  {
342
+ "epoch": 1.0,
343
+ "learning_rate": 0.0,
344
+ "loss": 1.7793,
345
  "step": 112
346
  }
347
  ],
348
  "logging_steps": 2,
349
+ "max_steps": 112,
350
  "num_train_epochs": 1,
351
  "save_steps": 500,
352
+ "total_flos": 2.9635038387634176e+16,
353
  "trial_name": null,
354
  "trial_params": null
355
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5a0844a6255b035bd43504d20e5da3307bce8a53af0b487f60c8262ccb1779f3
3
  size 4027
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5868d56ccca2b526a144ee1f7fa1118d47eb27be4eef91132608f5f6924f8c7c
3
  size 4027