TinyPixel committed on
Commit
f1a6051
1 Parent(s): 07a6444

Upload folder using huggingface_hub

Browse files
adapter_config.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "auto_mapping": null,
3
- "base_model_name_or_path": "TinyPixel/stablelm",
4
  "bias": "none",
5
  "fan_in_fan_out": false,
6
  "inference_mode": true,
 
1
  {
2
  "auto_mapping": null,
3
+ "base_model_name_or_path": "stabilityai/stablelm-3b-4e1t",
4
  "bias": "none",
5
  "fan_in_fan_out": false,
6
  "inference_mode": true,
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c690e8b052a98abeb39872cb4668cf2cefd6ccb4ecc8881e14e1acb661e46e43
3
  size 100299853
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bae2073e36bb32cfbba8de769e77f64c03b07ec350b5827681ed7636c12884c1
3
  size 100299853
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:093f385b478c92bb4a100b5c24def9ad4b30990aca4931f46f9609ec3a38ddaf
3
  size 200654493
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ecbc943d06bb9e8c8a39e4f638b03e0faf3ed5c5c2026eadf7f0c97048905bf6
3
  size 200654493
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5ad9ff80e395cf76c3eda3ae3a2c0eabca36c3b44b08450afed7ef200f0c1395
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0ecb1da131f28ebbdaed7969d4c7cdc38820701e74de5e10734a014050ce8ad
3
  size 14575
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c529344100b83f0cbd85b750e8f59c4a3e7416e0ad1ad22eaaefcac9b50fef9d
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d95b72119680d337f83be2a4d08a065f8278279b4937edf5107b5f0d8275c4a5
3
  size 627
special_tokens_map.json CHANGED
@@ -1,34 +1,6 @@
1
  {
2
- "additional_special_tokens": [
3
- "<|im_end|>",
4
- "<|im_start|>"
5
- ],
6
- "bos_token": {
7
- "content": "<|im_start|>",
8
- "lstrip": false,
9
- "normalized": false,
10
- "rstrip": false,
11
- "single_word": false
12
- },
13
- "eos_token": {
14
- "content": "<|im_end|>",
15
- "lstrip": false,
16
- "normalized": false,
17
- "rstrip": false,
18
- "single_word": false
19
- },
20
- "pad_token": {
21
- "content": "<|endoftext|>",
22
- "lstrip": false,
23
- "normalized": false,
24
- "rstrip": false,
25
- "single_word": false
26
- },
27
- "unk_token": {
28
- "content": "<|endoftext|>",
29
- "lstrip": false,
30
- "normalized": false,
31
- "rstrip": false,
32
- "single_word": false
33
- }
34
  }
 
1
  {
2
+ "bos_token": "<|endoftext|>",
3
+ "eos_token": "<|endoftext|>",
4
+ "pad_token": "<|endoftext|>",
5
+ "unk_token": "<|endoftext|>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  }
tokenizer.json CHANGED
@@ -227,24 +227,6 @@
227
  "rstrip": false,
228
  "normalized": true,
229
  "special": false
230
- },
231
- {
232
- "id": 50277,
233
- "content": "<|im_end|>",
234
- "single_word": false,
235
- "lstrip": false,
236
- "rstrip": false,
237
- "normalized": false,
238
- "special": true
239
- },
240
- {
241
- "id": 50278,
242
- "content": "<|im_start|>",
243
- "single_word": false,
244
- "lstrip": false,
245
- "rstrip": false,
246
- "normalized": false,
247
- "special": true
248
  }
249
  ],
250
  "normalizer": {
 
227
  "rstrip": false,
228
  "normalized": true,
229
  "special": false
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
230
  }
231
  ],
232
  "normalizer": {
tokenizer_config.json CHANGED
@@ -200,31 +200,11 @@
200
  "rstrip": false,
201
  "single_word": false,
202
  "special": false
203
- },
204
- "50277": {
205
- "content": "<|im_end|>",
206
- "lstrip": false,
207
- "normalized": false,
208
- "rstrip": false,
209
- "single_word": false,
210
- "special": true
211
- },
212
- "50278": {
213
- "content": "<|im_start|>",
214
- "lstrip": false,
215
- "normalized": false,
216
- "rstrip": false,
217
- "single_word": false,
218
- "special": true
219
  }
220
  },
221
- "additional_special_tokens": [
222
- "<|im_end|>",
223
- "<|im_start|>"
224
- ],
225
- "bos_token": "<|im_start|>",
226
  "clean_up_tokenization_spaces": true,
227
- "eos_token": "<|im_end|>",
228
  "model_max_length": 1024,
229
  "pad_token": "<|endoftext|>",
230
  "tokenizer_class": "GPTNeoXTokenizer",
 
200
  "rstrip": false,
201
  "single_word": false,
202
  "special": false
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
203
  }
204
  },
205
+ "bos_token": "<|endoftext|>",
 
 
 
 
206
  "clean_up_tokenization_spaces": true,
207
+ "eos_token": "<|endoftext|>",
208
  "model_max_length": 1024,
209
  "pad_token": "<|endoftext|>",
210
  "tokenizer_class": "GPTNeoXTokenizer",
trainer_state.json CHANGED
@@ -1,355 +1,313 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.0,
5
  "eval_steps": 500,
6
- "global_step": 112,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.02,
13
- "learning_rate": 3.3333333333333335e-05,
14
- "loss": 2.2162,
15
  "step": 2
16
  },
17
  {
18
- "epoch": 0.04,
19
- "learning_rate": 6.666666666666667e-05,
20
- "loss": 2.3762,
21
  "step": 4
22
  },
23
  {
24
- "epoch": 0.05,
25
- "learning_rate": 0.0001,
26
- "loss": 2.2032,
27
  "step": 6
28
  },
29
  {
30
- "epoch": 0.07,
31
- "learning_rate": 0.00013333333333333334,
32
- "loss": 2.194,
33
  "step": 8
34
  },
35
  {
36
- "epoch": 0.09,
37
- "learning_rate": 0.0001666666666666667,
38
- "loss": 2.1206,
39
  "step": 10
40
  },
41
  {
42
- "epoch": 0.11,
43
- "learning_rate": 0.0002,
44
- "loss": 2.0957,
45
  "step": 12
46
  },
47
  {
48
- "epoch": 0.12,
49
- "learning_rate": 0.000196,
50
- "loss": 1.9569,
51
  "step": 14
52
  },
53
  {
54
- "epoch": 0.14,
55
- "learning_rate": 0.000192,
56
- "loss": 1.8578,
57
  "step": 16
58
  },
59
  {
60
- "epoch": 0.16,
61
- "learning_rate": 0.000188,
62
- "loss": 1.9662,
63
  "step": 18
64
  },
65
  {
66
- "epoch": 0.18,
67
- "learning_rate": 0.00018400000000000003,
68
- "loss": 1.9337,
69
  "step": 20
70
  },
71
  {
72
- "epoch": 0.2,
73
- "learning_rate": 0.00018,
74
- "loss": 1.9318,
75
  "step": 22
76
  },
77
  {
78
- "epoch": 0.21,
79
- "learning_rate": 0.00017600000000000002,
80
- "loss": 1.9165,
81
  "step": 24
82
  },
83
  {
84
- "epoch": 0.23,
85
- "learning_rate": 0.000172,
86
- "loss": 1.8915,
87
  "step": 26
88
  },
89
  {
90
- "epoch": 0.25,
91
- "learning_rate": 0.000168,
92
- "loss": 1.9173,
93
  "step": 28
94
  },
95
  {
96
- "epoch": 0.27,
97
- "learning_rate": 0.000164,
98
- "loss": 1.8664,
99
  "step": 30
100
  },
101
  {
102
- "epoch": 0.29,
103
- "learning_rate": 0.00016,
104
- "loss": 1.889,
105
  "step": 32
106
  },
107
  {
108
- "epoch": 0.3,
109
- "learning_rate": 0.00015600000000000002,
110
- "loss": 1.8596,
111
  "step": 34
112
  },
113
  {
114
- "epoch": 0.32,
115
- "learning_rate": 0.000152,
116
- "loss": 1.8794,
117
  "step": 36
118
  },
119
  {
120
- "epoch": 0.34,
121
- "learning_rate": 0.000148,
122
- "loss": 1.7943,
123
  "step": 38
124
  },
125
  {
126
- "epoch": 0.36,
127
- "learning_rate": 0.000144,
128
- "loss": 1.845,
129
  "step": 40
130
  },
131
  {
132
- "epoch": 0.38,
133
- "learning_rate": 0.00014,
134
- "loss": 1.8562,
135
  "step": 42
136
  },
137
  {
138
- "epoch": 0.39,
139
- "learning_rate": 0.00013600000000000003,
140
- "loss": 1.8272,
141
  "step": 44
142
  },
143
  {
144
- "epoch": 0.41,
145
- "learning_rate": 0.000132,
146
- "loss": 1.7889,
147
  "step": 46
148
  },
149
  {
150
- "epoch": 0.43,
151
- "learning_rate": 0.00012800000000000002,
152
- "loss": 1.9758,
153
  "step": 48
154
  },
155
  {
156
- "epoch": 0.45,
157
- "learning_rate": 0.000124,
158
- "loss": 1.8208,
159
  "step": 50
160
  },
161
  {
162
- "epoch": 0.46,
163
- "learning_rate": 0.00012,
164
- "loss": 1.8818,
165
  "step": 52
166
  },
167
  {
168
- "epoch": 0.48,
169
- "learning_rate": 0.000116,
170
- "loss": 1.787,
171
  "step": 54
172
  },
173
  {
174
- "epoch": 0.5,
175
- "learning_rate": 0.00011200000000000001,
176
- "loss": 1.772,
177
  "step": 56
178
  },
179
  {
180
- "epoch": 0.52,
181
- "learning_rate": 0.00010800000000000001,
182
- "loss": 1.9647,
183
  "step": 58
184
  },
185
  {
186
- "epoch": 0.54,
187
- "learning_rate": 0.00010400000000000001,
188
- "loss": 1.8582,
189
  "step": 60
190
  },
191
  {
192
- "epoch": 0.55,
193
- "learning_rate": 0.0001,
194
- "loss": 1.7461,
195
  "step": 62
196
  },
197
  {
198
- "epoch": 0.57,
199
- "learning_rate": 9.6e-05,
200
- "loss": 1.9042,
201
  "step": 64
202
  },
203
  {
204
- "epoch": 0.59,
205
- "learning_rate": 9.200000000000001e-05,
206
- "loss": 1.8402,
207
  "step": 66
208
  },
209
  {
210
- "epoch": 0.61,
211
- "learning_rate": 8.800000000000001e-05,
212
- "loss": 1.8639,
213
  "step": 68
214
  },
215
  {
216
- "epoch": 0.62,
217
- "learning_rate": 8.4e-05,
218
- "loss": 1.8673,
219
  "step": 70
220
  },
221
  {
222
- "epoch": 0.64,
223
- "learning_rate": 8e-05,
224
- "loss": 1.9321,
225
  "step": 72
226
  },
227
  {
228
- "epoch": 0.66,
229
- "learning_rate": 7.6e-05,
230
- "loss": 1.836,
231
  "step": 74
232
  },
233
  {
234
- "epoch": 0.68,
235
- "learning_rate": 7.2e-05,
236
- "loss": 1.8173,
237
  "step": 76
238
  },
239
  {
240
- "epoch": 0.7,
241
- "learning_rate": 6.800000000000001e-05,
242
- "loss": 1.7188,
243
  "step": 78
244
  },
245
  {
246
- "epoch": 0.71,
247
- "learning_rate": 6.400000000000001e-05,
248
- "loss": 1.7957,
249
  "step": 80
250
  },
251
  {
252
- "epoch": 0.73,
253
- "learning_rate": 6e-05,
254
- "loss": 1.8139,
255
  "step": 82
256
  },
257
  {
258
- "epoch": 0.75,
259
- "learning_rate": 5.6000000000000006e-05,
260
- "loss": 1.811,
261
  "step": 84
262
  },
263
  {
264
- "epoch": 0.77,
265
- "learning_rate": 5.2000000000000004e-05,
266
- "loss": 1.7955,
267
  "step": 86
268
  },
269
  {
270
- "epoch": 0.79,
271
- "learning_rate": 4.8e-05,
272
- "loss": 1.9184,
273
  "step": 88
274
  },
275
  {
276
- "epoch": 0.8,
277
- "learning_rate": 4.4000000000000006e-05,
278
- "loss": 1.8547,
279
  "step": 90
280
  },
281
  {
282
- "epoch": 0.82,
283
- "learning_rate": 4e-05,
284
- "loss": 1.7657,
285
  "step": 92
286
  },
287
  {
288
- "epoch": 0.84,
289
- "learning_rate": 3.6e-05,
290
- "loss": 1.9175,
291
  "step": 94
292
  },
293
  {
294
- "epoch": 0.86,
295
- "learning_rate": 3.2000000000000005e-05,
296
- "loss": 1.7922,
297
  "step": 96
298
  },
299
  {
300
- "epoch": 0.88,
301
- "learning_rate": 2.8000000000000003e-05,
302
- "loss": 1.896,
303
  "step": 98
304
- },
305
- {
306
- "epoch": 0.89,
307
- "learning_rate": 2.4e-05,
308
- "loss": 1.8632,
309
- "step": 100
310
- },
311
- {
312
- "epoch": 0.91,
313
- "learning_rate": 2e-05,
314
- "loss": 1.881,
315
- "step": 102
316
- },
317
- {
318
- "epoch": 0.93,
319
- "learning_rate": 1.6000000000000003e-05,
320
- "loss": 1.7914,
321
- "step": 104
322
- },
323
- {
324
- "epoch": 0.95,
325
- "learning_rate": 1.2e-05,
326
- "loss": 1.7627,
327
- "step": 106
328
- },
329
- {
330
- "epoch": 0.96,
331
- "learning_rate": 8.000000000000001e-06,
332
- "loss": 1.7977,
333
- "step": 108
334
- },
335
- {
336
- "epoch": 0.98,
337
- "learning_rate": 4.000000000000001e-06,
338
- "loss": 1.8781,
339
- "step": 110
340
- },
341
- {
342
- "epoch": 1.0,
343
- "learning_rate": 0.0,
344
- "loss": 1.7793,
345
- "step": 112
346
  }
347
  ],
348
  "logging_steps": 2,
349
- "max_steps": 112,
350
- "num_train_epochs": 1,
351
  "save_steps": 500,
352
- "total_flos": 2.9635038387634176e+16,
353
  "trial_name": null,
354
  "trial_params": null
355
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.955223880597015,
5
  "eval_steps": 500,
6
+ "global_step": 99,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.06,
13
+ "learning_rate": 4e-05,
14
+ "loss": 2.0227,
15
  "step": 2
16
  },
17
  {
18
+ "epoch": 0.12,
19
+ "learning_rate": 8e-05,
20
+ "loss": 2.1777,
21
  "step": 4
22
  },
23
  {
24
+ "epoch": 0.18,
25
+ "learning_rate": 0.00012,
26
+ "loss": 1.5886,
27
  "step": 6
28
  },
29
  {
30
+ "epoch": 0.24,
31
+ "learning_rate": 0.00016,
32
+ "loss": 1.7764,
33
  "step": 8
34
  },
35
  {
36
+ "epoch": 0.3,
37
+ "learning_rate": 0.0002,
38
+ "loss": 1.8589,
39
  "step": 10
40
  },
41
  {
42
+ "epoch": 0.36,
43
+ "learning_rate": 0.0001955056179775281,
44
+ "loss": 1.8044,
45
  "step": 12
46
  },
47
  {
48
+ "epoch": 0.42,
49
+ "learning_rate": 0.00019101123595505618,
50
+ "loss": 1.3535,
51
  "step": 14
52
  },
53
  {
54
+ "epoch": 0.48,
55
+ "learning_rate": 0.00018651685393258427,
56
+ "loss": 1.5432,
57
  "step": 16
58
  },
59
  {
60
+ "epoch": 0.54,
61
+ "learning_rate": 0.00018202247191011236,
62
+ "loss": 1.4256,
63
  "step": 18
64
  },
65
  {
66
+ "epoch": 0.6,
67
+ "learning_rate": 0.00017752808988764045,
68
+ "loss": 1.3982,
69
  "step": 20
70
  },
71
  {
72
+ "epoch": 0.66,
73
+ "learning_rate": 0.00017303370786516853,
74
+ "loss": 1.2797,
75
  "step": 22
76
  },
77
  {
78
+ "epoch": 0.72,
79
+ "learning_rate": 0.00016853932584269662,
80
+ "loss": 1.5707,
81
  "step": 24
82
  },
83
  {
84
+ "epoch": 0.78,
85
+ "learning_rate": 0.00016404494382022474,
86
+ "loss": 1.5817,
87
  "step": 26
88
  },
89
  {
90
+ "epoch": 0.84,
91
+ "learning_rate": 0.0001595505617977528,
92
+ "loss": 1.4543,
93
  "step": 28
94
  },
95
  {
96
+ "epoch": 0.9,
97
+ "learning_rate": 0.0001550561797752809,
98
+ "loss": 1.7071,
99
  "step": 30
100
  },
101
  {
102
+ "epoch": 0.96,
103
+ "learning_rate": 0.000150561797752809,
104
+ "loss": 1.8541,
105
  "step": 32
106
  },
107
  {
108
+ "epoch": 1.01,
109
+ "learning_rate": 0.0001460674157303371,
110
+ "loss": 2.0083,
111
  "step": 34
112
  },
113
  {
114
+ "epoch": 1.07,
115
+ "learning_rate": 0.00014157303370786517,
116
+ "loss": 1.5323,
117
  "step": 36
118
  },
119
  {
120
+ "epoch": 1.13,
121
+ "learning_rate": 0.00013707865168539326,
122
+ "loss": 1.6096,
123
  "step": 38
124
  },
125
  {
126
+ "epoch": 1.19,
127
+ "learning_rate": 0.00013258426966292135,
128
+ "loss": 1.7131,
129
  "step": 40
130
  },
131
  {
132
+ "epoch": 1.25,
133
+ "learning_rate": 0.00012808988764044944,
134
+ "loss": 1.5119,
135
  "step": 42
136
  },
137
  {
138
+ "epoch": 1.31,
139
+ "learning_rate": 0.00012359550561797752,
140
+ "loss": 1.1917,
141
  "step": 44
142
  },
143
  {
144
+ "epoch": 1.37,
145
+ "learning_rate": 0.00011910112359550563,
146
+ "loss": 1.3534,
147
  "step": 46
148
  },
149
  {
150
+ "epoch": 1.43,
151
+ "learning_rate": 0.0001146067415730337,
152
+ "loss": 1.2448,
153
  "step": 48
154
  },
155
  {
156
+ "epoch": 1.49,
157
+ "learning_rate": 0.0001101123595505618,
158
+ "loss": 1.4007,
159
  "step": 50
160
  },
161
  {
162
+ "epoch": 1.55,
163
+ "learning_rate": 0.00010561797752808989,
164
+ "loss": 1.4253,
165
  "step": 52
166
  },
167
  {
168
+ "epoch": 1.61,
169
+ "learning_rate": 0.00010112359550561799,
170
+ "loss": 1.7311,
171
  "step": 54
172
  },
173
  {
174
+ "epoch": 1.67,
175
+ "learning_rate": 9.662921348314608e-05,
176
+ "loss": 1.0368,
177
  "step": 56
178
  },
179
  {
180
+ "epoch": 1.73,
181
+ "learning_rate": 9.213483146067416e-05,
182
+ "loss": 1.3187,
183
  "step": 58
184
  },
185
  {
186
+ "epoch": 1.79,
187
+ "learning_rate": 8.764044943820225e-05,
188
+ "loss": 1.2071,
189
  "step": 60
190
  },
191
  {
192
+ "epoch": 1.85,
193
+ "learning_rate": 8.314606741573034e-05,
194
+ "loss": 1.764,
195
  "step": 62
196
  },
197
  {
198
+ "epoch": 1.91,
199
+ "learning_rate": 7.865168539325843e-05,
200
+ "loss": 1.4301,
201
  "step": 64
202
  },
203
  {
204
+ "epoch": 1.97,
205
+ "learning_rate": 7.415730337078653e-05,
206
+ "loss": 1.586,
207
  "step": 66
208
  },
209
  {
210
+ "epoch": 2.03,
211
+ "learning_rate": 6.966292134831462e-05,
212
+ "loss": 1.5166,
213
  "step": 68
214
  },
215
  {
216
+ "epoch": 2.09,
217
+ "learning_rate": 6.51685393258427e-05,
218
+ "loss": 1.4832,
219
  "step": 70
220
  },
221
  {
222
+ "epoch": 2.15,
223
+ "learning_rate": 6.067415730337079e-05,
224
+ "loss": 1.3071,
225
  "step": 72
226
  },
227
  {
228
+ "epoch": 2.21,
229
+ "learning_rate": 5.6179775280898885e-05,
230
+ "loss": 1.3457,
231
  "step": 74
232
  },
233
  {
234
+ "epoch": 2.27,
235
+ "learning_rate": 5.168539325842697e-05,
236
+ "loss": 1.1732,
237
  "step": 76
238
  },
239
  {
240
+ "epoch": 2.33,
241
+ "learning_rate": 4.719101123595506e-05,
242
+ "loss": 1.2885,
243
  "step": 78
244
  },
245
  {
246
+ "epoch": 2.39,
247
+ "learning_rate": 4.269662921348315e-05,
248
+ "loss": 1.0507,
249
  "step": 80
250
  },
251
  {
252
+ "epoch": 2.45,
253
+ "learning_rate": 3.8202247191011236e-05,
254
+ "loss": 1.4467,
255
  "step": 82
256
  },
257
  {
258
+ "epoch": 2.51,
259
+ "learning_rate": 3.370786516853933e-05,
260
+ "loss": 1.5236,
261
  "step": 84
262
  },
263
  {
264
+ "epoch": 2.57,
265
+ "learning_rate": 2.9213483146067417e-05,
266
+ "loss": 1.1199,
267
  "step": 86
268
  },
269
  {
270
+ "epoch": 2.63,
271
+ "learning_rate": 2.4719101123595505e-05,
272
+ "loss": 1.4098,
273
  "step": 88
274
  },
275
  {
276
+ "epoch": 2.69,
277
+ "learning_rate": 2.0224719101123596e-05,
278
+ "loss": 1.2576,
279
  "step": 90
280
  },
281
  {
282
+ "epoch": 2.75,
283
+ "learning_rate": 1.5730337078651687e-05,
284
+ "loss": 1.2179,
285
  "step": 92
286
  },
287
  {
288
+ "epoch": 2.81,
289
+ "learning_rate": 1.1235955056179776e-05,
290
+ "loss": 1.6813,
291
  "step": 94
292
  },
293
  {
294
+ "epoch": 2.87,
295
+ "learning_rate": 6.741573033707865e-06,
296
+ "loss": 1.3637,
297
  "step": 96
298
  },
299
  {
300
+ "epoch": 2.93,
301
+ "learning_rate": 2.247191011235955e-06,
302
+ "loss": 1.3635,
303
  "step": 98
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
304
  }
305
  ],
306
  "logging_steps": 2,
307
+ "max_steps": 99,
308
+ "num_train_epochs": 3,
309
  "save_steps": 500,
310
+ "total_flos": 2.619520731788083e+16,
311
  "trial_name": null,
312
  "trial_params": null
313
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5868d56ccca2b526a144ee1f7fa1118d47eb27be4eef91132608f5f6924f8c7c
3
  size 4027
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:667639befa7411dfffdec199ef1e6179390d304e4531677c185ca673c703bc09
3
  size 4027