susumuota committed
Commit 0207fc5 · verified · 1 parent: a923776

Model save

README.md CHANGED
@@ -1,11 +1,9 @@
 ---
 base_model: Qwen/Qwen2.5-1.5B-Instruct
-datasets: HuggingFaceH4/Bespoke-Stratos-17k
 library_name: transformers
 model_name: Qwen2.5-1.5B-Open-R1-Distill
 tags:
 - generated_from_trainer
-- open-r1
 - trl
 - sft
 licence: license
@@ -13,7 +11,7 @@ licence: license
 
 # Model Card for Qwen2.5-1.5B-Open-R1-Distill
 
-This model is a fine-tuned version of [Qwen/Qwen2.5-1.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct) on the [HuggingFaceH4/Bespoke-Stratos-17k](https://huggingface.co/datasets/HuggingFaceH4/Bespoke-Stratos-17k) dataset.
+This model is a fine-tuned version of [Qwen/Qwen2.5-1.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct).
 It has been trained using [TRL](https://github.com/huggingface/trl).
 
 ## Quick start
@@ -29,7 +27,7 @@ print(output["generated_text"])
 
 ## Training procedure
 
-[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/llm-m_wandb-weblab/Qwen2.5-1.5B/runs/b8odb8mc)
+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/llm-m_wandb-weblab/Qwen2.5-1.5B/runs/op7h76qk)
 
 
 This model was trained with SFT.
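The Quick start section of the card is not touched by this commit; only its `print(output["generated_text"])` context line is visible in the hunk header. The sketch below illustrates the kind of pipeline usage that line implies for a TRL-generated SFT card. It is an assumption, not part of the commit: the repo id `susumuota/Qwen2.5-1.5B-Open-R1-Distill` and the prompt text are placeholders.

```python
# Hedged sketch of the Quick start usage the card refers to; the repo id below
# is assumed (the diff does not state the final model path on the Hub).
from transformers import pipeline

generator = pipeline(
    "text-generation",
    model="susumuota/Qwen2.5-1.5B-Open-R1-Distill",  # assumed repo id
    device_map="auto",
)
messages = [{"role": "user", "content": "Explain supervised fine-tuning in one sentence."}]
output = generator(messages, max_new_tokens=128, return_full_text=False)[0]
print(output["generated_text"])
```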
all_results.json CHANGED
@@ -5,10 +5,10 @@
   "eval_samples": 100,
   "eval_samples_per_second": 161.504,
   "eval_steps_per_second": 5.047,
-  "total_flos": 76916824473600.0,
-  "train_loss": 0.8026494443769285,
-  "train_runtime": 500.9768,
+  "total_flos": 76973799899136.0,
+  "train_loss": 0.8025016911636443,
+  "train_runtime": 750.6152,
   "train_samples": 16610,
-  "train_samples_per_second": 43.136,
-  "train_steps_per_second": 0.337
+  "train_samples_per_second": 28.807,
+  "train_steps_per_second": 0.225
 }
config.json CHANGED
@@ -23,7 +23,7 @@
   "tie_word_embeddings": true,
   "torch_dtype": "bfloat16",
   "transformers_version": "4.49.0.dev0",
-  "use_cache": true,
+  "use_cache": false,
   "use_sliding_window": false,
   "vocab_size": 151936
 }
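The only change in config.json is `use_cache` flipping from `true` to `false`, which is commonly a side effect of saving a model from a training run with gradient checkpointing enabled. The sketch below shows one way to turn the KV cache back on when loading for inference; it is an assumption on my part, not something stated in the commit, and the repo id is again a placeholder.

```python
# Hedged sketch: re-enable the KV cache at load time after this commit saved
# config.json with "use_cache": false. The repo id below is assumed.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "susumuota/Qwen2.5-1.5B-Open-R1-Distill"  # assumed repo id

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    use_cache=True,  # override the saved config for faster autoregressive generation
)

inputs = tokenizer("Hello, how are you?", return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```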
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1260fda8a12b3724ea50f2d94d94d6b1d67039e55f8ec9ee538d5cac0eaf0072
+oid sha256:1c54337c035c95cdd4fd6ad2ef63dd3251ab63d2f348609987fa76b0a05587b0
 size 3087467144
train_results.json CHANGED
@@ -1,9 +1,8 @@
 {
-  "epoch": 1.0,
-  "total_flos": 76916824473600.0,
-  "train_loss": 0.8026494443769285,
-  "train_runtime": 500.9768,
+  "total_flos": 76973799899136.0,
+  "train_loss": 0.8025016911636443,
+  "train_runtime": 750.6152,
   "train_samples": 16610,
-  "train_samples_per_second": 43.136,
-  "train_steps_per_second": 0.337
+  "train_samples_per_second": 28.807,
+  "train_steps_per_second": 0.225
 }
trainer_state.json CHANGED
@@ -10,251 +10,286 @@
   "log_history": [
     {
       "epoch": 0.029585798816568046,
-      "grad_norm": 2.3441456641162017,
+      "grad_norm": 2.297424387110319,
       "learning_rate": 5.882352941176471e-06,
-      "loss": 1.0993,
+      "loss": 1.1002,
+      "mean_token_accuracy": 0.710122095857212,
       "step": 5
     },
     {
       "epoch": 0.05917159763313609,
-      "grad_norm": 1.633349804365292,
+      "grad_norm": 1.5962459582070525,
       "learning_rate": 1.1764705882352942e-05,
-      "loss": 1.0403,
+      "loss": 1.0326,
+      "mean_token_accuracy": 0.7201974025955006,
       "step": 10
     },
     {
       "epoch": 0.08875739644970414,
-      "grad_norm": 0.8842564361880783,
+      "grad_norm": 0.84813226022544,
       "learning_rate": 1.7647058823529414e-05,
-      "loss": 0.9533,
+      "loss": 0.9517,
+      "mean_token_accuracy": 0.7326376870412972,
       "step": 15
     },
     {
       "epoch": 0.11834319526627218,
-      "grad_norm": 0.6055203960588147,
+      "grad_norm": 0.6449426089322932,
       "learning_rate": 1.9980782984658682e-05,
-      "loss": 0.8922,
+      "loss": 0.8804,
+      "mean_token_accuracy": 0.7473364378856827,
       "step": 20
     },
     {
       "epoch": 0.14792899408284024,
-      "grad_norm": 0.5402463110888628,
+      "grad_norm": 0.5511541331468433,
       "learning_rate": 1.9863613034027224e-05,
-      "loss": 0.8552,
+      "loss": 0.8536,
+      "mean_token_accuracy": 0.7518443039297401,
       "step": 25
     },
     {
       "epoch": 0.17751479289940827,
-      "grad_norm": 0.4258701840680344,
+      "grad_norm": 0.442628562609923,
       "learning_rate": 1.9641197940012136e-05,
-      "loss": 0.8283,
+      "loss": 0.8404,
+      "mean_token_accuracy": 0.7545613836853171,
       "step": 30
     },
     {
       "epoch": 0.20710059171597633,
-      "grad_norm": 0.37541501055321486,
+      "grad_norm": 0.3950445105792925,
       "learning_rate": 1.9315910880512792e-05,
-      "loss": 0.8229,
+      "loss": 0.8212,
+      "mean_token_accuracy": 0.7580693317680411,
       "step": 35
     },
     {
       "epoch": 0.23668639053254437,
-      "grad_norm": 0.3981049785839041,
+      "grad_norm": 0.38499101012568593,
       "learning_rate": 1.8891222681391853e-05,
-      "loss": 0.8225,
+      "loss": 0.805,
+      "mean_token_accuracy": 0.7621003697400892,
       "step": 40
     },
     {
       "epoch": 0.26627218934911245,
-      "grad_norm": 0.3442641995819869,
+      "grad_norm": 0.426957308734079,
       "learning_rate": 1.8371664782625287e-05,
-      "loss": 0.8073,
+      "loss": 0.7984,
+      "mean_token_accuracy": 0.763465327169843,
       "step": 45
     },
     {
       "epoch": 0.2958579881656805,
-      "grad_norm": 0.334462527994632,
+      "grad_norm": 0.3931886133887098,
       "learning_rate": 1.7762780887657576e-05,
-      "loss": 0.7977,
+      "loss": 0.7937,
+      "mean_token_accuracy": 0.7642841154247401,
       "step": 50
     },
     {
       "epoch": 0.3254437869822485,
-      "grad_norm": 0.3680761193201223,
+      "grad_norm": 0.37612205413583966,
       "learning_rate": 1.7071067811865477e-05,
-      "loss": 0.7877,
+      "loss": 0.7894,
+      "mean_token_accuracy": 0.7649155785682786,
       "step": 55
     },
     {
       "epoch": 0.35502958579881655,
-      "grad_norm": 0.3692193166132139,
+      "grad_norm": 0.3690088505249562,
       "learning_rate": 1.6303906161279554e-05,
-      "loss": 0.7981,
+      "loss": 0.7999,
+      "mean_token_accuracy": 0.761950455440128,
       "step": 60
     },
     {
       "epoch": 0.38461538461538464,
-      "grad_norm": 0.34448390578574,
+      "grad_norm": 0.3968668346625297,
       "learning_rate": 1.5469481581224274e-05,
-      "loss": 0.7721,
+      "loss": 0.7867,
+      "mean_token_accuracy": 0.764712581061752,
       "step": 65
     },
     {
       "epoch": 0.41420118343195267,
-      "grad_norm": 0.3452861149232402,
+      "grad_norm": 0.37433783603713877,
       "learning_rate": 1.4576697415156818e-05,
-      "loss": 0.7741,
+      "loss": 0.7782,
+      "mean_token_accuracy": 0.7675159747533142,
       "step": 70
     },
     {
       "epoch": 0.4437869822485207,
-      "grad_norm": 0.3363364574541925,
+      "grad_norm": 0.33916682700714146,
       "learning_rate": 1.3635079705638298e-05,
-      "loss": 0.7852,
+      "loss": 0.777,
+      "mean_token_accuracy": 0.7675317758209201,
       "step": 75
     },
     {
       "epoch": 0.47337278106508873,
-      "grad_norm": 0.35158679411054844,
+      "grad_norm": 0.34679361649836743,
       "learning_rate": 1.2654675551080724e-05,
-      "loss": 0.7666,
+      "loss": 0.7783,
+      "mean_token_accuracy": 0.7665991894058249,
       "step": 80
     },
     {
       "epoch": 0.5029585798816568,
-      "grad_norm": 0.3604023099826107,
+      "grad_norm": 0.3364148123003455,
       "learning_rate": 1.164594590280734e-05,
-      "loss": 0.7706,
+      "loss": 0.7785,
+      "mean_token_accuracy": 0.7670094150622615,
       "step": 85
     },
     {
       "epoch": 0.5325443786982249,
-      "grad_norm": 0.35782845320303824,
+      "grad_norm": 0.3495734033672351,
       "learning_rate": 1.0619653946285948e-05,
-      "loss": 0.77,
+      "loss": 0.7723,
+      "mean_token_accuracy": 0.7682055741851116,
       "step": 90
     },
     {
       "epoch": 0.5621301775147929,
-      "grad_norm": 0.382442839254331,
+      "grad_norm": 0.3432574047378308,
       "learning_rate": 9.586750257511868e-06,
-      "loss": 0.7771,
+      "loss": 0.7669,
+      "mean_token_accuracy": 0.7697794763971556,
       "step": 95
     },
     {
       "epoch": 0.591715976331361,
-      "grad_norm": 0.3330090428360514,
+      "grad_norm": 0.34082459115638813,
       "learning_rate": 8.558255959926533e-06,
-      "loss": 0.753,
+      "loss": 0.7582,
+      "mean_token_accuracy": 0.7714136401538525,
       "step": 100
     },
     {
       "epoch": 0.591715976331361,
-      "eval_loss": 0.7880030870437622,
-      "eval_runtime": 0.8026,
-      "eval_samples_per_second": 159.487,
-      "eval_steps_per_second": 4.984,
+      "eval_loss": 0.7885270118713379,
+      "eval_mean_token_accuracy": 0.7488056872787593,
+      "eval_runtime": 2.2705,
+      "eval_samples_per_second": 56.816,
+      "eval_steps_per_second": 2.202,
       "step": 100
     },
     {
       "epoch": 0.621301775147929,
-      "grad_norm": 0.33382381973562353,
+      "grad_norm": 0.3500865265546408,
       "learning_rate": 7.545145128592009e-06,
-      "loss": 0.7569,
+      "loss": 0.759,
+      "mean_token_accuracy": 0.7715606650120836,
       "step": 105
     },
     {
       "epoch": 0.650887573964497,
-      "grad_norm": 0.342407693906868,
+      "grad_norm": 0.3153601426300555,
       "learning_rate": 6.558227696373617e-06,
-      "loss": 0.7681,
+      "loss": 0.7775,
+      "mean_token_accuracy": 0.7664943003135225,
       "step": 110
     },
     {
       "epoch": 0.6804733727810651,
-      "grad_norm": 0.30933337277908973,
+      "grad_norm": 0.3077751865894769,
       "learning_rate": 5.608034111526298e-06,
-      "loss": 0.7623,
+      "loss": 0.7636,
+      "mean_token_accuracy": 0.7702647251741424,
       "step": 115
     },
     {
       "epoch": 0.7100591715976331,
-      "grad_norm": 0.2990413611811013,
+      "grad_norm": 0.2978423880943666,
       "learning_rate": 4.704702977392914e-06,
-      "loss": 0.7514,
+      "loss": 0.7511,
+      "mean_token_accuracy": 0.7742831183944568,
       "step": 120
     },
     {
       "epoch": 0.7396449704142012,
-      "grad_norm": 0.34349997716613806,
+      "grad_norm": 0.3160810225440028,
       "learning_rate": 3.857872873103322e-06,
-      "loss": 0.7538,
+      "loss": 0.7594,
+      "mean_token_accuracy": 0.7718046260565352,
       "step": 125
     },
     {
       "epoch": 0.7692307692307693,
-      "grad_norm": 0.3113170176717584,
+      "grad_norm": 0.3193177717083944,
       "learning_rate": 3.0765795095517026e-06,
-      "loss": 0.7555,
+      "loss": 0.7623,
+      "mean_token_accuracy": 0.7706338288914129,
       "step": 130
     },
     {
       "epoch": 0.7988165680473372,
-      "grad_norm": 0.3230681953500364,
+      "grad_norm": 0.301902197424481,
       "learning_rate": 2.369159318001937e-06,
-      "loss": 0.7584,
+      "loss": 0.7684,
+      "mean_token_accuracy": 0.7688698616474955,
       "step": 135
     },
     {
       "epoch": 0.8284023668639053,
-      "grad_norm": 0.2991556070852993,
+      "grad_norm": 0.2941951800393329,
       "learning_rate": 1.743160500034443e-06,
-      "loss": 0.7498,
+      "loss": 0.7596,
+      "mean_token_accuracy": 0.7716673849461525,
       "step": 140
     },
     {
       "epoch": 0.8579881656804734,
-      "grad_norm": 0.3114696672768526,
+      "grad_norm": 0.3117346710739024,
       "learning_rate": 1.2052624879351105e-06,
-      "loss": 0.7566,
+      "loss": 0.7595,
+      "mean_token_accuracy": 0.7714754205949017,
       "step": 145
     },
     {
       "epoch": 0.8875739644970414,
-      "grad_norm": 0.2949319010398544,
+      "grad_norm": 0.2910628005200283,
       "learning_rate": 7.612046748871327e-07,
-      "loss": 0.7666,
+      "loss": 0.7505,
+      "mean_token_accuracy": 0.7739860912523475,
       "step": 150
     },
     {
       "epoch": 0.9171597633136095,
-      "grad_norm": 0.29707992694247826,
+      "grad_norm": 0.28002315513328135,
       "learning_rate": 4.1572517541747294e-07,
-      "loss": 0.7613,
+      "loss": 0.741,
+      "mean_token_accuracy": 0.7767315836804676,
       "step": 155
     },
     {
       "epoch": 0.9467455621301775,
-      "grad_norm": 0.2858348422013152,
+      "grad_norm": 0.2957619359463069,
       "learning_rate": 1.7251026952640583e-07,
-      "loss": 0.7607,
+      "loss": 0.754,
+      "mean_token_accuracy": 0.7732596586427178,
       "step": 160
     },
     {
       "epoch": 0.9763313609467456,
-      "grad_norm": 0.27606916608468957,
+      "grad_norm": 0.2762985231070698,
       "learning_rate": 3.4155069933301535e-08,
-      "loss": 0.7445,
+      "loss": 0.7467,
+      "mean_token_accuracy": 0.7749716424638167,
       "step": 165
     },
     {
       "epoch": 1.0,
+      "mean_token_accuracy": 0.7704617250967111,
       "step": 169,
-      "total_flos": 76916824473600.0,
-      "train_loss": 0.8026494443769285,
-      "train_runtime": 500.9768,
-      "train_samples_per_second": 43.136,
-      "train_steps_per_second": 0.337
+      "total_flos": 76973799899136.0,
+      "train_loss": 0.8025016911636443,
+      "train_runtime": 750.6152,
+      "train_samples_per_second": 28.807,
+      "train_steps_per_second": 0.225
     }
   ],
   "logging_steps": 5,
@@ -274,8 +309,8 @@
       "attributes": {}
     }
   },
-  "total_flos": 76916824473600.0,
-  "train_batch_size": 4,
+  "total_flos": 76973799899136.0,
+  "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null
 }
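The trainer_state.json diff shows that every training log entry now records `mean_token_accuracy` alongside `loss`, and the evaluation entry gains `eval_mean_token_accuracy`. A minimal sketch for pulling these per-step series out of the saved file, assuming only the structure visible in the diff above:

```python
# Hedged sketch: extract the per-step loss and mean_token_accuracy series from
# the trainer_state.json written by this run, e.g. for plotting.
import json

with open("trainer_state.json") as f:
    state = json.load(f)

for entry in state["log_history"]:
    # Training entries carry "loss"; the eval entry uses "eval_loss" instead.
    if "loss" in entry and "mean_token_accuracy" in entry:
        print(entry["step"], entry["loss"], entry["mean_token_accuracy"])
```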
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:44893394a0eba7fd46589a49f8774d43c799edd92b85f27ff4f5ab8717375e95
+oid sha256:4745bf1543ad52003139f9be07590fe262ca4314dff5d0f7b2e7d7f042b61faa
 size 7352