wdevazelhes committed
Commit b74b487
1 Parent(s): f78a522

fix: use correct rounding

Files changed (1): README.md (+23 -23)

README.md CHANGED
@@ -159,40 +159,40 @@ Falcon3-7B is trained on 256 H100 nodes (world size 2048).
  <td>MMLU (5-shot)</td>
  <td>31.1</td>
  <td>61.0</td>
- <td>50.2</td>
- <td>53.1</td>
+ <td>50.1</td>
+ <td>53.0</td>
  <td>42.5</td>
  </tr>
  <tr>
  <td>MMLU-PRO (5-shot)</td>
  <td>11.7</td>
- <td>28.5</td>
- <td>21.4</td>
+ <td>28.4</td>
+ <td>21.3</td>
  <td>22.1</td>
- <td>16.2</td>
+ <td>16.1</td>
  </tr>
  <tr>
  <td>IFEval</td>
- <td>14.9</td>
- <td>26.1</td>
+ <td>14.8</td>
+ <td>26.0</td>
  <td>24.2</td>
- <td>20.4</td>
- <td>25.3</td>
+ <td>20.3</td>
+ <td>25.2</td>
  </tr>
  <tr>
  <td rowspan="2">Math</td>
  <td>GSM8K (5-shot)</td>
  <td>6.6</td>
- <td>62.3</td>
- <td>31.1</td>
- <td>25.6</td>
+ <td>62.2</td>
+ <td>31.0</td>
+ <td>25.5</td>
  <td>34.3</td>
  </tr>
  <tr>
  <td>MATH Lvl-5 (4-shot)</td>
- <td>0.3</td>
- <td>6.8</td>
- <td>1.5</td>
+ <td>0.2</td>
+ <td>6.7</td>
+ <td>1.4</td>
  <td>2.6</td>
  <td>2.2</td>
  </tr>
@@ -203,12 +203,12 @@ Falcon3-7B is trained on 256 H100 nodes (world size 2048).
  <td>54.8</td>
  <td>54.1</td>
  <td>53.7</td>
- <td>48.2</td>
+ <td>48.1</td>
  </tr>
  <tr>
  <td>GPQA (0-shot)</td>
- <td>24.3</td>
- <td>28.2</td>
+ <td>24.2</td>
+ <td>28.1</td>
  <td>28.9</td>
  <td>25.5</td>
  <td>28.1</td>
@@ -217,22 +217,22 @@ Falcon3-7B is trained on 256 H100 nodes (world size 2048).
  <td>MUSR (0-shot)</td>
  <td>34.5</td>
  <td>35.5</td>
- <td>34.8</td>
- <td>42.8</td>
+ <td>34.7</td>
+ <td>42.7</td>
  <td>41.9</td>
  </tr>
  <tr>
  <td>BBH (3-shot)</td>
  <td>31.2</td>
  <td>41.1</td>
- <td>34.3</td>
+ <td>34.2</td>
  <td>36.8</td>
- <td>36.1</td>
+ <td>36.0</td>
  </tr>
  <tr>
  <td rowspan="4">CommonSense Understanding</td>
  <td>PIQA (0-shot)</td>
- <td>74.6</td>
+ <td>74.5</td>
  <td>76.0</td>
  <td>77.5</td>
  <td>79.2</td>
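
Every changed cell in this commit drops by exactly 0.1, which is consistent with switching from round-to-nearest to truncation at one decimal place. The sketch below only illustrates that difference; the raw score used here is hypothetical, since the card shows the already-rounded values and the actual evaluation pipeline is not part of this commit.

```python
import math

def round_nearest(score: float, ndigits: int = 1) -> float:
    """Round to the nearest value with `ndigits` decimal places (assumed old behaviour)."""
    return round(score, ndigits)

def truncate(score: float, ndigits: int = 1) -> float:
    """Keep only `ndigits` decimal places, discarding the rest (assumed new behaviour)."""
    factor = 10 ** ndigits
    return math.floor(score * factor) / factor

raw = 50.17  # hypothetical raw MMLU score; the real raw values are not shown in the diff
print(round_nearest(raw))  # 50.2 -- the value removed by this commit
print(truncate(raw))       # 50.1 -- the value added by this commit
```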