Midm-LLM committed on
Commit
01af797
·
verified ·
1 Parent(s): 1c842c4

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +373 -354
README.md CHANGED
@@ -127,367 +127,385 @@ print(tokenizer.decode(output[0]))
127
 
128
  # Evaluation
129
 
130
- #### English
 
 
 
131
  <table>
132
- <thead>
133
- <tr>
134
- <th colspan="2"><b>Benchmark</b></th>
135
- <th>Exaone-3.5-2.4B-inst</th>
136
- <th>Qwen3-4B</th>
137
- <th>Mi:dm 2.0-Mini-inst</th>
138
- <th>Exaone-3.5-7.8B-inst</th>
139
- <th>Qwen3-14B</th>
140
- <th>Llama-3.1-8B-inst</th>
141
- <th>Mi:dm 2.0-Base-inst</th>
142
- </tr>
143
- </thead>
144
- <tbody>
145
- <tr>
146
- <td rowspan="1"><b>Instruction Following</b></td>
147
- <td><b>IFEval</b></td>
148
- <td align="center">81.1</td>
149
- <td align="center">79.7</td>
150
- <td align="center">73.6</td>
151
- <td align="center">83.6</td>
152
- <td align="center">83.9</td>
153
- <td align="center">79.9</td>
154
- <td align="center"><b>84.0</b></td>
155
- </tr>
156
- <tr>
157
- <td rowspan="4"><b>Reasoning</b></td>
158
- <td><b>BBH</b></td>
159
- <td align="center">46.4</td>
160
- <td align="center">79.0</td>
161
- <td align="center">44.5</td>
162
- <td align="center">50.1</td>
163
- <td align="center">83.4</td>
164
- <td align="center">60.3</td>
165
- <td align="center"><b>77.7</b></td>
166
- </tr>
167
- <tr>
168
- <td><b>GPQA</b></td>
169
- <td align="center">28.1</td>
170
- <td align="center">39.8</td>
171
- <td align="center">26.6</td>
172
- <td align="center">33.1</td>
173
- <td align="center">49.8</td>
174
- <td align="center">21.6</td>
175
- <td align="center"><b>33.5</b></td>
176
- </tr>
177
- <tr>
178
- <td><b>MuSR</b></td>
179
- <td align="center">49.7</td>
180
- <td align="center">58.5</td>
181
- <td align="center">51.7</td>
182
- <td align="center">51.2</td>
183
- <td align="center">57.7</td>
184
- <td align="center">50.3</td>
185
- <td align="center"><b>51.9</b></td>
186
- </tr>
187
- <tr>
188
- <td><b>Avg.</b></td>
189
- <td align="center">41.4</td>
190
- <td align="center">59.1</td>
191
- <td align="center">40.9</td>
192
- <td align="center">44.8</td>
193
- <td align="center">63.6</td>
194
- <td align="center">44.1</td>
195
- <td align="center"><b>54.4</b></td>
196
- </tr>
197
- <tr>
198
- <td rowspan="2"><b>Mathematics</b></td>
199
- <td><b>GSM8K</b></td>
200
- <td align="center">82.5</td>
201
- <td align="center">90.4</td>
202
- <td align="center">83.1</td>
203
- <td align="center">81.1</td>
204
- <td align="center">88.0</td>
205
- <td align="center">81.2</td>
206
- <td align="center"><b>91.6</b></td>
207
- </tr>
208
- <tr>
209
- <td><b>MBPP+</b></td>
210
- <td align="center">59.8</td>
211
- <td align="center">62.4</td>
212
- <td align="center">60.9</td>
213
- <td align="center">79.4</td>
214
- <td align="center">73.4</td>
215
- <td align="center">81.8</td>
216
- <td align="center"><b>77.5</b></td>
217
- </tr>
218
- <tr>
219
- <td rowspan="3"><b>General Knowledge</b></td>
220
- <td><b>MMLU-pro</b></td>
221
- <td align="center">-</td>
222
- <td align="center">-</td>
223
- <td align="center">-</td>
224
- <td align="center">40.7</td>
225
- <td align="center">70.5</td>
226
- <td align="center">47.6</td>
227
- <td align="center"><b>53.3</b></td>
228
- </tr>
229
- <tr>
230
- <td><b>MMLU</b></td>
231
- <td align="center">59.5</td>
232
- <td align="center">73.3</td>
233
- <td align="center">56.5</td>
234
- <td align="center">69.0</td>
235
- <td align="center">82.7</td>
236
- <td align="center">70.7</td>
237
- <td align="center"><b>73.7</b></td>
238
- </tr>
239
- <tr>
240
- <td><b>Avg.</b></td>
241
- <td align="center">59.5</td>
242
- <td align="center">73.3</td>
243
- <td align="center">56.5</td>
244
- <td align="center">54.8</td>
245
- <td align="center"><b>76.6</b></td>
246
- <td align="center">59.2</td>
247
- <td align="center">63.5</td>
248
- </tr>
249
- </tbody>
 
 
 
 
 
 
250
  </table>
251
 
252
- #### Korean
253
  <table>
254
- <thead>
255
- <tr>
256
- <th colspan="2"><b>Benchmark</b></th>
257
- <th>Exaone-3.5-2.4B-inst</th>
258
- <th>Qwen3-4B</th>
259
- <th>Mi:dm 2.0-Mini-inst</th>
260
- <th>Exaone-3.5-7.8B-inst</th>
261
- <th>Qwen3-14B</th>
262
- <th>Llama-3.1-8B-inst</th>
263
- <th>Mi:dm 2.0-Base-inst</th>
264
- </tr>
265
- </thead>
266
- <tbody>
267
- <!-- Comprehension -->
268
- <tr>
269
- <td rowspan="5"><b>Comprehension</b></td>
270
- <td><b>K-Prag*</b></td>
271
- <td align="center">68.7</td>
272
- <td align="center">73.9</td>
273
- <td align="center">69.5</td>
274
- <td align="center">73.5</td>
275
- <td align="center"><b>86.7</b></td>
276
- <td align="center">59.9</td>
277
- <td align="center">86.5</td>
278
- </tr>
279
- <tr>
280
- <td><b>K-Refer-Hard*</b></td>
281
- <td align="center">58.5</td>
282
- <td align="center">56.7</td>
283
- <td align="center">55.4</td>
284
- <td align="center">61.9</td>
285
- <td align="center"><b>74.0</b></td>
286
- <td align="center">48.6</td>
287
- <td align="center">70.8</td>
288
- </tr>
289
- <tr>
290
- <td><b>Ko-Best</b></td>
291
- <td align="center">87.2</td>
292
- <td align="center">91.5</td>
293
- <td align="center">80.5</td>
294
- <td align="center">92.0</td>
295
- <td align="center">93.9</td>
296
- <td align="center">77.4</td>
297
- <td align="center"><b>95.2</b></td>
298
- </tr>
299
- <tr>
300
- <td><b>Ko-Sovereign*</b></td>
301
- <td align="center">38.0</td>
302
- <td align="center">43.5</td>
303
- <td align="center">42.5</td>
304
- <td align="center">44.0</td>
305
- <td align="center">52.0</td>
306
- <td align="center">31.5</td>
307
- <td align="center"><b>53.0</b></td>
308
- </tr>
309
- <tr>
310
- <td><b>Avg.</b></td>
311
- <td align="center">62.5</td>
312
- <td align="center">66.6</td>
313
- <td align="center">61.9</td>
314
- <td align="center">67.2</td>
315
- <td align="center"><b>76.8</b></td>
316
- <td align="center">51.5</td>
317
- <td align="center">76.1</td>
318
- </tr>
319
- <tr>
320
- <td rowspan="5"><b>Reasoning</b></td>
321
- <td><b>Ko-Winogrande</b></td>
322
- <td align="center">60.3</td>
323
- <td align="center"><b>67.5</b></td>
324
- <td align="center">61.7</td>
325
- <td align="center">64.6</td>
326
- <td align="center">77.2</td>
327
- <td align="center">40.1</td>
328
- <td align="center">75.1</td>
329
- </tr>
330
- <tr>
331
- <td><b>Ko-Best</b></td>
332
- <td align="center">64.1</td>
333
- <td align="center"><b>69.2</b></td>
334
- <td align="center">64.5</td>
335
- <td align="center">60.3</td>
336
- <td align="center">75.4</td>
337
- <td align="center">26.0</td>
338
- <td align="center">73.0</td>
339
- </tr>
340
- <tr>
341
- <td><b>LogicKor*</b></td>
342
- <td align="center"><b>7.4</b></td>
343
- <td align="center">5.6</td>
344
- <td align="center">7.7</td>
345
- <td align="center">8.6</td>
346
- <td align="center">6.4</td>
347
- <td align="center">2.4</td>
348
- <td align="center">8.6</td>
349
- </tr>
350
- <tr>
351
- <td><b>HRM8K*</b></td>
352
- <td align="center">38.5</td>
353
- <td align="center"><b>56.7</b></td>
354
- <td align="center">39.9</td>
355
- <td align="center">49.7</td>
356
- <td align="center">64.5</td>
357
- <td align="center">30.9</td>
358
- <td align="center">52.9</td>
359
- </tr>
360
- <tr>
361
- <td><b>Avg.</b></td>
362
- <td align="center">36.7</td>
363
- <td align="center"><b>43.8</b></td>
364
- <td align="center">37.4</td>
365
- <td align="center">39.5</td>
366
- <td align="center">48.8</td>
367
- <td align="center">19.8</td>
368
- <td align="center">44.8</td>
369
- </tr>
370
- <!-- Society & Culture -->
371
- <tr>
372
- <td rowspan="5"><b>Society & Culture</b></td>
373
- <td><b>K-Refer*</b></td>
374
- <td align="center">64.0</td>
375
- <td align="center">53.6</td>
376
- <td align="center">66.4</td>
377
- <td align="center">71.6</td>
378
- <td align="center">72.4</td>
379
- <td align="center">43.2</td>
380
- <td align="center"><b>89.6</b></td>
381
- </tr>
382
- <tr>
383
- <td><b>K-Refer-Hard*</b></td>
384
- <td align="center">67.1</td>
385
- <td align="center">42.9</td>
386
- <td align="center">61.4</td>
387
- <td align="center">69.3</td>
388
- <td align="center">65.7</td>
389
- <td align="center">36.4</td>
390
- <td align="center"><b>86.4</b></td>
391
- </tr>
392
- <tr>
393
- <td><b>Ko-Sovereign*</b></td>
394
- <td align="center">44.4</td>
395
- <td align="center">35.8</td>
396
- <td align="center">36.7</td>
397
- <td align="center">46.9</td>
398
- <td align="center"><b>49.8</b></td>
399
- <td align="center">33.8</td>
400
- <td align="center">56.3</td>
401
- </tr>
402
- <tr>
403
- <td><b>HAERAE*</b></td>
404
- <td align="center">61.3</td>
405
- <td align="center">50.6</td>
406
- <td align="center">70.8</td>
407
- <td align="center">72.9</td>
408
- <td align="center">68.4</td>
409
- <td align="center">49.5</td>
410
- <td align="center"><b>81.5</b></td>
411
- </tr>
412
- <tr>
413
- <td><b>Avg.</b></td>
414
- <td align="center">59.2</td>
415
- <td align="center">45.7</td>
416
- <td align="center">58.8</td>
417
- <td align="center">65.2</td>
418
- <td align="center">64.1</td>
419
- <td align="center">40.7</td>
420
- <td align="center"><b>78.4</b></td>
421
- </tr>
422
- <!-- Reasoning (Domain) -->
423
- <tr>
424
- <td rowspan="3"><b>Reasoning (Domain)</b></td>
425
- <td><b>KMMLU</b></td>
426
- <td align="center">43.5</td>
427
- <td align="center">50.6</td>
428
- <td align="center">45.1</td>
429
- <td align="center">52.6</td>
430
- <td align="center">55.4</td>
431
- <td align="center">33.0</td>
432
- <td align="center"><b>57.3</b></td>
433
- </tr>
434
- <tr>
435
- <td><b>Ko-Sovereign*</b></td>
436
- <td align="center">42.4</td>
437
- <td align="center">42.5</td>
438
- <td align="center">42.4</td>
439
- <td align="center">45.6</td>
440
- <td align="center">54.7</td>
441
- <td align="center">36.7</td>
442
- <td align="center"><b>58.0</b></td>
443
- </tr>
444
- <tr>
445
- <td><b>Avg.</b></td>
446
- <td align="center">43.0</td>
447
- <td align="center">46.5</td>
448
- <td align="center">43.8</td>
449
- <td align="center">49.1</td>
450
- <td align="center">55.1</td>
451
- <td align="center">34.8</td>
452
- <td align="center"><b>57.7</b></td>
453
- </tr>
454
- <!-- Instruction Following -->
455
- <tr>
456
- <td rowspan="3"><b>Instruction Following</b></td>
457
- <td><b>Ko-IFEval*</b></td>
458
- <td align="center">65.4</td>
459
- <td align="center">75.9</td>
460
- <td align="center">73.3</td>
461
- <td align="center">69.1</td>
462
- <td align="center"><b>83.6</b></td>
463
- <td align="center">60.1</td>
464
- <td align="center">82.0</td>
465
- </tr>
466
- <tr>
467
- <td><b>Ko-MTBench</b></td>
468
- <td align="center">74.0</td>
469
- <td align="center">63.0</td>
470
- <td align="center">74.0</td>
471
- <td align="center">79.6</td>
472
- <td align="center">71.0</td>
473
- <td align="center">57.0</td>
474
- <td align="center"><b>89.7</b></td>
475
- </tr>
476
- <tr>
477
- <td><b>Avg.</b></td>
478
- <td align="center">68.9</td>
479
- <td align="center">69.4</td>
480
- <td align="center">73.6</td>
481
- <td align="center">74.4</td>
482
- <td align="center">77.3</td>
483
- <td align="center">58.5</td>
484
- <td align="center"><b>85.9</b></td>
485
- </tr>
486
- </tbody>
487
  </table>
488
 
489
  `*` indicates KT proprietary evaluation resources.
490
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
491
  <br>
492
 
493
  # Usage
@@ -553,4 +571,5 @@ Mi:dm 2.0 is licensed under the [MIT License](./LICENSE).
553
  ### Contact
554
  Mi:dm 2.0 Technical Inquiries: [email protected]
555
 
556
- <br>
 
 
127
 
128
  # Evaluation
129
 
130
+
131
+ #### Korean
132
+
133
+ <!-- first half table-->
134
  <table>
135
+ <tr>
136
+ <th rowspan="2">Model</th>
137
+ <th colspan="5" align="center">Society & Culture</th>
138
+ <th colspan="3" align="center">General Knowledge</th>
139
+ <th colspan="3" align="center">Instruction Following</th>
140
+ </tr>
141
+ <tr>
142
+ <th align="center">K-Refer<sup>*</sup></th>
143
+ <th align="center">K-Refer-Hard<sup>*</sup></th>
144
+ <th align="center">Ko-Sovereign<sup>*</sup></th>
145
+ <th align="center">HAERAE</th>
146
+ <th align="center">Avg.</th>
147
+ <th align="center">KMMLU</th>
148
+ <th align="center">Ko-Sovereign<sup>*</sup></th>
149
+ <th align="center">Avg.</th>
150
+ <th align="center">Ko-IFEval</th>
151
+ <th align="center">Ko-MTBench</th>
152
+ <th align="center">Avg.</th>
153
+ </tr>
154
+
155
+ <!-- Small Models -->
156
+ <tr>
157
+ <td><strong>Qwen3-4B</strong></td>
158
+ <td align="center">53.6</td>
159
+ <td align="center">42.9</td>
160
+ <td align="center">35.8</td>
161
+ <td align="center">50.6</td>
162
+ <td align="center">45.7</td>
163
+ <td align="center"><strong>50.6</strong></td>
164
+ <td align="center"><strong>42.5</strong></td>
165
+ <td align="center"><strong>46.5</strong></td>
166
+ <td align="center"><strong>75.9</strong></td>
167
+ <td align="center">63.0</td>
168
+ <td align="center">69.4</td>
169
+ </tr>
170
+ <tr>
171
+ <td><strong>Exaone-3.5-2.4B-inst</strong></td>
172
+ <td align="center">64.0</td>
173
+ <td align="center"><strong>67.1</strong></td>
174
+ <td align="center"><strong>44.4</strong></td>
175
+ <td align="center">61.3</td>
176
+ <td align="center"><strong>59.2</strong></td>
177
+ <td align="center">43.5</td>
178
+ <td align="center">42.4</td>
179
+ <td align="center">43.0</td>
180
+ <td align="center">65.4</td>
181
+ <td align="center"><u>74.0</u></td>
182
+ <td align="center">68.9</td>
183
+ </tr>
184
+ <tr>
185
+ <td><strong>Mi:dm 2.0-Mini-inst</strong></td>
186
+ <td align="center"><u>66.4</u></td>
187
+ <td align="center">61.4</td>
188
+ <td align="center">36.7</td>
189
+ <td align="center"><u>70.8</u></td>
190
+ <td align="center">58.8</td>
191
+ <td align="center">45.1</td>
192
+ <td align="center">42.4</td>
193
+ <td align="center">43.8</td>
194
+ <td align="center"><u>73.3</u></td>
195
+ <td align="center"><strong>74.0</strong></td>
196
+ <td align="center"><strong>73.6</strong></td>
197
+ </tr>
198
+
199
+ <!-- Spacer row -->
200
+ <tr><td colspan="12">&nbsp;</td></tr>
201
+
202
+ <!-- Large Models -->
203
+ <tr>
204
+ <td><strong>Qwen3-14B</strong></td>
205
+ <td align="center"><u>72.4</u></td>
206
+ <td align="center">65.7</td>
207
+ <td align="center"><u>49.8</u></td>
208
+ <td align="center">68.4</td>
209
+ <td align="center">64.1</td>
210
+ <td align="center"><u>55.4</u></td>
211
+ <td align="center"><u>54.7</u></td>
212
+ <td align="center"><u>55.1</u></td>
213
+ <td align="center"><strong>83.6</strong></td>
214
+ <td align="center">71.0</td>
215
+ <td align="center"><u>77.3</u></td>
216
+ </tr>
217
+ <tr>
218
+ <td><strong>Llama-3.1-8B-inst</strong></td>
219
+ <td align="center">43.2</td>
220
+ <td align="center">36.4</td>
221
+ <td align="center">33.8</td>
222
+ <td align="center">49.5</td>
223
+ <td align="center">40.7</td>
224
+ <td align="center">33.0</td>
225
+ <td align="center">36.7</td>
226
+ <td align="center">34.8</td>
227
+ <td align="center">60.1</td>
228
+ <td align="center">57.0</td>
229
+ <td align="center">58.5</td>
230
+ </tr>
231
+ <tr>
232
+ <td><strong>Exaone-3.5-7.8B-inst</strong></td>
233
+ <td align="center">71.6</td>
234
+ <td align="center"><u>69.3</u></td>
235
+ <td align="center">46.9</td>
236
+ <td align="center"><u>72.9</u></td>
237
+ <td align="center"><u>65.2</u></td>
238
+ <td align="center">52.6</td>
239
+ <td align="center">45.6</td>
240
+ <td align="center">49.1</td>
241
+ <td align="center">69.1</td>
242
+ <td align="center"><u>79.6</u></td>
243
+ <td align="center">74.4</td>
244
+ </tr>
245
+ <tr>
246
+ <td><strong>Mi:dm 2.0-Base-inst</strong></td>
247
+ <td align="center"><strong>89.6</strong></td>
248
+ <td align="center"><strong>86.4</strong></td>
249
+ <td align="center"><strong>56.3</strong></td>
250
+ <td align="center"><strong>81.5</strong></td>
251
+ <td align="center"><strong>78.4</strong></td>
252
+ <td align="center"><strong>57.3</strong></td>
253
+ <td align="center"><strong>58.0</strong></td>
254
+ <td align="center"><strong>57.7</strong></td>
255
+ <td align="center"><u>82.0</u></td>
256
+ <td align="center"><strong>89.7</strong></td>
257
+ <td align="center"><strong>85.9</strong></td>
258
+ </tr>
259
  </table>
260
 
261
+ <!-- second half table-->
262
  <table>
263
+ <tr>
264
+ <th rowspan="2" align="center">Model</th>
265
+ <th colspan="5" align="center">Comprehension</th>
266
+ <th colspan="5" align="center">Reasoning</th>
267
+ </tr>
268
+ <tr>
269
+ <th align="center">K-Prag<sup>*</sup></th>
270
+ <th align="center">K-Refer-Hard<sup>*</sup></th>
271
+ <th align="center">Ko-Best</th>
272
+ <th align="center">Ko-Sovereign<sup>*</sup></th>
273
+ <th align="center">Avg.</th>
274
+ <th align="center">Ko-Winogrande</th>
275
+ <th align="center">Ko-Best</th>
276
+ <th align="center">LogicKor</th>
277
+ <th align="center">HRM8K</th>
278
+ <th align="center">Avg.</th>
279
+ </tr>
280
+
281
+ <!-- Small Models -->
282
+ <tr>
283
+ <td><strong>Qwen3-4B</strong></td>
284
+ <td align="center"><strong>73.9</strong></td>
285
+ <td align="center">56.7</td>
286
+ <td align="center"><strong>91.5</strong></td>
287
+ <td align="center"><strong>43.5</strong></td>
288
+ <td align="center"><strong>66.6</strong></td>
289
+ <td align="center"><strong>67.5</strong></td>
290
+ <td align="center"><strong>69.2</strong></td>
291
+ <td align="center">5.6</td>
292
+ <td align="center"><strong>56.7</strong></td>
293
+ <td align="center"><strong>43.8</strong></td>
294
+ </tr>
295
+ <tr>
296
+ <td><strong>Exaone-3.5-2.4B-inst</strong></td>
297
+ <td align="center">68.7</td>
298
+ <td align="center"><strong>58.5</strong></td>
299
+ <td align="center"><u>87.2</u></td>
300
+ <td align="center">38.0</td>
301
+ <td align="center"><u>62.5</u></td>
302
+ <td align="center">60.3</td>
303
+ <td align="center">64.1</td>
304
+ <td align="center">7.4</td>
305
+ <td align="center">38.5</td>
306
+ <td align="center">36.7</td>
307
+ </tr>
308
+ <tr>
309
+ <td><strong>Mi:dm 2.0-Mini-inst</strong></td>
310
+ <td align="center">69.5</td>
311
+ <td align="center">55.4</td>
312
+ <td align="center">80.5</td>
313
+ <td align="center">42.5</td>
314
+ <td align="center">61.9</td>
315
+ <td align="center"><u>61.7</u></td>
316
+ <td align="center"><u>64.5</u></td>
317
+ <td align="center"><strong>7.7</strong></td>
318
+ <td align="center"><u>39.9</u></td>
319
+ <td align="center"><u>37.4</u></td>
320
+ </tr>
321
+
322
+ <!-- Visual Spacer -->
323
+ <tr><td colspan="11"> </td></tr>
324
+
325
+ <!-- Large Models -->
326
+ <tr>
327
+ <td><strong>Qwen3-14B</strong></td>
328
+ <td align="center"><strong>86.7</strong></td>
329
+ <td align="center"><strong>74.0</strong></td>
330
+ <td align="center">93.9</td>
331
+ <td align="center">52.0</td>
332
+ <td align="center"><strong>76.8</strong></td>
333
+ <td align="center"><strong>77.2</strong></td>
334
+ <td align="center"><strong>75.4</strong></td>
335
+ <td align="center">6.4</td>
336
+ <td align="center"><strong>64.5</strong></td>
337
+ <td align="center"><strong>48.8</strong></td>
338
+ </tr>
339
+ <tr>
340
+ <td><strong>Llama-3.1-8B-inst</strong></td>
341
+ <td align="center">59.9</td>
342
+ <td align="center">48.6</td>
343
+ <td align="center">77.4</td>
344
+ <td align="center">31.5</td>
345
+ <td align="center">51.5</td>
346
+ <td align="center">40.1</td>
347
+ <td align="center">26.0</td>
348
+ <td align="center">2.4</td>
349
+ <td align="center">30.9</td>
350
+ <td align="center">19.8</td>
351
+ </tr>
352
+ <tr>
353
+ <td><strong>Exaone-3.5-7.8B-inst</strong></td>
354
+ <td align="center"><u>73.5</u></td>
355
+ <td align="center"><u>61.9</u></td>
356
+ <td align="center"><u>92.0</u></td>
357
+ <td align="center">44.0</td>
358
+ <td align="center">67.2</td>
359
+ <td align="center">64.6</td>
360
+ <td align="center">60.3</td>
361
+ <td align="center"><strong>8.6</strong></td>
362
+ <td align="center">49.7</td>
363
+ <td align="center">39.5</td>
364
+ </tr>
365
+ <tr>
366
+ <td><strong>Mi:dm 2.0-Base-inst</strong></td>
367
+ <td align="center"><u>86.5</u></td>
368
+ <td align="center"><u>70.8</u></td>
369
+ <td align="center"><strong>95.2</strong></td>
370
+ <td align="center"><strong>53.0</strong></td>
371
+ <td align="center"><u>76.1</u></td>
372
+ <td align="center"><u>75.1</u></td>
373
+ <td align="center"><u>73.0</u></td>
374
+ <td align="center"><strong>8.6</strong></td>
375
+ <td align="center"><u>52.9</u></td>
376
+ <td align="center"><u>44.8</u></td>
377
+ </tr>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
378
  </table>
379
 
380
  `*` indicates KT proprietary evaluation resources.
381
 
382
+ <br>
383
+
384
+
385
+ #### English
386
+
387
+
388
+ <table>
389
+ <tr>
390
+ <th rowspan="2" align="center">Model</th>
391
+ <th align="center">Instruction</th>
392
+ <th colspan="4" align="center">Reasoning</th>
393
+ <th align="center">Math</th>
394
+ <th align="center">Coding</th>
395
+ <th colspan="3" align="center">General Knowledge</th>
396
+ </tr>
397
+ <tr>
398
+ <th align="center">IFEval</th>
399
+ <th align="center">BBH</th>
400
+ <th align="center">GPQA</th>
401
+ <th align="center">MuSR</th>
402
+ <th align="center">Avg.</th>
403
+ <th align="center">GSM8K</th>
404
+ <th align="center">MBPP+</th>
405
+ <th align="center">MMLU-pro</th>
406
+ <th align="center">MMLU</th>
407
+ <th align="center">Avg.</th>
408
+ </tr>
409
+
410
+ <!-- Small Models -->
411
+ <tr>
412
+ <td><strong>Qwen3-4B</strong></td>
413
+ <td align="center"><u>79.7</u></td>
414
+ <td align="center"><strong>79.0</strong></td>
415
+ <td align="center"><u>39.8</u></td>
416
+ <td align="center"><strong>58.5</strong></td>
417
+ <td align="center"><strong>59.1</strong></td>
418
+ <td align="center"><strong>90.4</strong></td>
419
+ <td align="center"><u>62.4</u></td>
420
+ <td align="center">-</td>
421
+ <td align="center"><strong>73.3</strong></td>
422
+ <td align="center"><strong>73.3</strong></td>
423
+ </tr>
424
+ <tr>
425
+ <td><strong>Exaone-3.5-2.4B-inst</strong></td>
426
+ <td align="center"><strong>81.1</strong></td>
427
+ <td align="center">46.4</td>
428
+ <td align="center">28.1</td>
429
+ <td align="center">49.7</td>
430
+ <td align="center">41.4</td>
431
+ <td align="center"><u>82.5</u></td>
432
+ <td align="center">59.8</td>
433
+ <td align="center">-</td>
434
+ <td align="center">59.5</td>
435
+ <td align="center">59.5</td>
436
+ </tr>
437
+ <tr>
438
+ <td><strong>Mi:dm 2.0-Mini-inst</strong></td>
439
+ <td align="center">73.6</td>
440
+ <td align="center"><u>44.5</u></td>
441
+ <td align="center">26.6</td>
442
+ <td align="center"><u>51.7</u></td>
443
+ <td align="center"><u>40.9</u></td>
444
+ <td align="center">83.1</td>
445
+ <td align="center"><strong>60.9</strong></td>
446
+ <td align="center">-</td>
447
+ <td align="center">56.5</td>
448
+ <td align="center">56.5</td>
449
+ </tr>
450
+
451
+ <tr><td colspan="11">&nbsp;</td></tr>
452
+
453
+ <!-- Large Models -->
454
+ <tr>
455
+ <td><strong>Qwen3-14B</strong></td>
456
+ <td align="center"><u>83.9</u></td>
457
+ <td align="center"><strong>83.4</strong></td>
458
+ <td align="center"><strong>49.8</strong></td>
459
+ <td align="center"><strong>57.7</strong></td>
460
+ <td align="center"><strong>63.6</strong></td>
461
+ <td align="center">88.0</td>
462
+ <td align="center">73.4</td>
463
+ <td align="center"><strong>70.5</strong></td>
464
+ <td align="center"><strong>82.7</strong></td>
465
+ <td align="center"><strong>76.6</strong></td>
466
+ </tr>
467
+ <tr>
468
+ <td><strong>Llama-3.1-8B-inst</strong></td>
469
+ <td align="center">79.9</td>
470
+ <td align="center"><u>60.3</u></td>
471
+ <td align="center">21.6</td>
472
+ <td align="center">50.3</td>
473
+ <td align="center">44.1</td>
474
+ <td align="center">81.2</td>
475
+ <td align="center"><strong>81.8</strong></td>
476
+ <td align="center">47.6</td>
477
+ <td align="center"><u>70.7</u></td>
478
+ <td align="center"><u>59.2</u></td>
479
+ </tr>
480
+ <tr>
481
+ <td><strong>Exaone-3.5-7.8B-inst</strong></td>
482
+ <td align="center">83.6</td>
483
+ <td align="center">50.1</td>
484
+ <td align="center"><u>33.1</u></td>
485
+ <td align="center"><u>51.2</u></td>
486
+ <td align="center"><u>44.8</u></td>
487
+ <td align="center">81.1</td>
488
+ <td align="center">79.4</td>
489
+ <td align="center">40.7</td>
490
+ <td align="center">69.0</td>
491
+ <td align="center">54.8</td>
492
+ </tr>
493
+ <tr>
494
+ <td><strong>Mi:dm 2.0-Base-inst</strong></td>
495
+ <td align="center"><strong>84.0</strong></td>
496
+ <td align="center">77.7</td>
497
+ <td align="center">33.5</td>
498
+ <td align="center">51.9</td>
499
+ <td align="center">54.4</td>
500
+ <td align="center"><strong>91.6</strong></td>
501
+ <td align="center"><u>77.5</u></td>
502
+ <td align="center"><u>53.3</u></td>
503
+ <td align="center">73.7</td>
504
+ <td align="center">63.5</td>
505
+ </tr>
506
+ </table>
507
+
508
+
509
  <br>
510
 
511
  # Usage
 
571
  ### Contact
572
  Mi:dm 2.0 Technical Inquiries: [email protected]
573
 
574
+ <br>
575
+