Update README.md
Browse files
README.md
CHANGED
@@ -180,20 +180,15 @@ Okay, the user is asking if I can talk to them. First, I need to clarify that I
|
|
180 |
| Benchmark | | |
|
181 |
|----------------------------------|----------------|---------------------------|
|
182 |
| | SmolLM3-3B | SmolLM3-3B-8da4w |
|
183 |
-
| **Popular aggregated benchmark** | | |
|
184 |
-
| mmlu | - | - |
|
185 |
-
| mmlu_pro | - | - |
|
186 |
-
| bbh | - | - |
|
187 |
| **Reasoning** | | |
|
188 |
| hellaswag | 56.53 | 54.39 |
|
189 |
| gpqa_main_zeroshot | 32.37 | 27.46 |
|
190 |
| **Multilingual** | | |
|
191 |
-
| m_mmlu | - | - |
|
192 |
| mgsm_en_cot_en | 66.80 | 40.40 |
|
193 |
| **Math** | | |
|
194 |
| gsm8k | 72.71 | 58.08 |
|
195 |
| leaderboard_math_hard (v3) | 27.87 | 19.94 |
|
196 |
-
| **Overall** |
|
197 |
|
198 |
<details>
|
199 |
<summary> Reproduce Model Quality Results </summary>
|
|
|
180 |
| Benchmark | | |
|
181 |
|----------------------------------|----------------|---------------------------|
|
182 |
| | SmolLM3-3B | SmolLM3-3B-8da4w |
|
|
|
|
|
|
|
|
|
183 |
| **Reasoning** | | |
|
184 |
| hellaswag | 56.53 | 54.39 |
|
185 |
| gpqa_main_zeroshot | 32.37 | 27.46 |
|
186 |
| **Multilingual** | | |
|
|
|
187 |
| mgsm_en_cot_en | 66.80 | 40.40 |
|
188 |
| **Math** | | |
|
189 |
| gsm8k | 72.71 | 58.08 |
|
190 |
| leaderboard_math_hard (v3) | 27.87 | 19.94 |
|
191 |
+
| **Overall** | 51.25 | 40.05 |
|
192 |
|
193 |
<details>
|
194 |
<summary> Reproduce Model Quality Results </summary>
|