Update README.md
Browse files
README.md
CHANGED
@@ -129,19 +129,15 @@ We rely on [lm-evaluation-harness](https://github.com/EleutherAI/lm-evaluation-h
Before (README lines 129–147; truncated cells in the pasted diff are left blank):

| Benchmark                        |                |                           |
|----------------------------------|----------------|---------------------------|
|                                  | Qwen3-32B      |                           |
| **General**                      |                |                           |
| mmlu                             |                |                           |
| bbh                              | WIP            | WIP                       |
| **Multilingual**                 |                |                           |
| mgsm_en_cot_en                   |                |                           |
| m_mmlu (avg)                     | WIP            | WIP                       |
| **Math**                         |                |                           |
| gpqa_main_zeroshot               |                |                           |
| leaderboard_math_hard (v3)       | WIP            | WIP                       |
| **Overall**                      | WIP            | WIP                       |

<details>
<summary> Reproduce Model Quality Results </summary>
After (README lines 129–143; rows `m_mmlu (avg)` and `leaderboard_math_hard (v3)` were removed in this change):

| Benchmark                        |                |                           |
|----------------------------------|----------------|---------------------------|
|                                  | Qwen3-32B      | Qwen3-32B-float8dq        |
| **General**                      |                |                           |
| mmlu                             | 80.71          | 80.67                     |
| bbh                              | 37.49          | 38.01                     |
| **Multilingual**                 |                |                           |
| mgsm_en_cot_en                   | 64.40          | WIP                       |
| **Math**                         |                |                           |
| gpqa_main_zeroshot               | 41.96          | 42.63                     |
| **Overall**                      | 56.14          | WIP                       |

<details>
<summary> Reproduce Model Quality Results </summary>