Update README.md
README.md CHANGED
@@ -166,27 +166,27 @@ lm_eval --model hf --model_args pretrained=microsoft/Phi-4-mini-instruct --tasks
lm_eval --model hf --model_args pretrained=pytorch/Phi-4-mini-instruct-8da4w --tasks hellaswag --device cuda:0 --batch_size 8
```

- | Benchmark |
- |
- | | Phi-4
- | **Popular aggregated benchmark** |
- | mmlu (0 shot) | 66.73
- | mmlu_pro (5-shot) | 46.43
- | **Reasoning** |
- | arc_challenge | 56.91
- | gpqa_main_zeroshot | 30.13
- | hellaswag | 54.57
- | openbookqa | 33.00
- | piqa (0-shot) | 77.64
- | siqa | 49.59
- | truthfulqa_mc2 (0-shot) | 48.39
- | winogrande (0-shot) | 71.11
- | **Multilingual** |
- | mgsm_en_cot_en | 60.80
- | **Math** |
- | gsm8k (5-shot) | 81.88
- | Mathqa (0-shot) | 42.31
- | **Overall** | 55.35
+ | Benchmark | | |
+ |----------------------------------|----------------|---------------------------|
+ | | Phi-4-mini-ins | Phi-4-mini-instruct-8da4w |
+ | **Popular aggregated benchmark** | | |
+ | mmlu (0 shot) | 66.73 | 60.75 |
+ | mmlu_pro (5-shot) | 46.43 | 11.75 |
+ | **Reasoning** | | |
+ | arc_challenge | 56.91 | 48.46 |
+ | gpqa_main_zeroshot | 30.13 | 30.80 |
+ | hellaswag | 54.57 | 50.35 |
+ | openbookqa | 33.00 | 30.40 |
+ | piqa (0-shot) | 77.64 | 74.43 |
+ | siqa | 49.59 | 44.98 |
+ | truthfulqa_mc2 (0-shot) | 48.39 | 51.35 |
+ | winogrande (0-shot) | 71.11 | 70.32 |
+ | **Multilingual** | | |
+ | mgsm_en_cot_en | 60.80 | 57.60 |
+ | **Math** | | |
+ | gsm8k (5-shot) | 81.88 | 61.71 |
+ | Mathqa (0-shot) | 42.31 | 36.95 |
+ | **Overall** | 55.35 | 48.45 |
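
The same evaluation can also be driven from Python through lm-evaluation-harness's `simple_evaluate` API; a minimal sketch, assuming `lm_eval` and the quantized checkpoint's runtime dependencies are installed, with arguments mirroring the CLI flags above:

```python
# Minimal sketch: reproduce the hellaswag run from the CLI command above
# via the lm-evaluation-harness Python API.
import lm_eval

results = lm_eval.simple_evaluate(
    model="hf",
    model_args="pretrained=pytorch/Phi-4-mini-instruct-8da4w",
    tasks=["hellaswag"],
    device="cuda:0",
    batch_size=8,
)

# Per-task metrics (acc, acc_norm, ...) are collected under results["results"].
print(results["results"]["hellaswag"])
```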

# Exporting to ExecuTorch