DhiyaEddine
committed on
Commit
•
57f5a57
1
Parent(s):
6bb1918
Update README.md
Browse files
README.md
CHANGED
@@ -114,7 +114,7 @@ We report in the following table our internal pipeline benchmarks:
|
|
114 |
</tr>
|
115 |
<tr>
|
116 |
<td>MMLU-PRO (5-shot)</td>
|
117 |
-
<td
|
118 |
<td>-</td>
|
119 |
<td>-</td>
|
120 |
<td>29.6%</td>
|
@@ -122,7 +122,7 @@ We report in the following table our internal pipeline benchmarks:
|
|
122 |
</tr>
|
123 |
<tr>
|
124 |
<td>IFEval</td>
|
125 |
-
<td
|
126 |
<td>-</td>
|
127 |
<td>-</td>
|
128 |
<td>78.6%</td>
|
@@ -156,7 +156,7 @@ We report in the following table our internal pipeline benchmarks:
|
|
156 |
</tr>
|
157 |
<tr>
|
158 |
<td>GPQA (0-shot)</td>
|
159 |
-
<td
|
160 |
<td>-</td>
|
161 |
<td>-</td>
|
162 |
<td>2.4%</td>
|
@@ -164,7 +164,7 @@ We report in the following table our internal pipeline benchmarks:
|
|
164 |
</tr>
|
165 |
<tr>
|
166 |
<td>MUSR (0-shot)</td>
|
167 |
-
<td
|
168 |
<td>-</td>
|
169 |
<td>-</td>
|
170 |
<td>8.4%</td>
|
@@ -172,7 +172,7 @@ We report in the following table our internal pipeline benchmarks:
|
|
172 |
</tr>
|
173 |
<tr>
|
174 |
<td>BBH (3-shot)</td>
|
175 |
-
<td
|
176 |
<td>-</td>
|
177 |
<td>-</td>
|
178 |
<td>29.9%</td>
|
|
|
114 |
</tr>
|
115 |
<tr>
|
116 |
<td>MMLU-PRO (5-shot)</td>
|
117 |
+
<td>32.4%</td>
|
118 |
<td>-</td>
|
119 |
<td>-</td>
|
120 |
<td>29.6%</td>
|
|
|
122 |
</tr>
|
123 |
<tr>
|
124 |
<td>IFEval</td>
|
125 |
+
<td>69.9%</td>
|
126 |
<td>-</td>
|
127 |
<td>-</td>
|
128 |
<td>78.6%</td>
|
|
|
156 |
</tr>
|
157 |
<tr>
|
158 |
<td>GPQA (0-shot)</td>
|
159 |
+
<td>10.3%</td>
|
160 |
<td>-</td>
|
161 |
<td>-</td>
|
162 |
<td>2.4%</td>
|
|
|
164 |
</tr>
|
165 |
<tr>
|
166 |
<td>MUSR (0-shot)</td>
|
167 |
+
<td>8.2%</td>
|
168 |
<td>-</td>
|
169 |
<td>-</td>
|
170 |
<td>8.4%</td>
|
|
|
172 |
</tr>
|
173 |
<tr>
|
174 |
<td>BBH (3-shot)</td>
|
175 |
+
<td>33.3%</td>
|
176 |
<td>-</td>
|
177 |
<td>-</td>
|
178 |
<td>29.9%</td>
|