sequelbox committed on
Commit
1c1683f
1 Parent(s): 48e48f6
Files changed (1) hide show
  1. README.md +92 -0
README.md CHANGED
@@ -159,6 +159,98 @@ model-index:
159
  - type: acc
160
  value: 66.00
161
  name: acc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
162
  license: llama3.1
163
  ---
164
 
 
159
  - type: acc
160
  value: 66.00
161
  name: acc
162
+ - task:
163
+ type: text-generation
164
+ name: Text Generation
165
+ dataset:
166
+ name: IFEval (0-Shot)
167
+ type: HuggingFaceH4/ifeval
168
+ args:
169
+ num_few_shot: 0
170
+ metrics:
171
+ - type: inst_level_strict_acc and prompt_level_strict_acc
172
+ value: 53.55
173
+ name: strict accuracy
174
+ source:
175
+ url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=ValiantLabs/Llama3.1-70B-ShiningValiant2
176
+ name: Open LLM Leaderboard
177
+ - task:
178
+ type: text-generation
179
+ name: Text Generation
180
+ dataset:
181
+ name: BBH (3-Shot)
182
+ type: BBH
183
+ args:
184
+ num_few_shot: 3
185
+ metrics:
186
+ - type: acc_norm
187
+ value: 52.39
188
+ name: normalized accuracy
189
+ source:
190
+ url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=ValiantLabs/Llama3.1-70B-ShiningValiant2
191
+ name: Open LLM Leaderboard
192
+ - task:
193
+ type: text-generation
194
+ name: Text Generation
195
+ dataset:
196
+ name: MATH Lvl 5 (4-Shot)
197
+ type: hendrycks/competition_math
198
+ args:
199
+ num_few_shot: 4
200
+ metrics:
201
+ - type: exact_match
202
+ value: 27.19
203
+ name: exact match
204
+ source:
205
+ url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=ValiantLabs/Llama3.1-70B-ShiningValiant2
206
+ name: Open LLM Leaderboard
207
+ - task:
208
+ type: text-generation
209
+ name: Text Generation
210
+ dataset:
211
+ name: GPQA (0-shot)
212
+ type: Idavidrein/gpqa
213
+ args:
214
+ num_few_shot: 0
215
+ metrics:
216
+ - type: acc_norm
217
+ value: 19.02
218
+ name: acc_norm
219
+ source:
220
+ url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=ValiantLabs/Llama3.1-70B-ShiningValiant2
221
+ name: Open LLM Leaderboard
222
+ - task:
223
+ type: text-generation
224
+ name: Text Generation
225
+ dataset:
226
+ name: MuSR (0-shot)
227
+ type: TAUR-Lab/MuSR
228
+ args:
229
+ num_few_shot: 0
230
+ metrics:
231
+ - type: acc_norm
232
+ value: 18.48
233
+ name: acc_norm
234
+ source:
235
+ url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=ValiantLabs/Llama3.1-70B-ShiningValiant2
236
+ name: Open LLM Leaderboard
237
+ - task:
238
+ type: text-generation
239
+ name: Text Generation
240
+ dataset:
241
+ name: MMLU-PRO (5-shot)
242
+ type: TIGER-Lab/MMLU-Pro
243
+ config: main
244
+ split: test
245
+ args:
246
+ num_few_shot: 5
247
+ metrics:
248
+ - type: acc
249
+ value: 46.37
250
+ name: accuracy
251
+ source:
252
+ url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=ValiantLabs/Llama3.1-70B-ShiningValiant2
253
+ name: Open LLM Leaderboard
254
  license: llama3.1
255
  ---
256