Spaces:

open-llm-leaderboard
/

open_llm_leaderboard

Running on CPU Upgrade

Clémentine committed on Dec 10, 2024

Commit

6e9fc26

1 Parent(s): 2749dc3

updated tooltips with correct citations

Files changed (2) hide show

frontend/src/pages/LeaderboardPage/components/Leaderboard/constants/tooltips.js CHANGED Viewed

@@ -103,7 +103,7 @@ export const COLUMN_TOOLTIPS = {
     },
   ]),
-  MUSR: createTooltipContent("Multistep Soft Reasoning (MUSR):", [
     {
       label: "Scope",
       description: "Reasoning and understanding on/of long texts",

     },
   ]),
+  MUSR: createTooltipContent("Multistep Soft Reasoning (MuSR):", [
     {
       label: "Scope",
       description: "Reasoning and understanding on/of long texts",

frontend/src/pages/QuotePage/QuotePage.js CHANGED Viewed

@@ -74,7 +74,7 @@ const benchmarks = [
     url: "https://arxiv.org/abs/2311.07911",
   },
   {
-    title: "CBT: Challenging BIG-Bench Tasks",
     authors: "Suzgun et al.",
     citation: `@misc{suzgun2022challengingbigbenchtaskschainofthought,
   title={Challenging BIG-Bench Tasks and Whether Chain-of-Thought Can Solve Them},
@@ -88,7 +88,7 @@ const benchmarks = [
     url: "https://arxiv.org/abs/2210.09261",
   },
   {
-    title: "MATH: Mathematical Problem Solving Dataset",
     authors: "Hendrycks et al.",
     citation: `@misc{hendrycks2021measuringmathematicalproblemsolving,
   title={Measuring Mathematical Problem Solving With the MATH Dataset},
@@ -130,7 +130,7 @@ const benchmarks = [
     url: "https://arxiv.org/abs/2310.16049",
   },
   {
-    title: "MMLU-Pro: Multi-Task Language Understanding Pro",
     authors: "Wang et al.",
     citation: `@misc{wang2024mmluprorobustchallengingmultitask,
   title={MMLU-Pro: A More Robust and Challenging Multi-Task Language Understanding Benchmark},

     url: "https://arxiv.org/abs/2311.07911",
   },
   {
+    title: "BBH: Big-Bench Hard",
     authors: "Suzgun et al.",
     citation: `@misc{suzgun2022challengingbigbenchtaskschainofthought,
   title={Challenging BIG-Bench Tasks and Whether Chain-of-Thought Can Solve Them},
     url: "https://arxiv.org/abs/2210.09261",
   },
   {
+    title: "MATH: Mathematics Aptitude Test of Heuristics - Level 5",
     authors: "Hendrycks et al.",
     citation: `@misc{hendrycks2021measuringmathematicalproblemsolving,
   title={Measuring Mathematical Problem Solving With the MATH Dataset},
     url: "https://arxiv.org/abs/2310.16049",
   },
   {
+    title: "MMLU-Pro: Massive Multitask Language Understanding Professional",
     authors: "Wang et al.",
     citation: `@misc{wang2024mmluprorobustchallengingmultitask,
   title={MMLU-Pro: A More Robust and Challenging Multi-Task Language Understanding Benchmark},