Spaces:

SeaLLMs
/

LLM_Leaderboard_for_SEA

Running

App Files Files Community

isakzhang committed on Nov 26, 2024

Commit

14f2c25

verified ·

1 Parent(s): a9804eb

Update src/display/about.py

Browse files

Files changed (1) hide show

src/display/about.py +7 -11

src/display/about.py CHANGED Viewed

@@ -17,7 +17,7 @@ class Tasks(Enum):
 # Your leaderboard name
 # TITLE = """<h1 align="center" id="space-title">📃 SeaExam and SeaBench Leaderboard</h1>"""
-TITLE = """<h1 align="left" id="space-title">🏅 LLM Leaderboard for SEA</h1>"""
 # subtitle
 SUB_TITLE = """<h2 align="left" id="space-title">What is the best LLM for Southeast Asian Languages❓</h2>"""
@@ -36,11 +36,12 @@ This leaderboard evaluates Large Language Models (LLMs) on Southeast Asian (SEA)
 """
 INTRODUCTION_TEXT = """
-This leaderboard evaluates Large Language Models (LLMs) on Southeast Asian (SEA) languages through two comprehensive benchmarks: SeaExam and SeaBench:
-* SeaExam assesses world knowledge and reasoning capabilities through exam-style questions [[data (public)](https://huggingface.co/datasets/SeaLLMs/SeaExam)] [[code](https://github.com/DAMO-NLP-SG/SeaExam)]
-* SeaBench evaluates instruction-following abilities and multi-turn conversational skills. [[data (public)](https://huggingface.co/datasets/SeaLLMs/SeaBench)] [[code](https://github.com/DAMO-NLP-SG/SeaBench?tab=readme-ov-file)]
-Note: "pub" denotes public dataset, and "prv" denotes private dataset.
 For more details, please refer to the "📝 About" tab.
 """
 # For additional details such as datasets, evaluation criteria, and reproducibility, please refer to the "📝 About" tab.
@@ -119,16 +120,11 @@ If everything is done, check you can launch the EleutherAIHarness on your model
 CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
 CITATION_BUTTON_TEXT = r"""
-@article{damonlp2024sealeaderboard,
-  author = {Chaoqun Liu, Wenxuan Zhang, Jiahao Ying, Mahani Aljunied, Anh Tuan Luu, Lidong Bing},
-  title = {SeaExam and SeaBench: Benchmarking LLMs with Local Multilingual Questions in Southeast Asia},
-  year = {2024},
-  url = {},
 }
 """
 CONTACT_TEXT = f"""
 ## Contact
-If you have any questions or want to include your models in the leaderboard, please contact Chaoqun Liu (<[email protected]>) and Wenxuan Zhang (<saike.zwx@alibaba-inc.com>).
 """

 # Your leaderboard name
 # TITLE = """<h1 align="center" id="space-title">📃 SeaExam and SeaBench Leaderboard</h1>"""
+TITLE = """<h1 align="left" id="space-title">🏅 LLM Leaderboard for Southeast Asian Languages</h1>"""
 # subtitle
 SUB_TITLE = """<h2 align="left" id="space-title">What is the best LLM for Southeast Asian Languages❓</h2>"""
 """
 INTRODUCTION_TEXT = """
+This leaderboard evaluates Large Language Models (LLMs) on Southeast Asian (SEA) languages through two comprehensive benchmarks - SeaExam and SeaBench:
+* **SeaExam** assesses world knowledge and reasoning capabilities through exam-style questions [[data (public)](https://huggingface.co/datasets/SeaLLMs/SeaExam)] [[eval code](https://github.com/DAMO-NLP-SG/SeaExam)]
+* **SeaBench** evaluates instruction-following abilities and multi-turn conversational skills. [[data (public)](https://huggingface.co/datasets/SeaLLMs/SeaBench)] [[eval code](https://github.com/DAMO-NLP-SG/SeaBench?tab=readme-ov-file)]
+Below are the aggregated results for SeaExam and SeaBench, shown for both the public dataset ("pub") - which you can download via the links above - and our in-house held-out private dataset ("prv").
+For detailed results by language, please refer to the individual sub-tabs.
 For more details, please refer to the "📝 About" tab.
 """
 # For additional details such as datasets, evaluation criteria, and reproducibility, please refer to the "📝 About" tab.
 CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
 CITATION_BUTTON_TEXT = r"""
 }
 """
 CONTACT_TEXT = f"""
 ## Contact
+If you have any questions or want to include your models in the leaderboard, please contact Chaoqun Liu (<[email protected]>) and [Wenxuan Zhang](https://isakzhang.github.io/).
 """