from dataclasses import dataclass
from enum import Enum


@dataclass
class Task:
    benchmark: str
    metric: str
    col_name: str
# Select your tasks here
# ---------------------------------------------------
class Tasks(Enum):
    # task_key in the json file, metric_key in the json file, name to display in the leaderboard
    # (a usage sketch follows this block)
    task0 = Task("Math", "acc", "Math")
    task1 = Task("Chemistry", "acc", "Chemistry")
    task2 = Task("Physics", "acc", "Physics")
    task3 = Task("Arabic", "acc", "Arabic")
    task4 = Task("English", "acc", "English")
    task5 = Task("Religion", "acc", "Religion")
    task6 = Task("Persian Literature", "acc", "Persian Literature")
NUM_FEWSHOT = 0  # Change with your few shot
# ---------------------------------------------------
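
# Illustrative sketch of how the Tasks enum above can map a model's scores onto
# leaderboard columns. This is not the Space's actual pipeline: it assumes each
# per-model results JSON nests scores as results[benchmark][metric], and the
# helper name `results_to_row` is hypothetical.
def results_to_row(results: dict) -> dict:
    """Pull each task's metric out of a results dict, keyed by its display column."""
    row = {}
    for task in Tasks:
        scores = results.get(task.value.benchmark, {})
        row[task.value.col_name] = scores.get(task.value.metric)
    return row

# Example: results_to_row({"Math": {"acc": 0.42}}) -> {"Math": 0.42, "Chemistry": None, ...}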
# Your leaderboard name
TITLE = """<h1 align="center" id="space-title">IRUEX Leaderboard</h1>"""

# What does your leaderboard evaluate?
INTRODUCTION_TEXT = """
**Welcome to the IRUEX Leaderboard!**

This platform evaluates large language models based on Iran's University Entrance Exam subjects.
[Explore the IRUEX Dataset](https://github.com/hamedkhaledi/IRUEX-dataset) on GitHub.
"""
# Which evaluations are you running? How can people reproduce them?
LLM_BENCHMARKS_TEXT = """
## Evaluation Process
We assess models across subjects from Iran's University Entrance Exam: Math, Chemistry, Physics, Arabic, English, Religion, and Persian Literature. Each model's performance is measured by its accuracy on each subject.

## Reproducibility
To reproduce our results, execute the following commands:
```bash
# Example command to run evaluations
python evaluate_model.py --model_name your_model_name --task Math --num_fewshot 0
```
"""
EVALUATION_QUEUE_TEXT = ""

CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
CITATION_BUTTON_TEXT = r"""
@inproceedings{khademi-khaledi-faili-2025-iruex,
    title = "{IRUEX}: A Study on Large Language Models' Problem-Solving Skills in Iran's University Entrance Exam",
    author = "Khademi Khaledi, Hamed and Faili, Heshaam",
    editor = "Rambow, Owen and Wanner, Leo and Apidianaki, Marianna and Al-Khalifa, Hend and Di Eugenio, Barbara and Schockaert, Steven",
    booktitle = "Proceedings of the 31st International Conference on Computational Linguistics",
    month = jan,
    year = "2025",
    address = "Abu Dhabi, UAE",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2025.coling-main.434/",
    pages = "6505--6519",
}
"""