from dataclasses import dataclass
from enum import Enum
@dataclass
class Task:
    benchmark: str  # task key in the results json file
    metric: str  # metric key in the results json file
    col_name: str  # column name to display in the leaderboard
    category: str  # task category (NLU, CFK, BFS)
    higher_is_better: bool = True
    scale_by_100: bool = True
# Select your tasks here
# ---------------------------------------------------
class Tasks(Enum):
    # task_key in the json file, metric_key in the json file, name to display in the leaderboard
    task1 = Task("ami_2020_aggressiveness", "f1,none", "AMI 2020 Agg", "NLU")
    task2 = Task("ami_2020_misogyny", "f1,none", "AMI 2020 Miso", "NLU")
    task0 = Task("arc_challenge_ita", "acc_norm,none", "ARC-C", "CFK")
    task4 = Task("belebele_ita", "acc_norm,none", "Belebele", "NLU")
    task3 = Task("gente_rephrasing", "acc,none", "GeNTE Neutralizing", "BFS")
    task12 = Task("haspeede2_hs", "f1,none", "HaSpeeDe2 HS", "BFS")
    task13 = Task("haspeede2_stereo", "f1,none", "HaSpeeDe2 Stereo", "BFS")
    task5 = Task("hatecheck_ita", "f1,none", "HateCheck", "BFS")
    task6 = Task("honest_ita", "acc,none", "HONEST", "BFS", higher_is_better=False)
    task14 = Task("ironita_irony", "f1,none", "IronITA Irony", "NLU")
    task15 = Task("ironita_sarcasm", "f1,none", "IronITA Sarcasm", "NLU")
    task7 = Task("itacola", "mcc,none", "ItaCoLA", "NLU", scale_by_100=False)
    task8 = Task("news_sum_fanpage", "bertscore,none", "News Sum (fanpage)", "NLU")
    task18 = Task("news_sum_ilpost", "bertscore,none", "News Sum (il post)", "NLU")
    task16 = Task("sentipolc", "f1,none", "SENTIPOLC", "NLU")
    task9 = Task("squad_it", "squad_f1,get-answer", "SQuAD it", "CFK")
    task10 = Task("truthfulqa_mc2_ita", "acc,none", "TruthfulQA", "CFK")
    task11 = Task("xcopa_it", "acc,none", "XCOPA", "CFK")
    task17 = Task("hellaswag_ita", "acc_norm,none", "Hellaswag-it", "CFK")
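# Minimal, hypothetical helpers (a sketch, not part of the original leaderboard
# code) showing how the Tasks enum above could be consumed: each member wraps a
# Task carrying the json keys, the display column name, the category, the sort
# direction, and whether the score is rescaled to a percentage.
def tasks_by_category() -> dict[str, list[Task]]:
    """Group the declared tasks by category (NLU, CFK, BFS)."""
    groups: dict[str, list[Task]] = {}
    for member in Tasks:
        groups.setdefault(member.value.category, []).append(member.value)
    return groups


def to_display_score(task: Task, raw: float) -> float:
    """Rescale a raw metric to a percentage unless scale_by_100 is disabled
    (e.g. ItaCoLA's MCC stays on its native scale)."""
    return raw * 100 if task.scale_by_100 else raw
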
NUM_FEWSHOT = 0  # Change to match your few-shot setting
# ---------------------------------------------------
# Your leaderboard name
TITLE = """<h1 align="center" id="space-title">ItaEval leaderboard</h1>"""
# What does your leaderboard evaluate?
INTRODUCTION_TEXT = """
This leaderboard evaluates language models on <b>ItaEval</b>, a new unified benchmark for Italian.
Submit your model: [Google Form](https://forms.gle/xpGH66DpVRcCmdcJ6)
Some information:
- Unlike other leaderboards you may find online, we do not support automatic evaluation for new model submissions: we run the models on the suite ourselves. Please complete the form above to have your model evaluated and included here.
- You can find more details on the [official web page](https://rita-nlp.org/sprints/itaeval/).
"""
ITA_EVAL_REPO = "https://github.com/RiTA-nlp/ita-eval"
# Which evaluations are you running? how can people reproduce what you have?
LLM_BENCHMARKS_TEXT = f"""
## How it works
We run every model on the ItaEval suite with [lm-evaluation-harness](https://github.com/EleutherAI/lm-evaluation-harness); tasks are grouped into three categories (NLU, CFK, BFS).
## Reproducibility
Head to {ITA_EVAL_REPO} for all the instructions on reproducing our results.
If the setup goes smoothly, you can evaluate your model (`$MODEL` below) on ItaEval with:
```bash
MODEL="your-model-id-on-the-huggingface-hub"
lm_eval --model hf \
    --model_args pretrained=$MODEL,dtype=bfloat16 \
    --tasks ita_eval \
    --batch_size 1 \
    --log_samples \
    --output_path "."
```
"""
EVALUATION_QUEUE_TEXT = """
For now, we do not accept autonomous submissions. Fill in [this form](https://forms.gle/xpGH66DpVRcCmdcJ6) to have your model evaluated.
"""
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
CITATION_BUTTON_TEXT = r"""
@inproceedings{attanasio2024itaeval,
title={ItaEval and TweetyIta: A New Extensive Benchmark and Efficiency-First Language Model for Italian},
author={Attanasio, Giuseppe and Delobelle, Pieter and La Quatra, Moreno and Santilli, Andrea and Savoldi, Beatrice},
booktitle={CLiC-it 2024: Tenth Italian Conference on Computational Linguistics, Date: 2024/12/04-2024/12/06, Location: Pisa, Italy},
year={2024}
}
"""