"""Task definitions and display text for the CUTE leaderboard."""

from dataclasses import dataclass
from enum import Enum


@dataclass
class Task:
    """One evaluated task and how it is shown on the leaderboard."""

    # task_key in the json file
    benchmark: str
    # metric_key in the json file
    metric: str
    # name to display in the leaderboard
    col_name: str


# Select your tasks here
# ---------------------------------------------------
class Tasks(Enum):
    """All tasks reported on the leaderboard; each member's value is a Task."""

    # task_key in the json file, metric_key in the json file,
    # name to display in the leaderboard
    task0 = Task("Spelling", "exact_match", "spell")
    task1 = Task("Inverse Spelling", "exact_match", "spell_inverse")
    task2 = Task("Contains Char", "exact_match", "cont_char")
    task3 = Task("contains_word", "exact_match", "cont_word")
    task4 = Task("orth", "exact_match", "orth")
    task5 = Task("sem", "exact_match", "sem")
    task6 = Task("insert_char", "exact_match", "ins_char")
    task7 = Task("insert_word", "exact_match", "ins_word")
    task8 = Task("del_char", "exact_match", "del_char")
    task9 = Task("del_word", "exact_match", "del_word")
    task10 = Task("sub_char", "exact_match", "sub_char")
    task11 = Task("sub_word", "exact_match", "sub_word")
    task12 = Task("swap_char", "exact_match", "swap_char")
    task13 = Task("swap_word", "exact_match", "swap_word")


NUM_FEWSHOT = 0  # Change with your few shot
# ---------------------------------------------------


# Your leaderboard name
TITLE = """

CUTE Leaderboard

"""

# What does your leaderboard evaluate?
INTRODUCTION_TEXT = (
    " This is the evaluation leaderboard for CUTE, "
    "an orthographic understanding benchmark. "
)

# Which evaluations are you running? how can people reproduce what you have?
# Plain string (no f-prefix): the text has no placeholders. The heading sits on
# its own line so Markdown does not fold the following sentence into it.
LLM_BENCHMARKS_TEXT = """
## How it works

For more details, visit our repo: https://github.com/leukas/cute
"""