from dataclasses import dataclass
from enum import Enum


@dataclass
class Task:
    benchmark: str
    metric: str
    col_name: str
# Select your tasks here
# ---------------------------------------------------
class Tasks(Enum):
    # task_key in the json file, metric_key in the json file, name to display in the leaderboard
    task0 = Task("Count", "acc", "Count")
    task1 = Task("Order", "acc", "Order")
    task2 = Task("VCR", "acc", "VCR")
    task3 = Task("Culture", "acc", "Culture")
    task4 = Task("Trick", "acc", "Trick")


class N_Tasks(Enum):
    # F1 variants of the main tasks (there is no F1 entry for VCR)
    task0_f1 = Task("Count", "f1", "Count")
    task1_f1 = Task("Order", "f1", "Order")
    task3_f1 = Task("Culture", "f1", "Culture")
    task4_f1 = Task("Trick", "f1", "Trick")


class Detail_Tasks(Enum):
    # Per-variant accuracy breakdown (variants 0-4 of each task; VCR has no breakdown)
    task0_0 = Task("Count 0", "acc", "Count 0")
    task0_1 = Task("Count 1", "acc", "Count 1")
    task0_2 = Task("Count 2", "acc", "Count 2")
    task0_3 = Task("Count 3", "acc", "Count 3")
    task0_4 = Task("Count 4", "acc", "Count 4")
    task1_0 = Task("Order 0", "acc", "Order 0")
    task1_1 = Task("Order 1", "acc", "Order 1")
    task1_2 = Task("Order 2", "acc", "Order 2")
    task1_3 = Task("Order 3", "acc", "Order 3")
    task1_4 = Task("Order 4", "acc", "Order 4")
    task3_0 = Task("Culture 0", "acc", "Culture 0")
    task3_1 = Task("Culture 1", "acc", "Culture 1")
    task3_2 = Task("Culture 2", "acc", "Culture 2")
    task3_3 = Task("Culture 3", "acc", "Culture 3")
    task3_4 = Task("Culture 4", "acc", "Culture 4")
    task4_0 = Task("Trick 0", "acc", "Trick 0")
    task4_1 = Task("Trick 1", "acc", "Trick 1")
    task4_2 = Task("Trick 2", "acc", "Trick 2")
    task4_3 = Task("Trick 3", "acc", "Trick 3")
    task4_4 = Task("Trick 4", "acc", "Trick 4")
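

# A minimal usage sketch (not part of the original template): how the enums
# above are typically consumed when assembling the leaderboard table. The
# names ACC_COLUMNS/F1_COLUMNS/DETAIL_COLUMNS/average_score and the `results`
# layout are illustrative assumptions inferred from the Task fields.
ACC_COLUMNS = [task.value.col_name for task in Tasks]
F1_COLUMNS = [task.value.col_name for task in N_Tasks]
DETAIL_COLUMNS = [task.value.col_name for task in Detail_Tasks]


def average_score(results: dict) -> float:
    """Mean accuracy over the main tasks, assuming `results` maps each
    benchmark name to a {metric_key: value} dict (hypothetical layout)."""
    return sum(results[t.value.benchmark][t.value.metric] for t in Tasks) / len(Tasks)
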
NUM_FEWSHOT = 0  # Change to match your few-shot setting
# ---------------------------------------------------
# Your leaderboard name
TITLE = """<h1 align="center" id="space-title">DARE leaderboard</h1>"""
# What does your leaderboard evaluate?
INTRODUCTION_TEXT = """
DARE (Diverse Visual Question Answering with Robustness Evaluation) tests models on five question categories: Count, Order, VCR, Culture, and Trick. This leaderboard reports accuracy for each category, plus F1 where available.
"""
# Which evaluations are you running? how can people reproduce what you have?
LLM_BENCHMARKS_TEXT = f"""
## How it works
## Reproducibility
To reproduce our results, here are the commands you can run:
"""
EVALUATION_QUEUE_TEXT = """
## Evaluation Queue
To add your model to the evaluation queue, please submit a JSON file with your model's predictions, listing each example's id and the predicted option(s). The file should be in the following format:
```
[
    {"id": "0", "prediction": ["A"]},
    {"id": "1", "prediction": ["B"]},
    {"id": "2", "prediction": ["A", "C"]},
    ...
]
```
Make sure to include the following information in your submission:
- Model name
- Output format
- Task version
"""
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
CITATION_BUTTON_TEXT = r"""
@article{sterz2024dare,
  title={DARE: Diverse Visual Question Answering with Robustness Evaluation},
  author={Sterz, Hannah and Pfeiffer, Jonas and Vuli{\'c}, Ivan},
  journal={arXiv preprint arXiv:2409.18023},
  year={2024}
}
"""