from dataclasses import dataclass
from enum import Enum

@dataclass
class Task:
    benchmark: str
    metric: str
    col_name: str


# Select your tasks here
# ---------------------------------------------------
class Tasks(Enum):
    # task key in the results json, metric key in the results json, column name shown in the leaderboard
    task0 = Task("Count", "acc", "Count")
    task1 = Task("Order", "acc", "Order")
    task2 = Task("VCR", "acc", "VCR")
    task3 = Task("Culture", "acc", "Culture")
    task4 = Task("Trick", "acc", "Trick")

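# Same tasks reported with F1 instead of accuracy (VCR is not included here)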
class N_Tasks(Enum):
    task0_f1 = Task("Count", "f1", "Count")
    task1_f1 = Task("Order", "f1", "Order")
    task3_f1 = Task("Culture", "f1", "Culture")
    task4_f1 = Task("Trick", "f1", "Trick")

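# Per-variant accuracy breakdown: each task has five numbered entries (0-4) in the results json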
class Detail_Tasks(Enum):
    task0_0 = Task("Count 0", "acc", "Count 0")
    task0_1 = Task("Count 1", "acc", "Count 1")
    task0_2 = Task("Count 2", "acc", "Count 2")
    task0_3 = Task("Count 3", "acc", "Count 3")
    task0_4 = Task("Count 4", "acc", "Count 4")
    task1_0 = Task("Order 0", "acc", "Order 0")
    task1_1 = Task("Order 1", "acc", "Order 1")
    task1_2 = Task("Order 2", "acc", "Order 2")
    task1_3 = Task("Order 3", "acc", "Order 3")
    task1_4 = Task("Order 4", "acc", "Order 4")
    task3_0 = Task("Culture 0", "acc", "Culture 0")
    task3_1 = Task("Culture 1", "acc", "Culture 1")
    task3_2 = Task("Culture 2", "acc", "Culture 2")
    task3_3 = Task("Culture 3", "acc", "Culture 3")
    task3_4 = Task("Culture 4", "acc", "Culture 4")
    task4_0 = Task("Trick 0", "acc", "Trick 0")
    task4_1 = Task("Trick 1", "acc", "Trick 1")
    task4_2 = Task("Trick 2", "acc", "Trick 2")
    task4_3 = Task("Trick 3", "acc", "Trick 3")
    task4_4 = Task("Trick 4", "acc", "Trick 4")


NUM_FEWSHOT = 0  # number of few-shot examples; change to match your evaluation setup
# ---------------------------------------------------
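
# Illustrative sketch (not part of the original template): one way the Task
# entries above are typically consumed, pulling each score out of a results
# dict keyed by benchmark and metric. The `results` layout below is an assumption.
def extract_scores(results: dict) -> dict:
    """Map leaderboard column names to the scores found in a results dict."""
    # Assumed layout: {"Count": {"acc": 0.71, "f1": 0.69}, "Order": {...}, ...}
    scores = {}
    for task in Tasks:
        cfg = task.value
        scores[cfg.col_name] = results.get(cfg.benchmark, {}).get(cfg.metric)
    return scores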



# Your leaderboard name
TITLE = """<h1 align="center" id="space-title">DARE leaderboard</h1>"""

# What does your leaderboard evaluate?
INTRODUCTION_TEXT = """
DARE (Diverse Visual Question Answering with Robustness Evaluation) is a benchmark for visual question answering. This leaderboard reports model performance on its five task categories: Count, Order, VCR, Culture, and Trick.
"""

# Which evaluations are you running? how can people reproduce what you have?
LLM_BENCHMARKS_TEXT = f"""
## How it works

## Reproducibility
To reproduce our results, run the following commands:

"""

EVALUATION_QUEUE_TEXT = """
## Evaluation Queue

To add your model to the evaluation queue, please submit a file containing your model's predictions, listing the question id and the predicted option(s) for each example. The file should be in the following format:
```
[
    {"id": "0", "prediction": ["A"]},
    {"id": "1", "prediction": ["B"]},
    {"id": "2", "prediction": ["A", "C"]},
    ...
]
```
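
For illustration, a predictions file in this format could be produced with a short script like the one below (the file name and the `predictions` list are placeholders):

```python
import json

# Collect one entry per question: its id and the predicted option(s)
predictions = [
    {"id": "0", "prediction": ["A"]},
    {"id": "1", "prediction": ["B"]},
]

# Write the submission file in the expected JSON format
with open("predictions.json", "w") as f:
    json.dump(predictions, f, indent=2)
```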

Make sure to include the following information in your submission:
- Model name
- Output format
- Task version
"""

CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
CITATION_BUTTON_TEXT = r"""
@article{sterz2024dare,
  title={DARE: Diverse Visual Question Answering with Robustness Evaluation},
  author={Sterz, Hannah and Pfeiffer, Jonas and Vuli{\'c}, Ivan},
  journal={arXiv preprint arXiv:2409.18023},
  year={2024}
}
"""