File size: 2,211 Bytes
c08abba
 
 
 
 
 
 
 
 
 
 
 
 
 
254da86
 
 
 
 
 
 
c08abba
dfc075f
 
 
 
 
 
 
 
 
 
 
 
 
 
c08abba
 
afb233a
c08abba
 
 
af0f401
c08abba
 
 
af0f401
c08abba
 
 
254da86
 
 
 
 
 
 
 
 
 
 
 
c08abba
 
 
 
af0f401
 
c08abba
 
 
 
 
af0f401
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
from dataclasses import dataclass
from enum import Enum

@dataclass(frozen=True)
class Task:
    """Describe one benchmark column of the leaderboard.

    Instances are used as the values of the task enums defined in this
    module, so each one is a shared, module-level constant. The dataclass is
    frozen so those shared values cannot be mutated by accident and so that
    instances are hashable.
    """

    # Task key to look up in the results JSON file.
    benchmark: str
    # Metric key to look up in the results JSON file.
    metric: str
    # Name to display for this task in the leaderboard.
    col_name: str


# Benchmarks reported on the leaderboard; replace these entries with your own keys.
class Tasks(Enum):
    """Leaderboard tasks, one enum member per benchmark."""

    # Each Task is built from:
    #   benchmark -> task_key in the json file
    #   metric    -> metric_key in the json file
    #   col_name  -> name to display in the leaderboard
    task0 = Task(benchmark="MMLU", metric="metric_name", col_name="MMLU")
    task1 = Task(benchmark="Synthetic_QA", metric="metric_name", col_name="Synthetic_QA")
    task2 = Task(benchmark="Support_MC", metric="metric_name", col_name="Support_MC")
    task3 = Task(benchmark="Context_QA", metric="metric_name", col_name="Context_QA")
    task4 = Task(benchmark="Banking_MC", metric="metric_name", col_name="Banking_MC")
    task5 = Task(benchmark="ARC", metric="metric_name", col_name="ARC")
    task6 = Task(benchmark="Binary_QA", metric="metric_name", col_name="Binary_QA")
    task7 = Task(benchmark="ANL_Quad", metric="metric_name", col_name="ANL_Quad")
    
    
class Tasks_Grouped(Enum):
    """Task list under the ``Tasks_Grouped`` name, one member per benchmark."""

    # NOTE(review): these members currently mirror ``Tasks`` one-to-one;
    # confirm whether the grouped variant is meant to differ.
    # Each Task is built from the JSON task key, the JSON metric key and the
    # column name shown in the leaderboard, in that order.
    task0 = Task(benchmark="MMLU", metric="metric_name", col_name="MMLU")
    task1 = Task(benchmark="Synthetic_QA", metric="metric_name", col_name="Synthetic_QA")
    task2 = Task(benchmark="Support_MC", metric="metric_name", col_name="Support_MC")
    task3 = Task(benchmark="Context_QA", metric="metric_name", col_name="Context_QA")
    task4 = Task(benchmark="Banking_MC", metric="metric_name", col_name="Banking_MC")
    task5 = Task(benchmark="ARC", metric="metric_name", col_name="ARC")
    task6 = Task(benchmark="Binary_QA", metric="metric_name", col_name="Binary_QA")
    task7 = Task(benchmark="ANL_Quad", metric="metric_name", col_name="ANL_Quad")
    




# HTML heading holding the leaderboard's name.
TITLE = '<h1 align="center" id="space-title"> Azerbaijani LLM Leaderboard</h1>'

# What does your leaderboard evaluate?
# The rocket emoji below is a real UTF-8 character (it previously appeared as
# mis-decoded bytes); keep this file saved and read as UTF-8.
INTRODUCTION_TEXT = """
Welcome to Kapital Bank's Azerbaijani LLM Leaderboard. We use benchmarks in finance, banking, and general knowledge for accurate evaluations.

🚀 Submit Your Model 🚀

If you have a fine-tuned Azerbaijani LLM, submit it for evaluation!

"""

# Markdown text stored as a plain string: it has no replacement fields, so an
# f-prefix would only make any "{" or "}" added later be parsed as a field.
# NOTE(review): the body reads like placeholder copy — confirm before release.
LLM_BENCHMARKS_TEXT = """
## How it works
## Reproducibility
HERE we have about part
"""

# Markdown text stored as a plain string: it has no replacement fields, so an
# f-prefix would only make any "{" or "}" added later be parsed as a field.
# NOTE(review): the body reads like placeholder copy — confirm before release.
LLM_DATASET_TEXT = """
## How it works
## Reproducibility
HERE we have about part
"""


# Markdown guidance for model submissions, assembled line by line; the empty
# first and last entries produce the leading and trailing newline.
EVALUATION_QUEUE_TEXT = "\n".join(
    (
        "",
        "## Some good practices before submitting a model",
        "",
        "### 1) Make sure your model exists on hub.",
        "### 2) Make sure your model is public.",
        "",
        "",
        "## In case of model failure",
        "If your model is displayed in the `FAILED` category, its execution stopped.",
        "Make sure you have followed the above steps first.",
        "Please contact us if you are facing any trouble!",
        "",
    )
)