|
from dataclasses import dataclass |
|
from enum import Enum |
|
import json |
|
|
|
|
|
@dataclass
class Task:
    """Plain record describing a single leaderboard task.

    All three fields are strings and are stored as given; no validation or
    normalisation happens here.
    """

    # Identifier of the benchmark this task refers to.
    benchmark: str
    # Name of the metric recorded for the benchmark.
    metric: str
    # Column label for the task — presumably the header shown in the results
    # table; confirm against the code that builds the table.
    col_name: str
|
|
|
|
|
|
|
def create_task_list(path: str = "src/datasets.json"):
    """Build the task list and the distinct dataset groups from a JSON file.

    Args:
        path: JSON file to read. It must hold a list of objects that each
            provide a ``"name"`` and a ``"group"`` key. Defaults to
            ``"src/datasets.json"``, resolved against the working directory.

    Returns:
        A ``(tasks, groups)`` tuple. ``tasks`` contains one ``Task`` per
        entry, in file order, with the entry name used as both benchmark and
        column name and the literal ``"metric_name"`` as metric. ``groups``
        contains every distinct group exactly once, in order of first
        appearance.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
        KeyError: If an entry lacks ``"name"`` or ``"group"``.
    """
    # JSON text is UTF-8; an explicit encoding avoids depending on the
    # platform's locale default when names contain non-ASCII characters.
    with open(path, encoding="utf-8") as f:
        data = json.load(f)

    # dict.fromkeys drops duplicates while keeping first-seen order, so the
    # result is the same on every run (set() order for strings is not).
    groups = list(dict.fromkeys(entry["group"] for entry in data))
    tasks = [Task(entry["name"], "metric_name", entry["name"]) for entry in data]

    return tasks, groups
|
|
|
|
|
|
|
|
|
# HTML heading markup containing the leaderboard title.
TITLE = """<h1 align="center" id="space-title"> Azerbaijani LLM Leaderboard</h1>"""
|
|
|
|
|
# Introductory text for the leaderboard, including the call to submit a model.
# NOTE(review): the "π" characters around "Submit Your Model" look like
# mis-encoded emoji — confirm the intended glyphs before changing them.
INTRODUCTION_TEXT = """
Welcome to Kapital Bank's Azerbaijani LLM Leaderboard. We use benchmarks in finance, banking, and general knowledge for accurate evaluations.

π Submit Your Model π

If you have a fine-tuned Azerbaijani LLM, submit it for evaluation!

"""
|
|
|
# Markdown text for the benchmarks section; the body is still placeholder
# content. A plain string is used because nothing is interpolated into it.
LLM_BENCHMARKS_TEXT = """
## How it works
## Reproducibility
HERE we have about part
"""
|
|
|
# Markdown text for the dataset section; the body is still placeholder
# content. A plain string is used because nothing is interpolated into it.
LLM_DATASET_TEXT = """
## How it works
## Reproducibility
HERE we have about part
"""
|
|
|
|
|
# Markdown guidance for submitters: checks to make before submitting a model
# and what to do when a model ends up in the `FAILED` category.
EVALUATION_QUEUE_TEXT = """
## Some good practices before submitting a model

### 1) Make sure your model exists on hub.
### 2) Make sure your model is public.


## In case of model failure
If your model is displayed in the `FAILED` category, its execution stopped.
Make sure you have followed the above steps first.
Please contact us if you are facing any trouble!
"""