# BenCzechMark-unstable
#
# Runtime error
#
# File size: 6,210 Bytes
#
# b66f230

import copy
import glob
import json
import os

import gradio as gr
import pandas as pd
from huggingface_hub import HfApi, snapshot_download

from compare_significance import check_significance
from model_compare import ModelCompare

# Organisation on the Hugging Face Hub that owns the benchmark data.
ORG = "CZLC"
# Repo id that LeaderboardServer downloads from and uploads to.
REPO = f"{ORG}/LLM_benchmark_data"
# Access token read from the environment; None when HF_TOKEN is not set.
HF_TOKEN = os.getenv("HF_TOKEN")
# JSON file whose 'tasks' entry describes the benchmark tasks.
TASKS_METADATA_PATH = "./tasks_metadata.json"

# Shared Hub client used for the uploads.
api = HfApi()


class LeaderboardServer:
    """Local mirror of the leaderboard dataset repo plus submission bookkeeping.

    On construction the repo ``REPO`` is downloaded into the current
    directory, every ``data/*.json`` file that carries a ``metadata`` entry is
    indexed by its submission id, and ``tournament.json`` is loaded when it
    exists.  A new submission is first staged with
    ``prepare_model_for_submission`` and only uploaded by ``save_pre_submit``.
    """

    def __init__(self):
        self.server_address = REPO
        self.repo_type = "dataset"
        # Value returned by snapshot_download; used as the root for all local paths.
        self.local_leaderboard = self._download_snapshot()
        # NOTE: the attribute name is misspelled (three "s").  It is kept as-is
        # because code outside this class may already reference it.
        self.submisssion_id_to_file = {}  # Map submission ids to file paths
        self.tasks_metadata = self._read_json(TASKS_METADATA_PATH)['tasks']
        self.submission_ids = set()
        self.comparer = ModelCompare(self.tasks_metadata.keys())
        self.fetch_existing_models()
        self.tournament_results = self.load_tournament_results()
        # Staged submission as (tournament_results, model_id, file), or None.
        self.pre_submit = None

    @staticmethod
    def _read_json(path):
        """Parse the JSON file at *path*, closing the file handle afterwards."""
        with open(path, encoding="utf-8") as json_file:
            return json.load(json_file)

    @staticmethod
    def _submission_key(metadata):
        """Return the ``<team_name>_<submission_id>`` key identifying a submission."""
        return metadata["team_name"] + "_" + metadata["submission_id"]

    def _download_snapshot(self):
        """Download the repo into the working directory and return what snapshot_download returns."""
        return snapshot_download(self.server_address, repo_type=self.repo_type, token=HF_TOKEN,
                                 local_dir="./")

    def update_leaderboard(self):
        """Re-download the repo, then re-index submissions and reload tournament results.

        Entries indexed by an earlier call are kept; this method only adds to
        or overwrites the existing index.
        """
        self.local_leaderboard = self._download_snapshot()
        self.fetch_existing_models()
        self.tournament_results = self.load_tournament_results()

    def load_tournament_results(self):
        """Return the parsed ``tournament.json`` of the local snapshot, or ``{}`` if it is absent."""
        metadata_rank_paths = os.path.join(self.local_leaderboard, "tournament.json")
        if not os.path.exists(metadata_rank_paths):
            return {}
        return self._read_json(metadata_rank_paths)

    def fetch_existing_models(self):
        """Index every ``data/*.json`` file of the local snapshot by submission id.

        Files without a ``metadata`` entry are skipped.  Results are added to
        ``self.submission_ids`` and ``self.submisssion_id_to_file``.
        """
        # Models data
        for submission in glob.glob(os.path.join(self.local_leaderboard, "data", "*.json")):
            metadata = self._read_json(submission).get('metadata')
            if metadata is None:
                continue
            submission_id = self._submission_key(metadata)
            self.submission_ids.add(submission_id)
            self.submisssion_id_to_file[submission_id] = submission

    def get_leaderboard(self, tournament_results=None):
        """Build the leaderboard table as a pandas DataFrame.

        Args:
            tournament_results: Tournament results to rank by.  When empty or
                ``None``, ``self.tournament_results`` is used instead.

        Returns:
            A DataFrame with a ``submission_id`` column followed by one column
            per task holding the 1-based position of the submission in that
            task's ranking; task columns are renamed to the task's ``name``
            from the tasks metadata.  The staged (pre-submit) submission, if
            any, is placed in the first row.  With no results at all, an empty
            DataFrame with the single column ``No submissions yet`` is returned.

        Raises:
            gr.Error: A submission in the results is neither indexed locally
                nor the staged submission.
        """
        rank_based_on = tournament_results or self.tournament_results
        if not rank_based_on:
            return pd.DataFrame(columns=['No submissions yet'])

        ranks = self.comparer.get_tasks_ranks(rank_based_on)
        # Materialise each task ranking once instead of once per submission.
        rank_orders = {task: list(task_ranks) for task, task_ranks in ranks.items()}

        results = []
        for submission in rank_based_on:
            is_pending = bool(self.pre_submit) and submission == self.pre_submit[1]
            path = self.submisssion_id_to_file.get(submission)
            if path is None:
                if not is_pending:
                    raise gr.Error(f"Internal error: Submission [{submission}] not found")
                # Not uploaded yet: read the staged file instead.
                path = self.pre_submit[2]
            submission_id = self._submission_key(self._read_json(path)["metadata"])

            local_results = {task: order.index(submission_id) + 1 for task, order in rank_orders.items()}
            local_results["submission_id"] = submission_id
            if is_pending:
                results.insert(0, local_results)
            else:
                results.append(local_results)

        dataframe = pd.DataFrame.from_records(results)
        df_order = ["submission_id"] + [col for col in dataframe.columns if col != "submission_id"]
        dataframe = dataframe[df_order]
        return dataframe.rename(columns={key: value["name"] for key, value in self.tasks_metadata.items()})

    def start_tournament(self, new_model_id, new_model_file):
        """Compare a new submission against every indexed one.

        Works on a deep copy, so ``self.tournament_results`` is left untouched.

        Args:
            new_model_id: Submission id of the new model.
            new_model_file: Path to the new model's result file.

        Returns:
            A copy of the tournament results extended with the ``significant``
            value per task, as reported by ``check_significance``, for the new
            model against each indexed model and vice versa.
        """
        new_tournament = copy.deepcopy(self.tournament_results)
        # A model compared with itself is recorded as False for every task.
        new_tournament[new_model_id] = {new_model_id: {task: False for task in self.tasks_metadata}}

        for model in self.submission_ids:
            model_file = self.submisssion_id_to_file[model]
            res = check_significance(new_model_file, model_file)
            res_inverse = check_significance(model_file, new_model_file)
            new_tournament[new_model_id][model] = {
                task: data["significant"] for task, data in res.items()
            }
            # The model may have no row yet (e.g. tournament.json is missing or
            # older than the data files); create it instead of raising KeyError.
            new_tournament.setdefault(model, {})[new_model_id] = {
                task: data["significant"] for task, data in res_inverse.items()
            }
        return new_tournament

    def prepare_model_for_submission(self, file, metadata) -> None:
        """Stage a submission: embed its metadata in the file and run the tournament.

        Args:
            file: Path to the submission's JSON file; it is rewritten in place
                with ``metadata`` stored under the ``metadata`` key.
            metadata: Mapping that provides at least ``team_name`` and
                ``submission_id``.

        Raises:
            KeyError: ``metadata`` lacks ``team_name`` or ``submission_id``.
        """
        # Derive the id first so malformed metadata fails before the file is rewritten.
        model_id = self._submission_key(metadata)

        data = self._read_json(file)
        data["metadata"] = metadata
        with open(file, "w", encoding="utf-8") as f:
            json.dump(data, f)

        tournament_results = self.start_tournament(model_id, file)
        self.pre_submit = tournament_results, model_id, file

    def save_pre_submit(self):
        """Upload the staged submission and its tournament results to the repo.

        Does nothing when no submission is staged.  ``self.pre_submit`` and
        ``self.tournament_results`` are not modified here.
        """
        if self.pre_submit:
            tournament_results, model_id, file = self.pre_submit
            filename = os.path.basename(file)
            api.upload_file(
                path_or_fileobj=file,
                path_in_repo=f"data/{model_id}_{filename}",
                repo_id=self.server_address,
                repo_type=self.repo_type,
                token=HF_TOKEN,
            )

            # Temporary save tournament results
            tournament_results_path = os.path.join(self.local_leaderboard, "tournament.json")
            with open(tournament_results_path, "w", encoding="utf-8") as f:
                json.dump(tournament_results, f)

            api.upload_file(
                path_or_fileobj=tournament_results_path,
                path_in_repo="tournament.json",
                repo_id=self.server_address,
                repo_type=self.repo_type,
                token=HF_TOKEN,
            )