# KatherLab Medical LLM Leaderboard

from dataclasses import dataclass
from enum import Enum

@dataclass
class Task:
    """Describe one benchmark column of the leaderboard.

    Instances are built positionally as ``Task(benchmark, metric, col_name)``,
    so the field order below is part of the interface.
    """

    # Key of the task in the results json file.
    benchmark: str
    # Key of the metric in the results json file.
    metric: str
    # Column name displayed in the leaderboard.
    col_name: str


# Select your tasks here
# ---------------------------------------------------
class Tasks(Enum):
    """Tasks evaluated by the leaderboard, one member per displayed column."""

    # benchmark: task_key in the json file
    # metric:    metric_key in the json file
    # col_name:  name to display in the leaderboard
    task0 = Task(benchmark="metric1", metric="acc", col_name="Metric1")
    task1 = Task(benchmark="metric2", metric="acc_norm", col_name="Metric2")


# Few-shot setting; change this to match your evaluation setup.
NUM_FEWSHOT = 0
# ---------------------------------------------------


# Your leaderboard name, as an HTML heading.
TITLE = """<h1 align="center" id="space-title">KatherLab Medical LLM Leaderboard</h1>"""

# What does your leaderboard evaluate?
# Short introduction text; wording fixed ("This Leaderboards compares" was
# ungrammatical, and "LLM models" was redundant).
INTRODUCTION_TEXT = """
This leaderboard compares the performance of LLMs on multiple information extraction tasks from medical documents.
"""

# Which evaluations are you running? How can people reproduce what you have?
# Markdown text with two sections ("How it works" and "Reproducibility").
# The literal is whitespace-sensitive; keep it as a single triple-quoted string.
LLM_BENCHMARKS_TEXT = """
## How it works

We created multiple benchmark experiments. We use llama-cpp with restricted generations to evaluate LLM models on medical text. 

## Reproducibility

As of right now, the evaluation datasets are not publicly available. Please reach out to us if you have any questions.

"""

# Label for the citation snippet.
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"

# Citation snippet: a plain-text reference followed by a BibTeX entry.
# Kept as a raw string so that any backslashes added to the BibTeX later are
# not treated as Python escape sequences.
# The BibTeX citation key must not contain whitespace (and should stay ASCII
# for classic BibTeX), so the auto-generated key with spaces and "ß" was
# replaced by a short, valid one.
CITATION_BUTTON_TEXT = r"""Wiest, I. C., Wolf, F., Leßmann, M.-E., van Treeck, M., Ferber, D., Zhu, J., Boehme, H., Bressem, K. K., Ulrich, H., Ebert, M. P., & Kather, J. N. (2024). LLM-AIx: An open source pipeline for Information Extraction from unstructured medical text based on privacy preserving Large Language Models. https://doi.org/10.1101/2024.09.02.24312917 

@misc{Wiest_et_al_2024, 
title={LLM-AIx: An open source pipeline for Information Extraction from unstructured medical text based on privacy preserving Large Language Models}, 
url={http://dx.doi.org/10.1101/2024.09.02.24312917}, 
DOI={10.1101/2024.09.02.24312917}, 
author={Wiest, Isabella Catharina and Wolf, Fabian and Leßmann, Marie-Elisabeth and van Treeck, Marko and Ferber, Dyke and Zhu, Jiefu and Boehme, Heiko and Bressem, Keno K. and Ulrich, Hannes and Ebert, Matthias P. and Kather, Jakob Nikolas}, 
year={2024}, month=sep }


"""