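"""Loads leaderboard results for Long Code Arena tasks.

Results are read from the Hugging Face dataset referenced by the DATASET_ID
environment variable; tasks without published results fall back to a stub table.
"""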
import logging
import os

import pandas as pd  # type: ignore[import]
from datasets import get_dataset_config_names, load_dataset  # type: ignore[import]

from .leaderboard_formatting import COLUMNS_PRETTY, get_columns_per_task

AVAILABLE_TASKS = get_dataset_config_names(os.environ["DATASET_ID"])


def _get_results_stub() -> pd.DataFrame:
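    """Build a placeholder leaderboard with "X" metric values for tasks that have no published results."""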
    stub_df = pd.DataFrame(
        [
            {
                "Model Name": "GPT-4",
                "Availability": "Proprietary",
                "Context Size": "16k",
                "BLEU": "X",
                "ROUGE": "X",
                "ChrF": "X",
                "BERTScore": "X",
                "BERTScore (Normalized)": "X",
                "Submitted By": "🏟 Long Code Arena Team",
            },
            {
                "Model Name": "CodeLlama-7b (instruct)",
                "Availability": "Llama 2 license",
                "Context Size": "16k",
                "BLEU": "X",
                "ROUGE": "X",
                "ChrF": "X",
                "BERTScore": "X",
                "BERTScore (Normalized)": "X",
                "Submitted By": "🏟 Long Code Arena Team",
            },
        ]
    )
    return stub_df


def _get_results_dataset(task_id: str) -> pd.DataFrame:
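    """Load the results for a task from the dataset given by DATASET_ID and prettify them for display."""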
    results_df = load_dataset(
        os.environ["DATASET_ID"], task_id, split="test"
    ).to_pandas()
    results_df = results_df.rename(columns=COLUMNS_PRETTY, errors="ignore")
    # Render context sizes of 1000+ tokens as "Nk" (e.g., 16000 -> "16k").
    results_df["Context Size"] = results_df["Context Size"].map(
        lambda x: f"{int(x) // 1000}k" if int(x) >= 1000 else x
    )
    results_df = results_df[get_columns_per_task(task_id)]
    return results_df


def get_results_for_task(task_id: str) -> pd.DataFrame:
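    """Return the leaderboard table for a task, falling back to the stub if no results are available."""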
    if task_id in AVAILABLE_TASKS:
        logging.info(f"Retrieving results for {task_id}...")
        return _get_results_dataset(task_id)

    logging.info(f"Generating leaderboard stub for {task_id}...")
    return _get_results_stub()
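

# Minimal usage sketch, assuming DATASET_ID is set in the environment; the relative
# import above means the module must be run as part of its package (e.g. `python -m ...`).
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    for task in AVAILABLE_TASKS:
        # Print the prettified leaderboard columns for each available task.
        print(task, get_results_for_task(task).columns.tolist())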