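"""Helpers for building the Long Code Arena leaderboard tables.

Results are read from the Hugging Face dataset named by the ``DATASET_ID``
environment variable; a hard-coded stub table is returned for tasks whose
results are not available.
"""
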
import logging
import os

import pandas as pd  # type: ignore[import]
from datasets import get_dataset_config_names, load_dataset  # type: ignore[import]

from .formatting import model_hyperlink
from .leaderboard_formatting import (
COLUMNS_PRETTY,
METRICS_PER_TASK,
SORT_COLUMN_PER_TASK,
get_columns_per_task,
)
from .tasks_content import TASKS_PRETTY_REVERSE

try:
    AVAILABLE_TASKS = get_dataset_config_names(os.environ["DATASET_ID"])
except FileNotFoundError:
    # The dataset referenced by DATASET_ID cannot be reached; fall back to an
    # empty task list so the app can still render stub tables.
    AVAILABLE_TASKS = []


def _get_results_stub() -> pd.DataFrame:
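    """Return a hard-coded placeholder table shown when real results are unavailable."""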
stub_df = pd.DataFrame(
[
{
"Model Name": "GPT-4",
"Availability": "Proprietary",
"Context Size": "16k",
"BLEU": "X",
"ROUGE": "X",
"ChrF": "X",
"BERTScore": "X",
"BERTScore (Normalized)": "X",
"Submitted By": "π Long Code Arena Team",
},
{
"Model Name": "CodeLlama-7b (instruct)",
"Availability": "Llama 2 license",
"Context Size": "16k",
"BLEU": "X",
"ROUGE": "X",
"ChrF": "X",
"BERTScore": "X",
"BERTScore (Normalized)": "X",
"Submitted By": "π Long Code Arena Team",
},
]
)
return stub_df


def _get_results_dataset(task_id: str) -> pd.DataFrame:
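    """Load the results for ``task_id`` from the dataset and format them for display."""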
results_df = load_dataset(os.environ["DATASET_ID"], task_id, split="test").to_pandas()
    # Map raw dataset column names to their human-readable leaderboard labels.
    results_df = results_df.rename(columns=COLUMNS_PRETTY, errors="ignore")

    # Show context sizes as "16k"-style strings, and sort by the task's primary
    # metric before the numeric columns are converted to formatted strings.
    results_df["Context Size"] = results_df["Context Size"].map(
        lambda x: f"{int(x) // 1000}k" if int(x) >= 1000 else x
    )
    results_df = results_df.sort_values(by=SORT_COLUMN_PER_TASK[task_id], ascending=False)
    # Format BERTScore columns with five decimal places and all other metrics with two.
    for metric_column in METRICS_PER_TASK[task_id]:
if "BERTScore" in metric_column:
results_df[metric_column] = results_df[metric_column].map(lambda x: f"{x:.5f}")
else:
results_df[metric_column] = results_df[metric_column].map(lambda x: f"{x:.2f}")
results_df["Model Name"] = [
model_hyperlink(link=link, model_name=model_name) if link else model_name
for link, model_name in zip(results_df["model_url"], results_df["Model Name"])
]
    # Keep only the columns configured for this task, in their display order.
    results_df = results_df[get_columns_per_task(task_id)]
return results_df


def get_results_for_task(task_pretty: str) -> pd.DataFrame:
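    """Return the leaderboard table for ``task_pretty``, falling back to a stub if needed."""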
task_id = TASKS_PRETTY_REVERSE[task_pretty]
if task_id in AVAILABLE_TASKS:
logging.info(f"Retrieving results for {task_pretty}...")
return _get_results_dataset(task_id)
logging.info(f"Generating leaderboard stub for {task_pretty}...")
return _get_results_stub()
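

# Illustrative usage sketch (assumptions: DATASET_ID is set in the environment
# and TASKS_PRETTY_REVERSE contains at least one task). Prints the leaderboard
# table for the requested task, or for the first known task by default.
if __name__ == "__main__":
    import sys

    logging.basicConfig(level=logging.INFO)
    pretty_name = sys.argv[1] if len(sys.argv) > 1 else next(iter(TASKS_PRETTY_REVERSE))
    print(get_results_for_task(pretty_name).to_string(index=False))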