import json
import os

import datasets
import pandas as pd

from src.about import Tasks
from src.display.formatting import has_no_nan_values, make_clickable_model, make_clickable_model_with_shot
from src.display.utils import AutoEvalColumn, EvalQueueColumn

# The values of these columns are in the range of 0-100
# We normalize them to 0-1
COLUMNS_TO_NORMALIZE = [
    "ALT E to J BLEU",
    "ALT J to E BLEU",
    "WikiCorpus E to J BLEU",
    "WikiCorpus J to E BLEU",
    "XL-Sum JA BLEU",
    "XL-Sum ROUGE1",
    "XL-Sum ROUGE2",
    "XL-Sum ROUGE-Lsum",
]
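# For example, an "ALT E to J BLEU" score stored as 45.2 becomes 0.452 after
# normalization, so these metrics end up on the same 0-1 scale as the other
# benchmark columns.

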
def get_leaderboard_df(contents_repo: str, cols: list[str], benchmark_cols: list[str]) -> pd.DataFrame:
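    """Builds the leaderboard dataframe from the contents dataset hosted on the Hub."""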
    df = datasets.load_dataset(contents_repo, split="train").to_pandas()
    # df["Model"] = df["model"].map(make_clickable_model)
    df["Model"] = df.apply(lambda x: make_clickable_model_with_shot(x["model"], x["num_few_shot"]), axis=1)
df["T"] = df["model_type"].map(lambda x: x.split(":")[0].strip())
df = df.rename(columns={task.value.metric: task.value.col_name for task in Tasks})
df = df.rename(
columns={
"architecture": "Architecture",
"weight_type": "Weight type",
"precision": "Precision",
"license": "Hub License",
"params": "#Params (B)",
"likes": "Hub ❤️",
"revision": "Revision",
"num_few_shot": "Few-shot",
"add_special_tokens": "Add Special Tokens",
"llm_jp_eval_version": "llm-jp-eval version",
"vllm_version": "vllm version",
"model_type": "Type",
"model": "model_name_for_query",
}
)
# Add a row ID column
df[AutoEvalColumn.row_id.name] = range(len(df))
# Normalize the columns
available_columns_to_normalize = [col for col in COLUMNS_TO_NORMALIZE if col in df.columns]
df[available_columns_to_normalize] = df[available_columns_to_normalize] / 100
df = df.sort_values(by=[AutoEvalColumn.AVG.name], ascending=False)
df = df[cols].round(decimals=4)
# filter out if any of the benchmarks have not been produced
df = df[has_no_nan_values(df, benchmark_cols)]
return df
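

# Minimal usage sketch (hypothetical names: COLS and BENCHMARK_COLS would come from
# this Space's display config, and the dataset repo id below is illustrative):
#   leaderboard_df = get_leaderboard_df("my-org/leaderboard-contents", COLS, BENCHMARK_COLS)
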
def get_evaluation_queue_df(save_path: str, cols: list[str]) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """Creates the different dataframes for the evaluation queue requests.

    Request JSON files are read from save_path (both top-level files and one level
    of subfolders); the finished, running, pending, and failed queues are returned
    in that order.
    """
    entries = [entry for entry in os.listdir(save_path) if not entry.startswith(".")]
    all_evals = []

    for entry in entries:
        if ".json" in entry:
            file_path = os.path.join(save_path, entry)
            with open(file_path) as fp:
                data = json.load(fp)

            # data[EvalQueueColumn.model.name] = make_clickable_model(data["model"])
            data[EvalQueueColumn.model.name] = make_clickable_model_with_shot(
                data["model"],
                data["num_few_shot"],  # num_few_shot is guaranteed to exist, so access it directly
            )
            data[EvalQueueColumn.revision.name] = data.get("revision", "main")

            all_evals.append(data)
        elif ".md" not in entry:
            # this is a folder
            sub_entries = [e for e in os.listdir(f"{save_path}/{entry}") if not e.startswith(".")]
            for sub_entry in sub_entries:
                file_path = os.path.join(save_path, entry, sub_entry)
                with open(file_path) as fp:
                    data = json.load(fp)

                data[EvalQueueColumn.model.name] = make_clickable_model(data["model"])
                data[EvalQueueColumn.revision.name] = data.get("revision", "main")
                all_evals.append(data)
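
    # Bucket the requests by status for the four returned queues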
    pending_list = [e for e in all_evals if e["status"] in ["PENDING", "RERUN"]]
    running_list = [e for e in all_evals if e["status"] == "RUNNING"]
    finished_list = [e for e in all_evals if e["status"].startswith("FINISHED") or e["status"] == "PENDING_NEW_EVAL"]
    failed_list = [e for e in all_evals if e["status"] == "FAILED"]

    df_pending = pd.DataFrame.from_records(pending_list, columns=cols)
    df_running = pd.DataFrame.from_records(running_list, columns=cols)
    df_finished = pd.DataFrame.from_records(finished_list, columns=cols)
    df_failed = pd.DataFrame.from_records(failed_list, columns=cols)
    return df_finished[cols], df_running[cols], df_pending[cols], df_failed[cols]
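

# Minimal usage sketch (hypothetical values: EVAL_COLS would come from this Space's
# display config, and save_path points at a local checkout of the requests repo):
#   finished_df, running_df, pending_df, failed_df = get_evaluation_queue_df("./eval-queue", EVAL_COLS)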