import json
import os

import datasets
import pandas as pd

from src.about import Tasks
from src.display.formatting import has_no_nan_values, make_clickable_model
from src.display.utils import AutoEvalColumn, EvalQueueColumn

# The values of these columns are in the range of 0-100
# We normalize them to 0-1
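# (e.g., a raw "ALT E to J BLEU" score of 42.5 is displayed as 0.425; the
# value here is purely illustrative)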
COLUMNS_TO_NORMALIZE = [
    "ALT E to J BLEU",
    "ALT J to E BLEU",
    "WikiCorpus E to J BLEU",
    "WikiCorpus J to E BLEU",
    "XL-Sum JA BLEU",
    "XL-Sum ROUGE1",
    "XL-Sum ROUGE2",
    "XL-Sum ROUGE-Lsum",
]


def get_leaderboard_df(contents_repo: str, cols: list[str], benchmark_cols: list[str]) -> pd.DataFrame:
    """Builds the leaderboard dataframe from the contents dataset hosted on the Hub."""
    df = datasets.load_dataset(contents_repo, split="train").to_pandas()

    # Clickable model link and short type symbol derived from the model type
    df["Model"] = df["model"].map(make_clickable_model)
    df["T"] = df["model_type"].map(lambda x: x.split(":")[0].strip())

    # Rename metric and metadata columns to their display names
    df = df.rename(columns={task.value.metric: task.value.col_name for task in Tasks})
    df = df.rename(
        columns={
            "architecture": "Architecture",
            "weight_type": "Weight type",
            "precision": "Precision",
            "license": "Hub License",
            "params": "#Params (B)",
            "likes": "Hub ❤️",
            "revision": "Revision",
            "num_few_shot": "Few-shot",
            "add_special_tokens": "Add Special Tokens",
            "llm_jp_eval_version": "llm-jp-eval version",
            "vllm_version": "vllm version",
            "model_type": "Type",
            "model": "model_name_for_query",
        }
    )

    # Add a row ID column
    df[AutoEvalColumn.row_id.name] = range(len(df))

    # Normalize the 0-100 columns to 0-1
    available_columns_to_normalize = [col for col in COLUMNS_TO_NORMALIZE if col in df.columns]
    df[available_columns_to_normalize] = df[available_columns_to_normalize] / 100

    df = df.sort_values(by=[AutoEvalColumn.AVG.name], ascending=False)
    df = df[cols].round(decimals=4)

    # Filter out rows for which any of the benchmarks has not been produced
    df = df[has_no_nan_values(df, benchmark_cols)]
    return df
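

# A minimal usage sketch for get_leaderboard_df. The repo id and the column
# lists below are hypothetical; in the app they come from its configuration
# and src.display.utils:
#
#     leaderboard_df = get_leaderboard_df(
#         "llm-jp/leaderboard-contents",  # hypothetical Hub dataset repo
#         cols=COLS,                      # hypothetical full display column list
#         benchmark_cols=BENCHMARK_COLS,  # hypothetical per-task score columns
#     )
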
def get_evaluation_queue_df(save_path: str, cols: list[str]) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """Creates the different dataframes for the evaluation queue requests."""
    entries = [entry for entry in os.listdir(save_path) if not entry.startswith(".")]
    all_evals = []

    for entry in entries:
        if ".json" in entry:
            # A request file stored at the top level of the queue directory
            file_path = os.path.join(save_path, entry)
            with open(file_path) as fp:
                data = json.load(fp)

            data[EvalQueueColumn.model.name] = make_clickable_model(data["model"])
            data[EvalQueueColumn.revision.name] = data.get("revision", "main")

            all_evals.append(data)
        elif ".md" not in entry:
            # This is a folder: collect every request file inside it
            sub_entries = [e for e in os.listdir(f"{save_path}/{entry}") if not e.startswith(".")]
            for sub_entry in sub_entries:
                file_path = os.path.join(save_path, entry, sub_entry)
                with open(file_path) as fp:
                    data = json.load(fp)

                data[EvalQueueColumn.model.name] = make_clickable_model(data["model"])
                data[EvalQueueColumn.revision.name] = data.get("revision", "main")
                all_evals.append(data)

    # Bucket the requests by status
    pending_list = [e for e in all_evals if e["status"] in ["PENDING", "RERUN"]]
    running_list = [e for e in all_evals if e["status"] == "RUNNING"]
    finished_list = [e for e in all_evals if e["status"].startswith("FINISHED") or e["status"] == "PENDING_NEW_EVAL"]
    failed_list = [e for e in all_evals if e["status"] == "FAILED"]

    df_pending = pd.DataFrame.from_records(pending_list, columns=cols)
    df_running = pd.DataFrame.from_records(running_list, columns=cols)
    df_finished = pd.DataFrame.from_records(finished_list, columns=cols)
    df_failed = pd.DataFrame.from_records(failed_list, columns=cols)
    return df_finished[cols], df_running[cols], df_pending[cols], df_failed[cols]
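

# A minimal local smoke test, not part of the app itself. The queue directory
# and the column subset below are hypothetical; the real values come from the
# app's configuration.
if __name__ == "__main__":
    example_cols = ["model", "revision", "status"]  # hypothetical column subset
    finished_df, running_df, pending_df, failed_df = get_evaluation_queue_df(
        "./eval-queue",  # hypothetical local checkout of the requests repo
        example_cols,
    )
    print(
        f"finished={len(finished_df)} running={len(running_df)} "
        f"pending={len(pending_df)} failed={len(failed_df)}"
    )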