# Hugging Face Space (status when captured: Sleeping)
import gradio as gr | |
import re | |
import json | |
from huggingface_hub import HfApi | |
import pandas as pd | |
import os | |
# Access token for the Hugging Face Hub, read from the environment.
token = os.getenv('HF_API_KEY')
# Never echo the secret itself: stdout ends up in the application logs.
# Only report whether a token was found.
print(f"HF_API_KEY is {'set' if token else 'not set'}")
# Hub client used for every repository call in this file.
api = HfApi(token=token)
def _flatten_result(result):
    """Turn one parsed metrics file into a flat leaderboard row.

    Args:
        result: The JSON object loaded from a ``metrics*`` file.

    Returns:
        A dict with ``model``, ``is_contextual``, ``pooling`` and one entry
        per task holding its ``ndcg_at_10`` score, or ``None`` when the file
        must be left out: its ``is_contextual`` value is not one of
        "multi", "single" or "hybrid", or it has no model name.
    """
    if result.get("is_contextual") not in ("multi", "single", "hybrid"):
        return None
    model = result.get("model")
    if not model:
        return None

    row = {
        # Keep only the last path component of the model identifier.
        "model": str(model).split("/")[-1],
        "is_contextual": result["is_contextual"],
        "pooling": result.get("pooling"),
    }
    for task, scores in (result.get("metrics") or {}).items():
        # Tasks without an ndcg_at_10 score are left empty (NaN in the frame).
        if isinstance(scores, dict) and "ndcg_at_10" in scores:
            row[task] = scores["ndcg_at_10"]
    return row


def compute_df() -> pd.DataFrame:
    """Build the leaderboard from the metrics files of ``illuin-cde/baselines-v3``.

    Every repository file whose path starts with ``metrics`` is downloaded and
    parsed as JSON. Each kept file becomes one row with the columns ``model``,
    ``is_contextual``, ``pooling``, one column per task (its ``ndcg_at_10``)
    and ``avg``, the mean of the task columns.

    Returns:
        The leaderboard rounded to 3 decimals and sorted by ``avg`` in
        descending order. When no file qualifies, an empty frame with the
        base columns and ``avg`` is returned.
    """
    repo_id = "illuin-cde/baselines-v3"
    base_cols = ["model", "is_contextual", "pooling"]

    files = [f for f in api.list_repo_files(repo_id) if f.startswith("metrics")]
    print(files)

    rows = []
    metric_cols = []  # task columns, in order of first appearance
    for file in files:
        result_path = api.hf_hub_download(repo_id, filename=file)
        with open(result_path, "r", encoding="utf-8") as f:
            row = _flatten_result(json.load(f))
        if row is None:
            continue
        rows.append(row)
        for name in row:
            if name not in base_cols and name not in metric_cols:
                metric_cols.append(name)

    # reindex (rather than df[cols]) so that a column missing from every file,
    # or an empty result list, yields an empty column instead of a KeyError.
    df = pd.DataFrame(rows).reindex(columns=base_cols + metric_cols)

    # Average over the task columns only; missing scores are ignored.
    df["avg"] = df[metric_cols].mean(axis=1)
    df = df.round(3)

    # Best average first.
    return df.sort_values(by="avg", ascending=False)
# Leaderboard table, computed a single time when the module is loaded.
gradio_df: pd.DataFrame = compute_df()
def refresh(model_query_regex):
    """Filter the precomputed leaderboard by model name.

    Args:
        model_query_regex: Text typed by the user. It is applied as a
            case-insensitive regular expression to the ``model`` column.
            When it is not a valid regular expression (for example an
            unbalanced ``(``), it is matched as a literal, case-insensitive
            substring instead of raising.

    Returns:
        The full leaderboard when the query is empty, otherwise the rows
        whose model name matches, as a pandas DataFrame.
    """
    if not model_query_regex:
        return gradio_df

    names = gradio_df["model"]
    try:
        # Validate first: the query is free text typed by the user.
        re.compile(model_query_regex)
    except re.error:
        mask = names.str.contains(
            model_query_regex, case=False, regex=False, na=False
        )
    else:
        mask = names.str.contains(
            model_query_regex, flags=re.IGNORECASE, regex=True, na=False
        )
    return gradio_df[mask]
# Search UI: a single text box whose content is passed to refresh(), with the
# returned table shown as a dataframe.
demo = gr.Interface(
    fn=refresh,
    inputs=gr.Textbox(label="Search Model"),
    outputs="dataframe",
    title="Results Leaderboard",
)
demo.launch()