Spaces:
Running
Running
File size: 6,583 Bytes
5ccbe05 170a088 5ccbe05 7bd86a9 ae55c78 5ccbe05 43b5eac 5ccbe05 b2fc46e 2f7c171 097c217 2f7c171 d4bd893 5ccbe05 7bd86a9 2eb8cb0 5ccbe05 95cd467 5ccbe05 ae55c78 5ccbe05 ae55c78 6ffefdd ae55c78 429b741 b1030db ae55c78 b4a4293 ae55c78 429b741 170a088 43b5eac 7bd86a9 43b5eac 5ccbe05 ae55c78 649e5b3 ae55c78 170a088 ae55c78 097c217 170a088 ae55c78 170a088 ae55c78 f5b436e ae55c78 95cd467 6ffefdd 95cd467 5ccbe05 7bd86a9 5ccbe05 43b5eac 5ccbe05 7bd86a9 5ccbe05 7bd86a9 |
|
"""A gradio app that renders a static leaderboard. This is used for Hugging Face Space."""
# import ast
# import argparse
# import glob
# import pickle
import gradio as gr
import numpy as np
import pandas as pd
import os
from collections import defaultdict
from matplotlib.colors import LinearSegmentedColormap
def make_default_md():
    """Return the Markdown header shown at the top of the leaderboard page.

    The header consists of the page title followed by a row of badge/links
    to the dataset, the GitHub repository and the paper.

    Returns:
        str: Markdown text, starting and ending with a newline.
    """
    # NOTE(review): the emoji in the heading were restored from mis-decoded
    # UTF-8 bytes; the needle and book glyphs are unambiguous, the first and
    # last glyph are a best guess -- confirm against the upstream file.
    leaderboard_md = """
# 🔎📚🪡📚❓ BABILong Leaderboard 🏆
[![Dataset on HF](https://huggingface.co/datasets/huggingface/badges/resolve/main/dataset-on-hf-lg.svg)](https://huggingface.co/datasets/booydar/babilong)
| [GitHub](https://github.com/booydar/babilong) | [Paper](https://arxiv.org/abs/2406.10149) | [HF Dataset](https://huggingface.co/datasets/booydar/babilong) | [HF Dataset 1k samples per task](https://huggingface.co/datasets/RMT-team/babilong-1k-samples) |
"""
    return leaderboard_md
def make_arena_leaderboard_md(total_models, last_updated="July 29, 2024"):
    """Return the one-line Markdown summary shown above every table.

    Args:
        total_models: Number of models to report; formatted as-is.
        last_updated: Human-readable date of the last results update. The
            default keeps the text the page has always shown; pass a new
            value instead of editing the template when results change.

    Returns:
        str: ``"Total #models: **N**. Last updated: <date>."``
    """
    return f"Total #models: **{total_models}**. Last updated: {last_updated}."
def make_model_desc_md(f_len):
    """Build the Markdown shown in the "Description" tab.

    The text starts with the summary line for ``f_len`` models and is
    followed by one ``###`` section per file found directly inside the
    local ``info`` directory. The section title is the file name without a
    trailing ``.md`` extension; the section body is the file content.

    Args:
        f_len: Number of models, forwarded to ``make_arena_leaderboard_md``.

    Returns:
        str: The assembled Markdown. If ``info`` does not exist only the
        summary line is returned.
    """
    desc_md = make_arena_leaderboard_md(f_len)
    info_dir = 'info'
    if not os.path.isdir(info_dir):
        # A missing directory used to surface as a bare StopIteration.
        return desc_md

    sections = []
    # Sorted so the section order does not depend on the filesystem.
    for file_name in sorted(os.listdir(info_dir)):
        path = os.path.join(info_dir, file_name)
        if not os.path.isfile(path):
            continue
        # Strip only a real ".md" extension; splitting on the first ".md"
        # would cut names such as "my.mdl-model.md" down to "my".
        stem, ext = os.path.splitext(file_name)
        model_name = stem if ext == '.md' else file_name
        with open(path, 'r', encoding='utf-8') as f:
            description = f.read()
        sections.append(f"\n\n### {model_name}\n{description}")
    return desc_md + ''.join(sections)
def model_hyperlink(model_name, link):
    """Return an HTML anchor that opens ``link`` in a new tab, labelled ``model_name``.

    Neither argument is escaped; both are inserted into the markup verbatim.
    """
    anchor_style = (
        "color: var(--link-text-color); "
        "text-decoration: underline;"
        "text-decoration-style: dotted;"
    )
    return '<a target="_blank" href="{}" style="{}">{}</a>'.format(
        link, anchor_style, model_name
    )
class _MissingScore:
    """Table cell for a (model, length) pair that has no usable results.

    It renders as ``-`` and converts to ``0.0`` so that a column containing
    it can still be cast to float (for averaging or colour mapping).
    """

    def __repr__(self) -> str:
        return '-'

    def __float__(self) -> float:
        return 0.0


def _read_score(csv_path):
    """Return the integer percentage of the ``result`` column, or None.

    None is returned when the file does not exist or has no rows.
    """
    if not os.path.isfile(csv_path):
        return None
    frame = pd.read_csv(csv_path)
    if len(frame) == 0:
        # An empty file has no accuracy; it used to crash on int(nan).
        return None
    # Multiply before dividing: 29 / 100 * 100 evaluates to 28.999... and
    # int() would then report 28 instead of 29.
    return int(frame['result'].sum() * 100 / len(frame))


def load_model(folders, tab_name, msg_lengths):
    """Collect per-length scores of every model into a ranked table.

    For each folder the file ``<folder>/<tab_name>/<task>.csv`` is read for
    every key ``task`` of ``msg_lengths``; the score is the percentage
    (truncated to int) given by the mean of its ``result`` column.

    Ranking: models are ordered by the number of lengths with a score above
    zero (more first), then by the mean score over the first ten lengths
    (higher first), where missing scores count as 0. Models without any
    score above zero are dropped.

    Args:
        folders: Paths of the model result directories. The last path
            component is used as the model name.
        tab_name: Sub-directory inside each model folder (e.g. ``'qa1'``).
        msg_lengths: Mapping from CSV base name to column label, in display
            order.

    Returns:
        pandas.DataFrame with the columns ``Rank``, ``Model``, the average
        over the first five and first seven lengths, and one column per
        length. Missing scores are shown as ``-``.
    """
    lengths = list(msg_lengths.values())
    missing = _MissingScore()

    # Columns are created up front so an empty ``folders`` list still
    # produces a well-formed (empty) table instead of a KeyError.
    shown = {'Model': []}
    for label in lengths:
        shown[label] = []
    numeric_rows = []  # same scores as plain floats, missing -> 0.0

    for folder in folders:
        # basename/normpath copes with trailing slashes and OS separators.
        model_name = os.path.basename(os.path.normpath(folder))
        # NOTE(review): both marker emoji were restored from mis-decoded
        # UTF-8 bytes -- confirm against the upstream file.
        if 'fine-tune' in model_name:
            model_name += ' 🛠️'
        if 'rag' in model_name.lower() or 'retrieve' in model_name.lower():
            model_name += ' 🔍'
        shown['Model'].append(model_name)

        row = []
        for task, label in msg_lengths.items():
            score = _read_score(os.path.join(folder, tab_name, f'{task}.csv'))
            shown[label].append(missing if score is None else score)
            row.append(0.0 if score is None else float(score))
        numeric_rows.append(row)

    table = pd.DataFrame(shown)
    numeric = pd.DataFrame(numeric_rows, columns=lengths, dtype=float)

    # Both sort keys are negated so that an ascending sort puts the best
    # model first.
    table['mean_score'] = -numeric.iloc[:, :10].mean(axis=1)
    table['num_lengths'] = -(numeric > 0).sum(axis=1)

    table = table[table['num_lengths'] != 0]
    # sort_values returns a new frame, so the assignments below never write
    # into a filtered slice of the original.
    table = table.sort_values(['num_lengths', 'mean_score'])
    table['Rank'] = range(1, len(table) + 1)

    # Series assignment aligns on the index, so rows dropped above are
    # simply ignored here.
    avg_32k = 'Avg ≤32k'
    avg_128k = 'Avg ≤128k'
    table[avg_32k] = numeric.iloc[:, :5].mean(axis=1).astype(int)
    table[avg_128k] = numeric.iloc[:, :7].mean(axis=1).astype(int)

    return table[['Rank', 'Model', avg_32k, avg_128k] + lengths]
def build_leaderboard_tab(folders):
    """Create the leaderboard widgets: header, one tab per task, description tab.

    ``build_demo`` calls this inside its ``gr.Blocks`` context. Every task
    tab shows the table returned by ``load_model`` with a red-yellow-green
    background gradient (0..100) on all score columns.

    Args:
        folders: Paths of the model result directories.

    Returns:
        list: A single-element list holding the header Markdown component.
    """
    header = gr.Markdown(make_default_md(), elem_id="leaderboard_markdown")
    # CSV base name (context length in tokens) -> column label.
    msg_lengths = {
        '0': '0k',
        '4000': '4k',
        '8000': '8k',
        '16000': '16k',
        '32000': '32k',
        '64000': '64k',
        '128000': '128k',
        '500000': '500k',
        '1000000': '1M',
        '10000000': '10M',
    }
    tab_names = ['avg', 'qa1', 'qa2', 'qa3', 'qa4', 'qa5']

    # Identical for every tab, so built once instead of once per iteration.
    cmap = LinearSegmentedColormap.from_list('ryg', ["red", "yellow", "green"], N=256)
    summary_md = make_arena_leaderboard_md(len(folders))

    with gr.Tabs():
        for tab_id, tab_name in enumerate(tab_names):
            table = load_model(folders, tab_name, msg_lengths)
            # Headers and datatypes are derived from the table itself. The
            # previous hand-written lists named the columns in a different
            # order than the data and covered only 11 of the 14 columns.
            columns = list(table.columns)
            # The first two columns (rank, model name) are not scores.
            styled = table.style.background_gradient(
                cmap=cmap, vmin=0, vmax=100, subset=table.columns[2:]
            )
            with gr.Tab(tab_name, id=tab_id):
                gr.Markdown(summary_md, elem_id="leaderboard_markdown")
                gr.Dataframe(
                    headers=columns,
                    datatype=["str", "markdown"] + ["str"] * (len(columns) - 2),
                    value=styled,
                    elem_id="arena_leaderboard_dataframe",
                    height=700,
                    # rank, model, two averages, then one column per length
                    column_widths=[20, 150] + [30] * 2 + [20] * len(msg_lengths),
                    wrap=True,
                )
        # The id follows the last task tab without relying on the loop
        # variable still being bound after the loop.
        with gr.Tab("Description", id=len(tab_names)):
            desc_md = make_model_desc_md(len(folders))
            gr.Markdown(desc_md, elem_id="leaderboard_markdown")
    return [header]
# Custom CSS handed to gr.Blocks in build_demo. It tunes font size and cell
# padding of the Markdown blocks, hides the page footer and sizes images in
# ".image-container".
# NOTE(review): the "#leaderboard_dataframe" selector does not match the
# elem_id "arena_leaderboard_dataframe" used for the tables in this file --
# confirm whether that rule is meant to apply.
block_css = """
#notice_markdown {
font-size: 104%
}
#notice_markdown th {
display: none;
}
#notice_markdown td {
padding-top: 6px;
padding-bottom: 6px;
}
#leaderboard_markdown {
font-size: 104%
}
#leaderboard_markdown td {
padding-top: 6px;
padding-bottom: 6px;
}
#leaderboard_dataframe td {
line-height: 0.1em;
}
footer {
display:none !important
}
.image-container {
display: flex;
align-items: center;
padding: 1px;
}
.image-container img {
margin: 0 30px;
height: 20px;
max-height: 100%;
width: auto;
max-width: 20%;
}
"""
def build_demo(folders):
    """Assemble the Gradio app for the given model result folders.

    Creates a ``gr.Blocks`` page with the base theme at large text size and
    the module's custom CSS, and fills it with the leaderboard tabs.

    Args:
        folders: Paths of the model result directories.

    Returns:
        The ``gr.Blocks`` instance, not yet launched.
    """
    large_text = gr.themes.sizes.text_lg
    page_theme = gr.themes.Base(text_size=large_text)
    with gr.Blocks(title="Babilong leaderboard", theme=page_theme, css=block_css) as demo:
        build_leaderboard_tab(folders)
    return demo
if __name__ == "__main__":
    # Every sub-directory of "results" is one model. Stray files are skipped
    # so they are not counted in the "Total #models" line, and the listing
    # is sorted so start-up does not depend on filesystem order.
    folders = [
        f'results/{name}'
        for name in sorted(os.listdir('results'))
        if os.path.isdir(f'results/{name}')
    ]
    demo = build_demo(folders)
    demo.launch(share=False)
|