Spaces:
Running
Running
File size: 6,583 Bytes
5ccbe05 170a088 5ccbe05 7bd86a9 ae55c78 5ccbe05 43b5eac 5ccbe05 b2fc46e 2f7c171 097c217 2f7c171 d4bd893 5ccbe05 7bd86a9 2eb8cb0 5ccbe05 95cd467 5ccbe05 ae55c78 5ccbe05 ae55c78 6ffefdd ae55c78 429b741 b1030db ae55c78 b4a4293 ae55c78 429b741 170a088 43b5eac 7bd86a9 43b5eac 5ccbe05 ae55c78 649e5b3 ae55c78 170a088 ae55c78 097c217 170a088 ae55c78 170a088 ae55c78 f5b436e ae55c78 95cd467 6ffefdd 95cd467 5ccbe05 7bd86a9 5ccbe05 43b5eac 5ccbe05 7bd86a9 5ccbe05 7bd86a9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 |
"""A gradio app that renders a static leaderboard. This is used for Hugging Face Space."""
# import ast
# import argparse
# import glob
# import pickle
import gradio as gr
import numpy as np
import pandas as pd
import os
from collections import defaultdict
from matplotlib.colors import LinearSegmentedColormap
def make_default_md():
    """Return the static Markdown header shown at the top of the leaderboard.

    The text contains the page title, a "Dataset on HF" badge, and a one-row
    table of links (GitHub, paper, and the two Hugging Face datasets).
    """
    # The literal has no placeholders, so a plain string is sufficient.
    return """
# πππͺ‘πβ BABILong Leaderboard π
[![Dataset on HF](https://huggingface.co/datasets/huggingface/badges/resolve/main/dataset-on-hf-lg.svg)](https://huggingface.co/datasets/booydar/babilong)
| [GitHub](https://github.com/booydar/babilong) | [Paper](https://arxiv.org/abs/2406.10149) | [HF Dataset](https://huggingface.co/datasets/booydar/babilong) | [HF Dataset 1k samples per task](https://huggingface.co/datasets/RMT-team/babilong-1k-samples) |
"""
def make_arena_leaderboard_md(total_models, last_updated="July 29, 2024"):
    """Return the one-line Markdown summary shown above each leaderboard table.

    Args:
        total_models: Number of models to report; interpolated as-is.
        last_updated: Human-readable date of the last update. Defaults to the
            date that was previously hard-coded, so existing callers get the
            same text as before.

    Returns:
        A Markdown string of the form
        ``Total #models: **N**. Last updated: <date>.``
    """
    return f"Total #models: **{total_models}**. Last updated: {last_updated}."
def make_model_desc_md(f_len):
    """Build the Markdown for the "Description" tab.

    The text starts with the summary line from ``make_arena_leaderboard_md``
    and is followed by one ``###`` section per file found directly inside the
    ``info`` directory. Each section is titled with the file name up to its
    first ``.md`` and contains the file's content verbatim.

    Args:
        f_len: Model count forwarded to ``make_arena_leaderboard_md``.

    Returns:
        The combined Markdown string; sections are separated by blank lines.
    """
    info_dir = 'info'
    sections = [make_arena_leaderboard_md(f_len)]

    # os.walk yields nothing for a missing directory, so give next() a default
    # instead of letting StopIteration escape.
    _, _, filenames = next(os.walk(info_dir), (info_dir, [], []))

    # Sort so the section order does not depend on filesystem listing order.
    for filename in sorted(filenames):
        model_name = filename.split('.md')[0]
        # Read as UTF-8 explicitly so the result does not depend on the
        # platform's default encoding.
        with open(os.path.join(info_dir, filename), 'r', encoding='utf-8') as f:
            description = f.read()
        sections.append(f"### {model_name}\n{description}")

    return "\n\n".join(sections)
def model_hyperlink(model_name, link):
    """Return an HTML ``<a>`` tag that opens *link* in a new tab.

    The anchor text is *model_name*; both values are inserted without
    escaping.
    """
    anchor_style = (
        "color: var(--link-text-color); "
        "text-decoration: underline;"
        "text-decoration-style: dotted;"
    )
    return f'<a target="_blank" href="{link}" style="{anchor_style}">{model_name}</a>'
def load_model(folders, tab_name, msg_lengths):
    """Build the leaderboard table for one tab from per-model result CSVs.

    For every folder and every key ``task`` of *msg_lengths* the file
    ``{folder}/{tab_name}/{task}.csv`` is read. Its score is
    ``sum(result) / number_of_rows * 100`` truncated to an int
    (presumably ``result`` is a 0/1 correctness flag -- confirm against the
    data). A missing or empty file yields a placeholder that displays as
    ``-`` and counts as 0.0 in every numeric computation.

    Models are ordered by the number of lengths with a score above zero
    (most first), then by the mean score over the first ten lengths (highest
    first). Models without any score above zero are dropped.

    Args:
        folders: Paths of the model result folders, using ``/`` separators;
            the last path component is used as the model name.
        tab_name: Sub-folder (task group) to read inside each model folder.
        msg_lengths: Mapping from CSV base name to the column label to show
            for that context length, in display order.

    Returns:
        A ``pandas.DataFrame`` with the columns ``Rank``, ``Model``, the two
        average columns (mean over the first 5 and first 7 lengths, truncated
        to int) and one column per label in *msg_lengths*.
    """

    class NA():
        """Placeholder for a missing score: shown as '-' and valued 0.0."""

        def __repr__(self) -> str:
            return '-'

        def __float__(self):
            return 0.0

    # NOTE(review): the 'β€' in these labels looks like a mis-decoded '≤';
    # confirm the file encoding. They are kept unchanged here because other
    # code may refer to the exact same column names.
    avg_short_col = 'Avg β€32k'
    avg_long_col = 'Avg β€128k'
    lengths = list(msg_lengths.values())
    ordered_columns = ['Rank', 'Model', avg_short_col, avg_long_col] + lengths

    # Without any folder the frame would have no columns at all and the
    # column selections below would raise KeyError.
    if not folders:
        return pd.DataFrame(columns=ordered_columns)

    results = defaultdict(list)
    mean_score = []
    for folder in folders:
        model_name = folder.split('/')[-1]
        if 'fine-tune' in model_name:
            model_name += ' π οΈ'
        if 'rag' in model_name.lower() or 'retrieve' in model_name.lower():
            model_name += ' π'
        results['Model'].append(model_name)

        row_scores = []
        for task, label in msg_lengths.items():
            csv_path = f'{folder}/{tab_name}/{task}.csv'
            score = NA()
            if os.path.isfile(csv_path):
                df = pd.read_csv(csv_path)
                # An empty CSV would otherwise divide by zero.
                if len(df) > 0:
                    score = int(df['result'].sum() / len(df) * 100)
            results[label].append(score)
            row_scores.append(float(score))

        # Negated so that an ascending sort puts the best model first.
        mean_score.append(-np.mean(row_scores[:10]))

    res_df = pd.DataFrame(results)
    res_df['mean_score'] = mean_score
    # Negated count of lengths with a score above zero (ascending sort again).
    res_df['num_lengths'] = -(res_df[lengths].astype(float) > 0).sum(axis=1)

    # sort_values returns a new frame, so the column assignments below do not
    # write into a filtered view of the original frame.
    res_df = res_df[res_df.num_lengths != 0].sort_values(['num_lengths', 'mean_score'])

    res_df['Rank'] = range(1, res_df.shape[0] + 1)
    res_df[avg_short_col] = res_df[lengths[:5]].astype(float).fillna(0).mean(axis=1).astype(int)
    res_df[avg_long_col] = res_df[lengths[:7]].astype(float).fillna(0).mean(axis=1).astype(int)
    return res_df[ordered_columns]
def build_leaderboard_tab(folders):
    """Create the leaderboard UI: header, one tab per task, and a description tab.

    Must be called inside an active ``gr.Blocks`` context. For each of the
    task tabs the table returned by ``load_model`` is colour-graded from red
    (0) through yellow to green (100) on every column after ``Rank`` and
    ``Model`` and shown in a ``gr.Dataframe``.

    Args:
        folders: Paths of the model result folders, passed to ``load_model``;
            their count is shown as the total number of models.

    Returns:
        A one-element list holding the header Markdown component.
    """
    default_md = make_default_md()
    md_1 = gr.Markdown(default_md, elem_id="leaderboard_markdown")

    # CSV base name -> column label, in display order.
    msg_lengths = {
        '0': '0k',
        '4000': '4k',
        '8000': '8k',
        '16000': '16k',
        '32000': '32k',
        '64000': '64k',
        '128000': '128k',
        '500000': '500k',
        '1000000': '1M',
        '10000000': '10M'
    }
    length_labels = list(msg_lengths.values())
    # Must stay identical to the average-column names produced by load_model.
    avg_labels = ['Avg β€32k', 'Avg β€128k']
    tab_names = ['avg', 'qa1', 'qa2', 'qa3', 'qa4', 'qa5']

    # The colormap does not depend on the tab, so build it once.
    cmap = LinearSegmentedColormap.from_list('ryg', ["red", "yellow", "green"], N=256)

    with gr.Tabs():
        for tab_id, tab_name in enumerate(tab_names):
            df = load_model(folders, tab_name, msg_lengths)
            styled = df.style.background_gradient(cmap=cmap, vmin=0, vmax=100, subset=df.columns[2:])
            with gr.Tab(tab_name, id=tab_id):
                gr.Markdown(make_arena_leaderboard_md(len(folders)), elem_id="leaderboard_markdown")
                gr.Dataframe(
                    # Same order as the table columns and as column_widths:
                    # rank, model, the two averages, then one per length.
                    headers=["Rank", "Model"] + avg_labels + length_labels,
                    # One datatype entry per column.
                    datatype=["str", "markdown"] + ["str"] * (len(avg_labels) + len(length_labels)),
                    value=styled,
                    elem_id="arena_leaderboard_dataframe",
                    height=700,
                    column_widths=[20, 150] + [30] * 2 + [20] * len(msg_lengths),
                    wrap=True,
                )
        # The description tab takes the next free id after the task tabs.
        with gr.Tab("Description", id=len(tab_names)):
            desc_md = make_model_desc_md(len(folders))
            gr.Markdown(desc_md, elem_id="leaderboard_markdown")
    return [md_1]
# Custom CSS handed to gr.Blocks(css=...). Rules are keyed on component
# elem_id values: it tweaks Markdown font size and table-cell padding, hides
# the page footer, and sizes images inside ".image-container".
# NOTE(review): no component in the visible code uses the elem_id
# "notice_markdown" or "leaderboard_dataframe" (the table uses
# "arena_leaderboard_dataframe"), so those rules may be unused -- confirm
# before relying on them.
block_css = """
#notice_markdown {
font-size: 104%
}
#notice_markdown th {
display: none;
}
#notice_markdown td {
padding-top: 6px;
padding-bottom: 6px;
}
#leaderboard_markdown {
font-size: 104%
}
#leaderboard_markdown td {
padding-top: 6px;
padding-bottom: 6px;
}
#leaderboard_dataframe td {
line-height: 0.1em;
}
footer {
display:none !important
}
.image-container {
display: flex;
align-items: center;
padding: 1px;
}
.image-container img {
margin: 0 30px;
height: 20px;
max-height: 100%;
width: auto;
max-width: 20%;
}
"""
def build_demo(folders):
    """Assemble the Gradio app for the leaderboard and return it unlaunched.

    Args:
        folders: Paths of the model result folders, forwarded to
            ``build_leaderboard_tab``.

    Returns:
        The ``gr.Blocks`` instance titled "Babilong leaderboard", using the
        Base theme with large text and the module's ``block_css``.
    """
    theme = gr.themes.Base(text_size=gr.themes.sizes.text_lg)
    with gr.Blocks(title="Babilong leaderboard", theme=theme, css=block_css) as demo:
        # Components register themselves with the active Blocks context, so
        # the returned list does not need to be kept.
        build_leaderboard_tab(folders)
    return demo
if __name__ == "__main__":
    # Every sub-directory of "results" is treated as one model. Plain files
    # are skipped so they are not counted as models, and the names are sorted
    # so the order does not depend on filesystem listing order. Paths are
    # joined with "/" because load_model takes the model name from the last
    # "/"-separated component.
    results_root = 'results'
    folders = [
        f'{results_root}/{name}'
        for name in sorted(os.listdir(results_root))
        if os.path.isdir(os.path.join(results_root, name))
    ]
    demo = build_demo(folders)
    demo.launch(share=False)
|