Spaces:
Runtime error
Runtime error
File size: 6,970 Bytes
9346f1c 8b28d2b 9346f1c 4596a70 2a5f9fb 6763f93 b401537 83431d1 09c7b10 2a5f9fb 1ffc326 8c49cb6 7ecad78 8c49cb6 acd8e8a b351f32 acd8e8a df66f6e 37b74a1 90e7e40 37b74a1 2f420b7 8c49cb6 2a73469 10f9b3c 50df158 d084b26 37b74a1 8b28d2b d084b26 046ddc7 d084b26 37b74a1 d084b26 37b74a1 d084b26 26286b2 a885f09 35850bf 2a73469 f5f1257 a6fb1be f5f1257 e7b07cd e06017b f79866b f71c197 a6fb1be f71c197 a6fb1be 086b308 a6fb1be fc52117 614ee1f 19209dc 35850bf 4f5021e 83431d1 944c822 e0cae03 a6fb1be e7b07cd 35850bf 90e7e40 83431d1 35850bf f71c197 f79866b 35850bf f71c197 e7b07cd 70a66a1 f5f1257 5270ff5 35850bf 5270ff5 19209dc 5270ff5 f79866b 3884333 229e191 e06017b b401537 f79866b b62ae51 b401537 086b308 b401537 f79866b d3ac26d f79866b b401537 35850bf 00599d4 c96beeb 00599d4 83431d1 b62ae51 35850bf 0b77b16 becab7c 0b77b16 becab7c 0b080ed becab7c 8ed80aa becab7c 0b77b16 83431d1 4b72f1c 0b77b16 f257792 0b77b16 01233b7 58733e4 7ecad78 6e8f400 10f9b3c 8cb7546 982779d 35850bf f2bc0a5 becab7c 0227006 9a93ff5 2941860 d16cee2 10f9b3c f46cbb5 10f9b3c 37b74a1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 |
import gradio as gr
from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
import pandas as pd
from apscheduler.schedulers.background import BackgroundScheduler
from huggingface_hub import snapshot_download
from gradio.components.textbox import Textbox
from gradio.components.dataframe import Dataframe
from gradio.components.checkboxgroup import CheckboxGroup
import copy
# from fastchat.serve.monitor.monitor import build_leaderboard_tab, build_basic_stats_tab, basic_component_values, leader_component_values
from src.about import (
CITATION_BUTTON_LABEL,
CITATION_BUTTON_TEXT,
EVALUATION_QUEUE_TEXT,
INTRODUCTION_TEXT,
LLM_BENCHMARKS_TEXT,
TITLE,
LINKS,
)
from src.display.css_html_js import (
custom_css,
CSS_EXTERNAL,
JS_EXTERNAL,
)
from src.display.utils import (
AutoEvalColumn,
fields,
)
from src.envs import (
API,
EVAL_DETAILED_RESULTS_PATH,
EVAL_RESULTS_PATH,
EVAL_DETAILED_RESULTS_REPO,
REPO_ID,
RESULTS_REPO,
TOKEN,
NEWEST_VERSION,
)
from src.populate import get_leaderboard_df
def restart_space():
    """Restart the Space identified by ``REPO_ID`` through ``API.restart_space``."""
    target_repo = REPO_ID
    API.restart_space(repo_id=target_repo)
### Space initialisation
# Mirror the two result datasets into their local directories before the UI is
# built.  A failed download triggers a Space restart; the cause is printed first
# so that it shows up in the Space logs instead of being silently discarded.
try:
    print(EVAL_DETAILED_RESULTS_REPO)
    snapshot_download(
        repo_id=EVAL_DETAILED_RESULTS_REPO,
        local_dir=EVAL_DETAILED_RESULTS_PATH,
        repo_type="dataset",
        tqdm_class=None,
        etag_timeout=30,
        token=TOKEN,
    )
except Exception as exc:
    print(f"Download of {EVAL_DETAILED_RESULTS_REPO!r} failed, restarting space: {exc!r}")
    restart_space()

try:
    print(EVAL_RESULTS_PATH)
    snapshot_download(
        repo_id=RESULTS_REPO,
        local_dir=EVAL_RESULTS_PATH,
        repo_type="dataset",
        tqdm_class=None,
        etag_timeout=30,
        token=TOKEN,
    )
except Exception as exc:
    print(f"Download of {RESULTS_REPO!r} failed, restarting space: {exc!r}")
    restart_space()

# Leaderboard tables, indexed by subset name elsewhere in this file.
LEADERBOARD_DF = get_leaderboard_df(RESULTS_REPO)
def GET_DEFAULT_TEXTBOX():
    """Create a new, empty search box used to filter models by name."""
    search_box = gr.Textbox(
        value="",
        placeholder="π Search Models... [press enter]",
        label="Filter Models by Name",
    )
    return search_box
def GET_DEFAULT_CHECKBOX(subset):
    """Create the column picker for ``subset`` with every column pre-selected.

    The choices are the columns of ``LEADERBOARD_DF[subset]`` except
    "Model Name", which is not offered as a toggle.

    Raises:
        ValueError: if the subset has no "Model Name" column.
    """
    column_names = [*LEADERBOARD_DF[subset].columns]
    # Logged before the removal, so the printed list still contains "Model Name".
    print("Choices:", column_names)
    column_names.remove("Model Name")
    picker = gr.CheckboxGroup(
        label="Select Columns to Display",
        choices=column_names,
        value=column_names,
    )
    return picker
# Subset shown by the most recent update_data() call.  update_data() compares
# the requested subset against this value and, when they differ, resets the
# search term and the column selection to their defaults.
old_version = NEWEST_VERSION
def init_leaderboard(dataframes):
    """Build the leaderboard widgets and wire up their event handlers.

    Creates a subset dropdown, a model-name search box, a column picker, the
    results table and a "Refresh" button, then registers the callbacks that
    keep the table in sync with those controls.

    Args:
        dataframes: mapping from subset name to a pandas DataFrame.  It must
            contain ``NEWEST_VERSION``; every frame is indexed by its
            "Model Name" and "Total" columns below.
    """
    import re  # local import: only needed for the invalid-pattern fallback

    subsets = list(reversed(list(dataframes.keys())))

    with gr.Row():
        selected_subset = gr.Dropdown(choices=subsets, label="Select Dataset Subset", value=NEWEST_VERSION)
        research_textbox = GET_DEFAULT_TEXTBOX()
        selected_columns = GET_DEFAULT_CHECKBOX(NEWEST_VERSION)

    data = dataframes[NEWEST_VERSION]

    with gr.Row():
        df = gr.Dataframe(data, type="pandas")

    def refresh(subset):
        """Reload the results and re-render ``subset`` with default controls."""
        global LEADERBOARD_DF
        nonlocal dataframes
        LEADERBOARD_DF = get_leaderboard_df(RESULTS_REPO)
        # update_data() reads the enclosing ``dataframes``; rebind it as well,
        # otherwise the table keeps showing the data loaded at start-up.
        dataframes = LEADERBOARD_DF
        # force_refresh makes update_data() discard the search term and the
        # column selection, so placeholders are enough here.
        return update_data(subset, None, None, force_refresh=True)

    def update_data(subset, search_term, selected_columns, force_refresh=False):
        """Return the new (table, search box, column selection) outputs.

        Args:
            subset: name of the subset to display.
            search_term: text matched case-insensitively against "Model Name";
                falsy means no filtering.
            selected_columns: column names to show ("Model Name" is always
                kept).  Replaced by the default picker when the subset changed
                or ``force_refresh`` is set.
            force_refresh: reset search term and columns unconditionally.
        """
        global old_version
        if old_version != subset or force_refresh:
            search_term = None
            selected_columns = GET_DEFAULT_CHECKBOX(subset)
        print("Subset:", subset)
        print("Search Term:", search_term)
        print("Selected Columns:", selected_columns)
        if isinstance(selected_columns, CheckboxGroup):
            print("Selected Columns:", selected_columns.choices)
        # Keep the untouched selection (list or fresh component) to hand back
        # to the column picker; the working copy is normalised below.
        bak_selected_columns = copy.deepcopy(selected_columns)
        old_version = subset

        filtered_data = dataframes[subset]
        if search_term:
            model_names = filtered_data["Model Name"]
            try:
                # na=False: rows without a name never match (and the mask
                # stays boolean, so it is usable for indexing).
                name_matches = model_names.str.contains(search_term, case=False, na=False)
            except re.error:
                # The text is not a valid pattern (e.g. "("): match literally
                # instead of failing the whole callback.
                name_matches = model_names.str.contains(search_term, case=False, regex=False, na=False)
            filtered_data = filtered_data[name_matches]
        # Sort into a new frame: an in-place sort would reorder the shared
        # per-subset table or operate on a filtered slice of it.
        filtered_data = filtered_data.sort_values(by="Total", ascending=False)

        if isinstance(selected_columns, CheckboxGroup):
            selected_columns = selected_columns.choices
        selected_columns = selected_columns or []
        # Choices may arrive as pairs; the second item is the column name.
        # The emptiness guard covers the case where every box is unchecked.
        if selected_columns and isinstance(selected_columns[0], tuple):
            selected_columns = [c[1] for c in selected_columns]
        print("Selected Columns:", selected_columns)
        # Preserve the table's own column order and always keep "Model Name".
        selected_columns = [
            c for c in filtered_data.columns if c in selected_columns or c == "Model Name"
        ]
        selected_data = filtered_data[selected_columns]
        return gr.DataFrame(
            selected_data,
            type="pandas",
        ), research_textbox, bak_selected_columns

    with gr.Row():
        refresh_button = gr.Button("Refresh")
        refresh_button.click(
            refresh,
            inputs=[
                selected_subset,
            ],
            outputs=[df, research_textbox, selected_columns],
            concurrency_limit=20,
        )

    selected_subset.change(update_data, inputs=[selected_subset, research_textbox, selected_columns], outputs=[df, research_textbox, selected_columns])
    research_textbox.submit(update_data, inputs=[selected_subset, research_textbox, selected_columns], outputs=[df, research_textbox, selected_columns])
    selected_columns.change(update_data, inputs=[selected_subset, research_textbox, selected_columns], outputs=[df, research_textbox, selected_columns])
def init_detailed_results():
    """Embed the dataset viewer for the detailed results as an iframe in a row."""
    viewer_markup = (
        "<iframe\n"
        'src="https://huggingface.co/datasets/lmms-lab/LiveBenchDetailedResults/embed/viewer/"\n'
        'frameborder="0"\n'
        'width="100%"\n'
        'height="800px"\n'
        "></iframe>\n"
    )
    with gr.Row():
        gr.HTML(viewer_markup)
# Markup injected into the page <head>: one stylesheet <link> per entry of
# CSS_EXTERNAL, followed by one <script> per entry of JS_EXTERNAL.
HEAD = "".join(f'<link rel="stylesheet" href="{css}">' for css in CSS_EXTERNAL) + "".join(
    f'<script src="{js}" crossorigin="anonymous"></script>' for js in JS_EXTERNAL
)
# Page layout: title, links and introduction, then the two result tabs and a
# collapsible citation box.
demo = gr.Blocks(css=custom_css, head=HEAD)
with demo:
    gr.HTML(TITLE)
    gr.HTML(LINKS)
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    # NOTE(review): the leading "π" in the labels below looks like a
    # mis-decoded emoji - confirm against the original file's encoding.
    # NOTE(review): both tabs use the same elem_id; confirm this is intended
    # before relying on it in CSS/JS selectors.
    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        # The label must be a single-line literal; a line break inside the
        # quotes is a syntax error.
        with gr.TabItem("π LiveBench Results", elem_id="llm-benchmark-tab-table", id=0):
            init_leaderboard(LEADERBOARD_DF)

        with gr.TabItem("π Detailed Results", elem_id="llm-benchmark-tab-table", id=2):
            init_detailed_results()

    with gr.Row():
        with gr.Accordion("π Citation", open=False):
            # Render the citation text as a fenced "bib" code block.
            gr.Markdown("```bib\n" + CITATION_BUTTON_TEXT + "\n```")
# Restart the Space every 3600 seconds from a background scheduler, then
# enable the request queue and start serving the app.
scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, trigger="interval", seconds=3600)
scheduler.start()

queued_demo = demo.queue(default_concurrency_limit=40)
queued_demo.launch()
|