linjunyao
added leaderboard data; added Class coloring
0bb476f
raw
history blame
12.9 kB
import abc
import gradio as gr
from loguru import logger
import pandas as pd
from collections import defaultdict
from judgerbench.preprocess.gen_table import (
format_timestamp,
generate_table,
build_l1_df,
# build_l2_df,
)
from judgerbench.meta_data import (
LEADERBORAD_INTRODUCTION,
LEADERBOARD_MD,
LEADERBOARD_FILE_MAPPING,
MAIN_FIELDS,
DEFAULT_BENCH,
STYLE_CLASS_MAPPING,
CITATION_BUTTON_TEXT,
CITATION_BUTTON_LABEL,
)
def refresh_dataframe(required_fields):
df = generate_table(MAIN_FIELDS)
comp = gr.DataFrame(
value=df,
type='pandas',
interactive=False,
visible=True
)
return comp
with gr.Blocks() as demo:
# struct = load_results()
# timestamp = struct['time']
# EVAL_TIME = format_timestamp(timestamp)
EVAL_TIME = '20241015'
# results = struct['results']
# N_MODEL = len(results)
# N_DATA = len(results['LLaVA-v1.5-7B']) - 1
N_MODEL = 10
N_DATA = 100
# DATASETS = list(results['LLaVA-v1.5-7B'])
# DATASETS.remove('META')
# print(DATASETS)
gr.Markdown(LEADERBORAD_INTRODUCTION.format(
# N_MODEL,
# N_DATA,
EVAL_TIME
))
# structs = [abc.abstractproperty() for _ in range(N_DATA)]
with gr.Tabs(elem_classes='tab-buttons') as tabs:
for cur_id, (filename, filepath) in enumerate(LEADERBOARD_FILE_MAPPING.items()):
tab_name = filename
if filename == "overall":
tab_name = '๐Ÿ… JudgerBench Main Leaderboard'
with gr.Tab(tab_name, elem_id=f'tab_{cur_id}', id=cur_id):
# gr.Markdown(LEADERBOARD_MD['MAIN'])
# _, check_box = build_l1_df(MAIN_FIELDS)
table = generate_table(filename=filename)
# type_map = check_box['type_map']
type_map = defaultdict(lambda: 'number')
type_map['Model'] = 'str'
type_map['Class'] = 'str'
type_map['Rank'] = 'number'
# required_fields = gr.State(
# check_box['essential']
# # + ["Average"]
# )
# checkbox_group = gr.CheckboxGroup(
# choices=[item for item in check_box['all'] if item not in required_fields.value],
# value=[item for item in check_box['default'] if item not in required_fields.value],
# label='Evaluation Metrics',
# interactive=True,
# )
# headers = (
# ['Rank'] +
# required_fields.value +
# [item for item in check_box['all'] if item not in required_fields.value]
# # checkbox_group.value
# )
table['Rank'] = list(range(1, len(table) + 1))
# Rearrange columns
if "Class" in table.columns:
starting_columns = ["Rank", "Models", "Class"]
else:
starting_columns = ["Rank", "Models"]
table = table[starting_columns + [ col for col in table.columns if col not in starting_columns ]]
headers = (
# ['Rank'] +
list(table.columns)
)
if "Class" in table.columns:
def cell_styler(v):
df = v.copy()
class_var = df[['Class']].copy()
df.loc[:, :] = ''
df[['Class']] = class_var.map(lambda x: f"background-color: {STYLE_CLASS_MAPPING[x]}")
logger.info(df['Class'])
return df
table_styler = (
table.style.apply(cell_styler, axis=None)
.format(precision=3)
)
else:
table_styler = table.style.format(prevision=3)
# with gr.Row():
# model_size = gr.CheckboxGroup(
# choices=MODEL_SIZE,
# value=MODEL_SIZE,
# label='Model Size',
# interactive=True
# )
# model_type = gr.CheckboxGroup(
# choices=MODEL_TYPE,
# value=MODEL_TYPE,
# label='Model Type',
# interactive=True
# )
data_component = gr.DataFrame(
value=table_styler,
type='pandas',
datatype=[type_map[x] for x in headers],
interactive=False,
visible=True
)
def filter_df(
required_fields,
fields,
# model_size,
# model_type
):
# filter_list = ['Avg Score', 'Avg Rank', 'OpenSource', 'Verified']
headers = ['Rank'] + required_fields + fields
# new_fields = [field for field in fields if field not in filter_list]
df = generate_table(fields)
logger.info(f"{df.columns=}")
# df['flag'] = [model_size_flag(x, model_size) for x in df['Param (B)']]
# df = df[df['flag']]
# df.pop('flag')
# if len(df):
# df['flag'] = [model_type_flag(df.iloc[i], model_type) for i in range(len(df))]
# df = df[df['flag']]
# df.pop('flag')
df['Rank'] = list(range(1, len(df) + 1))
comp = gr.DataFrame(
value=df[headers],
type='pandas',
datatype=[type_map[x] for x in headers],
interactive=False,
visible=True
)
return comp
# for cbox in [
# # checkbox_group,
# # model_size,
# # model_type
# ]:
# cbox.change(
# fn=refresh_dataframe,
# inputs=[required_fields],
# outputs=data_component
# ).then(
# fn=filter_df,
# inputs=[
# required_fields,
# checkbox_group,
# # model_size,
# # model_type
# ],
# outputs=data_component
# )
# with gr.Tab('๐Ÿ” About', elem_id='about', id=1):
# gr.Markdown(urlopen(VLMEVALKIT_README).read().decode())
# for i, dataset in enumerate(DATASETS):
# with gr.Tab(f'๐Ÿ“Š {dataset} Leaderboard', elem_id=dataset, id=i + 2):
# if dataset in LEADERBOARD_MD:
# gr.Markdown(LEADERBOARD_MD[dataset])
# s = structs[i]
# s.table, s.check_box = build_l2_df(results, dataset)
# s.type_map = s.check_box['type_map']
# s.type_map['Rank'] = 'number'
# s.checkbox_group = gr.CheckboxGroup(
# choices=s.check_box['all'],
# value=s.check_box['required'],
# label=f'{dataset} CheckBoxes',
# interactive=True,
# )
# s.headers = ['Rank'] + s.check_box['essential'] + s.checkbox_group.value
# s.table['Rank'] = list(range(1, len(s.table) + 1))
# with gr.Row():
# s.model_size = gr.CheckboxGroup(
# choices=MODEL_SIZE,
# value=MODEL_SIZE,
# label='Model Size',
# interactive=True
# )
# s.model_type = gr.CheckboxGroup(
# choices=MODEL_TYPE,
# value=MODEL_TYPE,
# label='Model Type',
# interactive=True
# )
# s.data_component = gr.components.DataFrame(
# value=s.table[s.headers],
# type='pandas',
# datatype=[s.type_map[x] for x in s.headers],
# interactive=False,
# visible=True)
# s.dataset = gr.Textbox(value=dataset, label=dataset, visible=False)
# def filter_df_l2(dataset_name, fields, model_size, model_type):
# s = structs[DATASETS.index(dataset_name)]
# headers = ['Rank'] + s.check_box['essential'] + fields
# df = cp.deepcopy(s.table)
# df['flag'] = [model_size_flag(x, model_size) for x in df['Param (B)']]
# df = df[df['flag']]
# df.pop('flag')
# if len(df):
# df['flag'] = [model_type_flag(df.iloc[i], model_type) for i in range(len(df))]
# df = df[df['flag']]
# df.pop('flag')
# df['Rank'] = list(range(1, len(df) + 1))
# comp = gr.components.DataFrame(
# value=df[headers],
# type='pandas',
# datatype=[s.type_map[x] for x in headers],
# interactive=False,
# visible=True)
# return comp
# for cbox in [s.checkbox_group, s.model_size, s.model_type]:
# cbox.change(
# fn=filter_df_l2,
# inputs=[s.dataset, s.checkbox_group, s.model_size, s.model_type],
# outputs=s.data_component)
with gr.Row():
with gr.Accordion('Citation', open=False):
citation_button = gr.Textbox(
value=CITATION_BUTTON_TEXT,
label=CITATION_BUTTON_LABEL,
elem_id='citation-button')
if __name__ == '__main__':
import argparse
parser = argparse.ArgumentParser()
parser.add_argument("--host", type=str, default="0.0.0.0")
parser.add_argument("--port", type=int)
parser.add_argument(
"--share",
action="store_true",
help="Whether to generate a public, shareable link",
)
parser.add_argument(
"--concurrency-count",
type=int,
default=10,
help="The concurrency count of the gradio queue",
)
parser.add_argument(
"--max-threads",
type=int,
default=200,
help="The maximum number of threads available to process non-async functions.",
)
# parser.add_argument(
# "--gradio-auth-path",
# type=str,
# help='Set the gradio authentication file path. The file should contain one or more user:password pairs in this format: "u1:p1,u2:p2,u3:p3"',
# default=None,
# )
parser.add_argument(
"--gradio-root-path",
type=str,
help="Sets the gradio root path, eg /abc/def. Useful when running behind a reverse-proxy or at a custom URL path prefix",
)
parser.add_argument(
"--ga-id",
type=str,
help="the Google Analytics ID",
default=None,
)
parser.add_argument(
"--use-remote-storage",
action="store_true",
default=False,
help="Uploads image files to google cloud storage if set to true",
)
args = parser.parse_args()
logger.info(f"args: {args}")
# Set authorization credentials
# auth = None
# if args.gradio_auth_path is not None:
# auth = parse_gradio_auth_creds(args.gradio_auth_path)
demo.queue(
default_concurrency_limit=args.concurrency_count,
status_update_rate=10,
api_open=False,
).launch(
server_name=args.host,
server_port=args.port,
share=args.share,
max_threads=args.max_threads,
# auth=auth,
root_path=args.gradio_root_path,
# debug=True,
show_error=True,
allowed_paths=["../.."]
)