import gradio as gr
from gradio_leaderboard import Leaderboard, SelectColumns, ColumnFilter
import config
from pathlib import Path
import pandas as pd
from datetime import datetime
abs_path = Path(__file__).parent
df = pd.read_json(str(abs_path / "leader_board.json"))
# Leaderboard data: model metadata plus per-task scores (see head_content below).
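# Minimal sanity check (an illustrative sketch, not part of the original app):
# the column names below are inferred from how df is used later in this file.
_expected_cols = {"Rank", "Model", "Model Size", "Model Type", "Supports multiple images"}
_missing_cols = _expected_cols - set(df.columns)
if _missing_cols:
    print(f"Warning: leader_board.json is missing expected columns: {_missing_cols}")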
head_content = """
# 🏅 BlinkCode Leaderboard
### Welcome to the BlinkCode Leaderboard! On this leaderboard, we share the evaluation results of MLLMs obtained with the [OpenSource Framework](github.link).
### Currently, the BlinkCode Leaderboard covers <model num> different VLMs (including GPT-4V, Gemini, QwenVLMAX, LLaVA, etc.) and 9 different tasks.
## Main Evaluation Results
- Metrics:
  - Avg Score: The average score over all tasks (normalized to 0-100, the higher the better).
  - The scores on the 5 tasks HumanEval-V, MBPP-V, GSM8K-V, MATH-V, and VP represent accuracy (in percent).
  - The scores on the 4 image reconstruction tasks (Matplotlib, SVG, TikZ, Webpage) represent the similarity between the reconstructed images and the original images (normalized to 0-100, the higher the better).
- By default, we present the unrefined evaluation results, sorted in descending order of Avg Score⬆️.
- The ⭐ symbol indicates results that have undergone two rounds of refinement.
This leaderboard was last updated: <nowtime>.
"""
CITATION_BUTTON_TEXT = r"""@misc{2023opencompass,
title={OpenCompass: A Universal Evaluation Platform for Foundation Models},
author={OpenCompass Contributors},
howpublished = {\url{https://github.com/open-compass/opencompass}},
year={2023}
}"""
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
unique_models_count = df["Model"].nunique()
nowtime = datetime.now()
formatted_time = nowtime.strftime("%y.%m.%d %H:%M:%S")
head_content = head_content.replace("<nowtime>", formatted_time).replace('<model num>', str(unique_models_count))
# Build the Gradio UI: header, leaderboard table, and a citation accordion.
with gr.Blocks() as demo:
    gr.Markdown(head_content)
    with gr.Tabs():
        Leaderboard(
            value=df,
            select_columns=SelectColumns(
                default_selection=config.ON_LOAD_COLUMNS,
                cant_deselect=["Rank", "Model"],
                label="Select Columns to Display:",
            ),
            search_columns=["Model", "Model Type"],
            hide_columns=["Model Size", "Model Type", "Supports multiple images"],
            filter_columns=[
                "Model Size",
                "Model Type",
                "Supports multiple images",
                # ColumnFilter("Params (B)", default=[0, 20]),
            ],
            datatype=config.TYPES,
            column_widths=["5%", "15%"],
        )
    with gr.Row():
        with gr.Accordion('Citation', open=False):
            citation_button = gr.Textbox(
                value=CITATION_BUTTON_TEXT,
                label=CITATION_BUTTON_LABEL,
                elem_id='citation-button')

if __name__ == "__main__":
    demo.launch()