# BlinkCode Leaderboard — Gradio Space entry point.
import gradio as gr
from gradio_leaderboard import Leaderboard, SelectColumns, ColumnFilter
import config
from pathlib import Path
import pandas as pd
from datetime import datetime

# Resolve data files relative to this script so the app works from any CWD.
abs_path = Path(__file__).parent

# Leaderboard rows: one record per evaluated model, loaded from the bundled
# JSON dump produced by the evaluation pipeline.
df = pd.read_json(str(abs_path / "leader_board.json"))
# Markdown rendered at the top of the page. The <model num> and <nowtime>
# placeholders are substituted at startup, before the UI is built.
head_content = """
# 🏅 BlinkCode Leaderboard
### Welcome to the BlinkCode Leaderboard! On this leaderboard we share the evaluation results of MLLMs obtained by the [OpenSource Framework](github.link).
### Currently, BlinkCode Leaderboard covers <model num> different VLMs (including GPT-4v, Gemini, QwenVLMAX, LLaVA, etc.) and 9 different tasks.
## Main Evaluation Results
- Metrics:
    - Avg Score: The average score on all tasks (normalized to 0 - 100, the higher the better).
    - The scores in the 5 tasks (HumanEval-V, MBPP-V, GSM8K-V, MATH-V, VP) represent the percentage of accuracy.
    - The scores in the image reconstruction tasks (Matplotlib, SVG, TikZ, Webpage) represent the similarity between the reconstructed images and the original images (normalized to 0 - 100, the higher the better).
- By default, we present the unrefined evaluation results, sorted by the descending order of Avg Score⬆️.
- The ⭐ symbol indicates results that have undergone two rounds of refinement.

This leaderboard was last updated: <nowtime>.
"""
# Label shown above the citation textbox in the "Citation" accordion.
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"

# BibTeX entry offered to users who want to cite these results.
CITATION_BUTTON_TEXT = r"""@misc{2023opencompass,
title={OpenCompass: A Universal Evaluation Platform for Foundation Models},
author={OpenCompass Contributors},
howpublished = {\url{https://github.com/open-compass/opencompass}},
year={2023}
}"""
# Fill the header placeholders: number of distinct models and refresh time.
unique_models_count = df["Model"].nunique()
nowtime = datetime.now()
formatted_time = nowtime.strftime("%y.%m.%d %H:%M:%S")
head_content = (
    head_content
    .replace("<nowtime>", formatted_time)
    .replace("<model num>", str(unique_models_count))
)
# Assemble the Gradio UI: page header, leaderboard table, citation box.
with gr.Blocks() as demo:
    gr.Markdown(head_content)

    with gr.Tabs():
        # Interactive leaderboard backed by the loaded dataframe; column
        # selection, search, and filters come from the shared config module.
        Leaderboard(
            value=df,
            datatype=config.TYPES,
            select_columns=SelectColumns(
                default_selection=config.ON_LOAD_COLUMNS,
                cant_deselect=["Rank", "Model"],
                label="Select Columns to Display:",
            ),
            search_columns=["Model", "Model Type"],
            hide_columns=["Model Size", "Model Type", "Supports multiple images"],
            filter_columns=["Model Size", "Model Type", "Supports multiple images"],
            column_widths=["5%", "15%"],
        )

    with gr.Row():
        with gr.Accordion("Citation", open=False):
            # Read-only textbox holding the BibTeX snippet for easy copying.
            citation_button = gr.Textbox(
                value=CITATION_BUTTON_TEXT,
                label=CITATION_BUTTON_LABEL,
                elem_id="citation-button",
            )

if __name__ == "__main__":
    demo.launch()