File size: 3,160 Bytes
d5550ee
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import gradio as gr
from gradio_leaderboard import Leaderboard, SelectColumns, ColumnFilter
import config
from pathlib import Path
import pandas as pd
from datetime import datetime

# Directory this script lives in; the results file is resolved relative to it.
abs_path = Path(__file__).parent

# Pre-computed evaluation results shipped alongside the app.
df = pd.read_json(str(abs_path / "leader_board.json"))
# print(df.head(1))
# Markdown shown at the top of the page. The <model num> and <nowtime>
# placeholders are substituted below, just before the UI is built.
# NOTE: trailing spaces inside the string are intentional markdown line breaks.
head_content = """
    # 🏅 BlinkCode Leaderboard
    ### Welcome to the BlinkCode Leaderboard! On this leaderboard we share the evaluation results of MLLMs obtained by the [OpenSource Framework](github.link).

    ### Currently, BlinkCode Leaderboard covers <model num> different VLMs (including GPT-4v, Gemini, QwenVLMAX, LLaVA, etc.) and 9 different tasks.
    ## Main Evaluation Results
    - Metrics:
      - Avg Score: The average score on all tasks (normalized to 0 - 100, the higher the better). 
      - The scores in the 5 tasks (HumanEval-V, MBPP-V, GSM8K-V, MATH-V, VP) represent the percentage of accuracy.
      - The scores in the image reconstruction tasks (Matplotlib, SVG, TikZ, Webpage) represent the similarity between the reconstructed images and the original images (normalized to 0 - 100, the higher the better).
    - By default, we present the unrefined evaluation results, sorted by the descending order of Avg Score⬆️.   
      -  The ⭐ symbol indicates results that have undergone two rounds of refinement.  
        
        
    This leaderboard was last updated: <nowtime>.
    """
# BibTeX snippet surfaced in the "Citation" accordion at the bottom of the page.
CITATION_BUTTON_TEXT = r"""@misc{2023opencompass,
    title={OpenCompass: A Universal Evaluation Platform for Foundation Models},
    author={OpenCompass Contributors},
    howpublished = {\url{https://github.com/open-compass/opencompass}},
    year={2023}
}"""
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"

# Fill the placeholders: count of distinct models in the results, and the
# current wall-clock time (2-digit year format, e.g. "24.05.01 12:00:00").
unique_models_count = df["Model"].nunique()
nowtime = datetime.now()
formatted_time = nowtime.strftime("%y.%m.%d %H:%M:%S")
head_content = head_content.replace("<nowtime>", formatted_time).replace("<model num>", str(unique_models_count))

# Assemble the Gradio app: header markdown, the leaderboard table, and a
# collapsible citation box.
with gr.Blocks() as demo:
    # Page header (title, task descriptions, last-updated timestamp).
    gr.Markdown(head_content)
    with gr.Tabs():
        # Interactive results table (gradio_leaderboard component).
        Leaderboard(
            value=df,
            select_columns=SelectColumns(
                # Columns visible on first load; Rank/Model always shown.
                default_selection=config.ON_LOAD_COLUMNS,
                cant_deselect=["Rank", "Model"],
                label="Select Columns to Display:",
            ),
            search_columns=["Model", "Model Type"],
            # NOTE(review): these three columns are hidden from the table but
            # still exposed as filters below — presumably metadata-only columns.
            hide_columns=["Model Size", "Model Type", "Supports multiple images"],
            filter_columns=[
                "Model Size",
                "Model Type",
                "Supports multiple images"
                # ColumnFilter("Params (B)", default=[0, 20]),
            ],
            # Per-column datatypes (e.g. str/number/markdown), defined in config.
            datatype=config.TYPES,
            column_widths=["5%", "15%"],
        )
    with gr.Row():
        # Collapsed-by-default citation snippet users can copy.
        with gr.Accordion('Citation', open=False):
            citation_button = gr.Textbox(
                value=CITATION_BUTTON_TEXT,
                label=CITATION_BUTTON_LABEL,
                elem_id='citation-button')
if __name__ == "__main__":
    demo.launch()