Spaces:
Sleeping
Sleeping
Create config.py
Browse files
config.py
ADDED
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Gradio app that renders the BlinkCode leaderboard.

Reads model evaluation results from ``leader_board.json`` (located next to
this file) and displays them in an interactive, filterable table. Column
configuration (default-visible columns and per-column datatypes) comes from
the local ``config`` module.
"""
from datetime import datetime
from pathlib import Path

import gradio as gr
import pandas as pd
from gradio_leaderboard import Leaderboard, SelectColumns, ColumnFilter

import config

# Directory containing this script; used to resolve the data file path
# regardless of the current working directory.
abs_path = Path(__file__).parent

# One row per model result entry; columns must match config.ON_LOAD_COLUMNS
# and config.TYPES used below.
df = pd.read_json(str(abs_path / "leader_board.json"))

# Page header. ``<model num>`` and ``<nowtime>`` are placeholders substituted
# just before the UI is built.
# NOTE(review): the framework link below is a placeholder ("github.link") —
# replace it with the real repository URL before release.
head_content = """
# 🏅 BlinkCode Leaderboard
### Welcome to the BlinkCode Leaderboard! On this leaderboard we share the evaluation results of MLLMs obtained by the [OpenSource Framework](github.link).

### Currently, BlinkCode Leaderboard covers <model num> different VLMs (including GPT-4v, Gemini, QwenVLMAX, LLaVA, etc.) and 9 different tasks.
## Main Evaluation Results
- Metrics:
  - Avg Score: The average score on all tasks (normalized to 0 - 100, the higher the better).
  - The scores in the 5 tasks (HumanEval-V, MBPP-V, GSM8K-V, MATH-V, VP) represent the percentage of accuracy.
  - The scores in the image reconstruction tasks (Matplotlib, SVG, TikZ, Webpage) represent the similarity between the reconstructed images and the original images (normalized to 0 - 100, the higher the better).
- By default, we present the unrefined evaluation results, sorted by the descending order of Avg Score⬆️.
- The ⭐ symbol indicates results that have undergone two rounds of refinement.


This leaderboard was last updated: <nowtime>.
"""

# BibTeX snippet offered to users for citing these results.
CITATION_BUTTON_TEXT = r"""@misc{2023opencompass,
    title={OpenCompass: A Universal Evaluation Platform for Foundation Models},
    author={OpenCompass Contributors},
    howpublished = {\url{https://github.com/open-compass/opencompass}},
    year={2023}
}"""
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"

# Fill in the dynamic placeholders: number of distinct models and the
# build timestamp (server local time, two-digit year).
unique_models_count = df["Model"].nunique()
nowtime = datetime.now()
formatted_time = nowtime.strftime("%y.%m.%d %H:%M:%S")
head_content = head_content.replace("<nowtime>", formatted_time).replace('<model num>', str(unique_models_count))

with gr.Blocks() as demo:
    gr.Markdown(head_content)
    with gr.Tabs():
        Leaderboard(
            value=df,
            select_columns=SelectColumns(
                default_selection=config.ON_LOAD_COLUMNS,
                cant_deselect=["Rank", "Model"],
                label="Select Columns to Display:",
            ),
            search_columns=["Model", "Model Type"],
            # Hidden from the table itself but still available as filters below.
            hide_columns=["Model Size", "Model Type", "Supports multiple images"],
            filter_columns=[
                "Model Size",
                "Model Type",
                "Supports multiple images",
            ],
            datatype=config.TYPES,
            column_widths=["5%", "15%"],
        )
    with gr.Row():
        with gr.Accordion('Citation', open=False):
            citation_button = gr.Textbox(
                value=CITATION_BUTTON_TEXT,
                label=CITATION_BUTTON_LABEL,
                elem_id='citation-button',
            )

if __name__ == "__main__":
    demo.launch()