Spaces:
Sleeping
Sleeping
Update config.py
Browse files
config.py
CHANGED
@@ -1,74 +1,36 @@
|
|
1 |
-
import gradio as gr
|
2 |
-
from gradio_leaderboard import Leaderboard, SelectColumns, ColumnFilter
|
3 |
-
import config
|
4 |
-
from pathlib import Path
|
5 |
import pandas as pd
|
6 |
-
from datetime import datetime
|
7 |
|
8 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
|
10 |
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
#
|
15 |
-
# print(df.columns)
|
16 |
-
# print(df.head(1))
|
17 |
-
head_content = """
|
18 |
-
# 🏅 BlinkCode Leaderboard
|
19 |
-
### Welcome to the BlinkCode Leaderboard! On this leaderboard we share the evaluation results of MLLMs obtained by the [OpenSource Framework](github.link).
|
20 |
-
|
21 |
-
### Currently, BlinkCode Leaderboard covers <model num> different VLMs (including GPT-4v, Gemini, QwenVLMAX, LLaVA, etc.) and 9 different tasks.
|
22 |
-
## Main Evaluation Results
|
23 |
-
- Metrics:
|
24 |
-
- Avg Score: The average score on all tasks (normalized to 0 - 100, the higher the better).
|
25 |
-
- The scores in the 5 tasks (HumanEval-V, MBPP-V, GSM8K-V, MATH-V, VP) represent the percentage of accuracy.
|
26 |
-
- The scores in the image reconstruction tasks (Matplotlib, SVG, TikZ, Webpage) represent the similarity between the reconstructed images and the original images (normalized to 0 - 100, the higher the better).
|
27 |
-
- By default, we present the unrefined evaluation results, sorted by the descending order of Avg Score⬆️.
|
28 |
-
- The ⭐ symbol indicates results that have undergone two rounds of refinement.
|
29 |
-
|
30 |
-
|
31 |
-
This leaderboard was last updated: <nowtime>.
|
32 |
-
"""
|
33 |
-
CITATION_BUTTON_TEXT = r"""@misc{2023opencompass,
|
34 |
-
title={OpenCompass: A Universal Evaluation Platform for Foundation Models},
|
35 |
-
author={OpenCompass Contributors},
|
36 |
-
howpublished = {\url{https://github.com/open-compass/opencompass}},
|
37 |
-
year={2023}
|
38 |
-
}"""
|
39 |
-
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
|
40 |
-
unique_models_count = df["Model"].nunique()
|
41 |
-
# print(unique_models_count)
|
42 |
-
nowtime = datetime.now()
|
43 |
-
formatted_time = nowtime.strftime("%y.%m.%d %H:%M:%S")
|
44 |
-
head_content = head_content.replace("<nowtime>", formatted_time).replace('<model num>', str(unique_models_count))
|
45 |
-
|
46 |
-
with gr.Blocks() as demo:
|
47 |
-
gr.Markdown(head_content)
|
48 |
-
with gr.Tabs():
|
49 |
-
Leaderboard(
|
50 |
-
value=df,
|
51 |
-
select_columns=SelectColumns(
|
52 |
-
default_selection=config.ON_LOAD_COLUMNS,
|
53 |
-
cant_deselect=["Rank", "Model"],
|
54 |
-
label="Select Columns to Display:",
|
55 |
-
),
|
56 |
-
search_columns=["Model", "Model Type"],
|
57 |
-
hide_columns=["Model Size", "Model Type", "Supports multiple images"],
|
58 |
-
filter_columns=[
|
59 |
-
"Model Size",
|
60 |
-
"Model Type",
|
61 |
-
"Supports multiple images"
|
62 |
-
# ColumnFilter("Params (B)", default=[0, 20]),
|
63 |
-
],
|
64 |
-
datatype=config.TYPES,
|
65 |
-
column_widths=["5%", "15%"],
|
66 |
-
)
|
67 |
-
with gr.Row():
|
68 |
-
with gr.Accordion('Citation', open=False):
|
69 |
-
citation_button = gr.Textbox(
|
70 |
-
value=CITATION_BUTTON_TEXT,
|
71 |
-
label=CITATION_BUTTON_LABEL,
|
72 |
-
elem_id='citation-button')
|
73 |
-
if __name__ == "__main__":
|
74 |
-
demo.launch()
|
|
|
|
|
|
|
|
|
|
|
1 |
import pandas as pd
|
|
|
2 |
|
3 |
+
# Gradio `datatype` spec for each leaderboard column, in display order.
# Must stay aligned with the DataFrame's column order passed to Leaderboard().
#
# BUG FIX: the original list was missing the comma after the first entry of
# each "Avg Score" section, so Python's implicit string concatenation produced
# two invalid "numbernumber" entries and a 24-element list instead of the
# intended 26 column dtypes (one per commented column name).
TYPES = [
    "number",  # Rank
    "str",     # Model Type
    "str",     # Model Size
    "str",     # Model
    "str",     # Param (B)
    "bool",    # Supports multiple images
    # First result set — presumably the unrefined scores (TODO confirm
    # against the DataFrame columns).
    "number",  # Avg Score
    "number",  # HumanEval-V
    "number",  # MBPP-V
    "number",  # GSM8K-V
    "number",  # MATH-V
    "number",  # VP
    "number",  # Matplotlib
    "number",  # SVG
    "number",  # TikZ
    "number",  # Webpage
    # Second result set — presumably the refined (⭐) scores for the same
    # tasks (TODO confirm against the DataFrame columns).
    "number",  # Avg Score
    "number",  # HumanEval-V
    "number",  # MBPP-V
    "number",  # GSM8K-V
    "number",  # MATH-V
    "number",  # VP
    "number",  # Matplotlib
    "number",  # SVG
    "number",  # TikZ
    "number",  # Webpage
]
|
31 |
|
32 |
|
33 |
+
# Columns visible by default when the leaderboard first renders; any other
# column must be enabled by the user through the column selector. Names must
# match the DataFrame's column headers exactly (including the ⬆️ suffix).
ON_LOAD_COLUMNS = [
    "Rank",
    "Model",
    "Params (B)",
    "Avg Score⬆️",
    "HumanEval-V",
    "MBPP-V",
    "GSM8K-V",
    "MATH-V",
    "VP",
    "Matplotlib",
    "SVG",
    "TikZ",
    "Webpage",
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|