yajuniverse committed on
Commit 8481fed · verified · 1 Parent(s): b2de968

Update config.py

Files changed (1)
  1. config.py +32 -70
config.py CHANGED
@@ -1,74 +1,36 @@
-import gradio as gr
-from gradio_leaderboard import Leaderboard, SelectColumns, ColumnFilter
-import config
-from pathlib import Path
 import pandas as pd
-from datetime import datetime
 
-abs_path = Path(__file__).parent
-
-
-df = pd.read_json(str(abs_path / "leader_board.json"))
-# Randomly set True/False for the "MOE" column
-#
-# print(df.info())
-# print(df.columns)
-# print(df.head(1))
-head_content = """
-# 🏅 BlinkCode Leaderboard
-### Welcome to the BlinkCode Leaderboard! Here we share the evaluation results of MLLMs obtained with the [OpenSource Framework](github.link).
-
-### Currently, the BlinkCode Leaderboard covers <model num> different VLMs (including GPT-4v, Gemini, QwenVLMAX, LLaVA, etc.) and 9 different tasks.
-## Main Evaluation Results
-- Metrics:
-  - Avg Score: the average score across all tasks (normalized to 0-100, the higher the better).
-  - The scores on the 5 tasks (HumanEval-V, MBPP-V, GSM8K-V, MATH-V, VP) are accuracy percentages.
-  - The scores on the image reconstruction tasks (Matplotlib, SVG, TikZ, Webpage) measure the similarity between the reconstructed and original images (normalized to 0-100, the higher the better).
-- By default, we present the unrefined evaluation results, sorted in descending order of Avg Score⬆️.
-- The ⭐ symbol indicates results that have undergone two rounds of refinement.
-
-
-This leaderboard was last updated: <nowtime>.
-"""
-CITATION_BUTTON_TEXT = r"""@misc{2023opencompass,
-    title={OpenCompass: A Universal Evaluation Platform for Foundation Models},
-    author={OpenCompass Contributors},
-    howpublished = {\url{https://github.com/open-compass/opencompass}},
-    year={2023}
-}"""
-CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
-unique_models_count = df["Model"].nunique()
-# print(unique_models_count)
-nowtime = datetime.now()
-formatted_time = nowtime.strftime("%y.%m.%d %H:%M:%S")
-head_content = head_content.replace("<nowtime>", formatted_time).replace('<model num>', str(unique_models_count))
-
-with gr.Blocks() as demo:
-    gr.Markdown(head_content)
-    with gr.Tabs():
-        Leaderboard(
-            value=df,
-            select_columns=SelectColumns(
-                default_selection=config.ON_LOAD_COLUMNS,
-                cant_deselect=["Rank", "Model"],
-                label="Select Columns to Display:",
-            ),
-            search_columns=["Model", "Model Type"],
-            hide_columns=["Model Size", "Model Type", "Supports multiple images"],
-            filter_columns=[
-                "Model Size",
-                "Model Type",
-                "Supports multiple images"
-                # ColumnFilter("Params (B)", default=[0, 20]),
-            ],
-            datatype=config.TYPES,
-            column_widths=["5%", "15%"],
-        )
-    with gr.Row():
-        with gr.Accordion('Citation', open=False):
-            citation_button = gr.Textbox(
-                value=CITATION_BUTTON_TEXT,
-                label=CITATION_BUTTON_LABEL,
-                elem_id='citation-button')
-if __name__ == "__main__":
-    demo.launch()
+TYPES = [
+    "number",  # Rank
+    "str",     # Model Type
+    "str",     # Model Size
+    "str",     # Model
+    "str",     # Params (B)
+    "bool",    # Supports multiple images
+    "number",  # Avg Score
+    "number",  # HumanEval-V
+    "number",  # MBPP-V
+    "number",  # GSM8K-V
+    "number",  # MATH-V
+    "number",  # VP
+    "number",  # Matplotlib
+    "number",  # SVG
+    "number",  # TikZ
+    "number",  # Webpage
+    "number",  # Avg Score
+    "number",  # HumanEval-V
+    "number",  # MBPP-V
+    "number",  # GSM8K-V
+    "number",  # MATH-V
+    "number",  # VP
+    "number",  # Matplotlib
+    "number",  # SVG
+    "number",  # TikZ
+    "number",  # Webpage
+]
 
 
+ON_LOAD_COLUMNS = [
+    "Rank", "Model", "Params (B)", "Avg Score⬆️", "HumanEval-V", "MBPP-V", "GSM8K-V", "MATH-V", "VP", "Matplotlib", "SVG", "TikZ", "Webpage"
+]
+# "Avg Score⬆️", "HumanEval-V", "MBPP-V", "GSM8K-V", "MATH-V", "VP", "Matplotlib", "SVG", "TikZ", "Webpage"