myhs committed on
Commit
12f6654
·
verified ·
1 Parent(s): 06ba0a4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +200 -197
app.py CHANGED
@@ -1,204 +1,207 @@
1
  import gradio as gr
2
- from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
3
  import pandas as pd
4
- from apscheduler.schedulers.background import BackgroundScheduler
5
- from huggingface_hub import snapshot_download
6
-
7
- from src.about import (
8
- CITATION_BUTTON_LABEL,
9
- CITATION_BUTTON_TEXT,
10
- EVALUATION_QUEUE_TEXT,
11
- INTRODUCTION_TEXT,
12
- LLM_BENCHMARKS_TEXT,
13
- TITLE,
14
- )
15
- from src.display.css_html_js import custom_css
16
- from src.display.utils import (
17
- BENCHMARK_COLS,
18
- COLS,
19
- EVAL_COLS,
20
- EVAL_TYPES,
21
- AutoEvalColumn,
22
- ModelType,
23
- fields,
24
- WeightType,
25
- Precision
26
- )
27
- from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
28
- from src.populate import get_evaluation_queue_df, get_leaderboard_df
29
- from src.submission.submit import add_new_eval
30
-
31
-
32
def restart_space():
    """Ask the Hub API client to restart the Space identified by ``REPO_ID``."""
    API.restart_space(repo_id=REPO_ID)
34
-
35
- ### Space initialisation
36
- try:
37
- print(EVAL_REQUESTS_PATH)
38
- snapshot_download(
39
- repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
40
- )
41
- except Exception:
42
- restart_space()
43
- try:
44
- print(EVAL_RESULTS_PATH)
45
- snapshot_download(
46
- repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
47
- )
48
- except Exception:
49
- restart_space()
50
-
51
-
52
- LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
53
-
54
- (
55
- finished_eval_queue_df,
56
- running_eval_queue_df,
57
- pending_eval_queue_df,
58
- ) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
59
-
60
def init_leaderboard(dataframe):
    """Build the read-only ``Leaderboard`` component for ``dataframe``.

    Column types, default/locked/hidden columns and the filter widgets are
    all derived from the fields of ``AutoEvalColumn``.

    Raises:
        ValueError: if ``dataframe`` is ``None`` or has no rows.
    """
    # Guard first: an empty table cannot be rendered.
    if dataframe is None or dataframe.empty:
        raise ValueError("Leaderboard DataFrame is empty or None.")

    column_types = [col.type for col in fields(AutoEvalColumn)]

    column_picker = SelectColumns(
        default_selection=[col.name for col in fields(AutoEvalColumn) if col.displayed_by_default],
        cant_deselect=[col.name for col in fields(AutoEvalColumn) if col.never_hidden],
        label="Select Columns to Display:",
    )

    searchable = [AutoEvalColumn.model.name, AutoEvalColumn.license.name]
    hidden = [col.name for col in fields(AutoEvalColumn) if col.hidden]

    type_filter = ColumnFilter(AutoEvalColumn.model_type.name, type="checkboxgroup", label="Model types")
    precision_filter = ColumnFilter(AutoEvalColumn.precision.name, type="checkboxgroup", label="Precision")
    size_filter = ColumnFilter(
        AutoEvalColumn.params.name,
        type="slider",
        min=0.01,
        max=150,
        label="Select the number of parameters (B)",
    )
    availability_filter = ColumnFilter(
        AutoEvalColumn.still_on_hub.name, type="boolean", label="Deleted/incomplete", default=True
    )

    return Leaderboard(
        value=dataframe,
        datatype=column_types,
        select_columns=column_picker,
        search_columns=searchable,
        hide_columns=hidden,
        filter_columns=[type_filter, precision_filter, size_filter, availability_filter],
        bool_checkboxgroup_label="Hide models",
        interactive=False,
    )
90
-
91
-
92
- demo = gr.Blocks(css=custom_css)
93
- with demo:
94
- gr.HTML(TITLE)
95
- gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
96
-
97
- with gr.Tabs(elem_classes="tab-buttons") as tabs:
98
- with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
99
- leaderboard = init_leaderboard(LEADERBOARD_DF)
100
-
101
- with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
102
- gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
103
-
104
- with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
105
- with gr.Column():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
  with gr.Row():
107
- gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
 
 
 
 
 
 
108
 
109
  with gr.Column():
110
- with gr.Accordion(
111
- f"✅ Finished Evaluations ({len(finished_eval_queue_df)})",
112
- open=False,
113
- ):
114
- with gr.Row():
115
- finished_eval_table = gr.components.Dataframe(
116
- value=finished_eval_queue_df,
117
- headers=EVAL_COLS,
118
- datatype=EVAL_TYPES,
119
- row_count=5,
120
- )
121
- with gr.Accordion(
122
- f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})",
123
- open=False,
124
- ):
125
- with gr.Row():
126
- running_eval_table = gr.components.Dataframe(
127
- value=running_eval_queue_df,
128
- headers=EVAL_COLS,
129
- datatype=EVAL_TYPES,
130
- row_count=5,
131
- )
132
-
133
- with gr.Accordion(
134
- f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})",
135
- open=False,
136
- ):
137
- with gr.Row():
138
- pending_eval_table = gr.components.Dataframe(
139
- value=pending_eval_queue_df,
140
- headers=EVAL_COLS,
141
- datatype=EVAL_TYPES,
142
- row_count=5,
143
- )
144
- with gr.Row():
145
- gr.Markdown("# ✉️✨ Submit your model here!", elem_classes="markdown-text")
146
-
147
- with gr.Row():
148
- with gr.Column():
149
- model_name_textbox = gr.Textbox(label="Model name")
150
- revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
151
- model_type = gr.Dropdown(
152
- choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
153
- label="Model type",
154
- multiselect=False,
155
- value=None,
156
- interactive=True,
157
  )
158
 
159
- with gr.Column():
160
- precision = gr.Dropdown(
161
- choices=[i.value.name for i in Precision if i != Precision.Unknown],
162
- label="Precision",
163
- multiselect=False,
164
- value="float16",
165
- interactive=True,
166
- )
167
- weight_type = gr.Dropdown(
168
- choices=[i.value.name for i in WeightType],
169
- label="Weights type",
170
- multiselect=False,
171
- value="Original",
172
- interactive=True,
173
- )
174
- base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
175
-
176
- submit_button = gr.Button("Submit Eval")
177
- submission_result = gr.Markdown()
178
- submit_button.click(
179
- add_new_eval,
180
- [
181
- model_name_textbox,
182
- base_model_name_textbox,
183
- revision_name_textbox,
184
- precision,
185
- weight_type,
186
- model_type,
187
- ],
188
- submission_result,
189
- )
190
-
191
- with gr.Row():
192
- with gr.Accordion("📙 Citation", open=False):
193
- citation_button = gr.Textbox(
194
- value=CITATION_BUTTON_TEXT,
195
- label=CITATION_BUTTON_LABEL,
196
- lines=20,
197
- elem_id="citation-button",
198
- show_copy_button=True,
199
- )
200
-
201
- scheduler = BackgroundScheduler()
202
- scheduler.add_job(restart_space, "interval", seconds=1800)
203
- scheduler.start()
204
- demo.queue(default_concurrency_limit=40).launch()
 
1
  import gradio as gr
2
+ import json
3
  import pandas as pd
4
+ from urllib.request import urlopen
5
+ from urllib.error import URLError
6
+ import re
7
+ from datetime import datetime
8
+
9
+ # Constants
10
+ CITATION_BUTTON_TEXT = r"""@misc{2023opencompass,
11
+ title={OpenCompass: A Universal Evaluation Platform for Foundation Models},
12
+ author={OpenCompass Contributors},
13
+ howpublished = {\url{https://github.com/open-compass/opencompass}},
14
+ year={2023}
15
+ }"""
16
+ CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
17
+ # Development environment
18
+ # DATA_URL_BASE = "http://opencompass.oss-cn-shanghai.aliyuncs.com/dev-assets/research-rank/research-data.REALTIME."
19
+ # DATA_URL_BASE = "./s1test"
20
+ # Production environment
21
+ DATA_URL_BASE = "http://opencompass.oss-cn-shanghai.aliyuncs.com/assets/research-rank/research-data.REALTIME."
22
+
23
+
24
def find_latest_data_url():
    """Find the most recent data file by probing dated URLs.

    Starting from today (local time, via ``datetime.now()``) and stepping
    back one calendar day at a time for up to 365 days, the candidate
    ``f"{DATA_URL_BASE}{YYYYMMDD}.json"`` is opened. The first candidate that
    opens without a ``URLError`` wins.

    Returns:
        ``(url, date_str)`` for the newest reachable file, where ``date_str``
        is formatted ``YYYYMMDD``; ``(None, None)`` if no candidate in the
        365-day window could be opened.
    """
    from datetime import timedelta

    today = datetime.now()
    for days_back in range(365):
        # timedelta arithmetic rolls over month and year boundaries;
        # replace(day=today.day - n) raises ValueError once the day
        # number reaches zero.
        date_str = (today - timedelta(days=days_back)).strftime("%Y%m%d")
        url = f"{DATA_URL_BASE}{date_str}.json"
        try:
            # The request is only an existence probe, so close it right away.
            with urlopen(url):
                pass
        except URLError:
            continue
        return url, date_str
    return None, None
38
+
39
+
40
def get_latest_data():
    """Locate the newest data file and return its URL with a display date.

    Returns:
        ``(data_url, update_time)`` where ``update_time`` is the file's
        ``YYYYMMDD`` stamp re-formatted as ``YYYY-MM-DD``.

    Raises:
        Exception: if ``find_latest_data_url`` reports no usable URL.
    """
    located_url, raw_stamp = find_latest_data_url()
    if not located_url:
        raise Exception("Could not find valid data URL")

    parsed_stamp = datetime.strptime(raw_stamp, "%Y%m%d")
    return located_url, parsed_stamp.strftime("%Y-%m-%d")
47
+
48
+
49
def get_leaderboard_title(update_time):
    """Return the Markdown heading for the page, embedding ``update_time``."""
    heading_template = "# Supported Datasets List (Last Updated: {})"
    return heading_template.format(update_time)
51
+
52
+
53
# Markdown shown at the top of the "Dataset List" tab.
MAIN_DESCRIPTION = """## The List of Datasets Supported by OpenCompass
Testing line.
- All configurations and datasets can be found in [**OpenCompass**: A Toolkit for Evaluation of LLMs](https://github.com/open-compass/opencompass)🏆.
"""
57
+
58
+
59
+
60
+
61
def load_data(data_url):
    """Fetch ``data_url`` and return its body decoded as JSON.

    Args:
        data_url: URL of the JSON document, in any scheme ``urlopen`` accepts.

    Returns:
        The decoded JSON value.

    Raises:
        urllib.error.URLError: if the URL cannot be opened.
        json.JSONDecodeError: if the body is not valid JSON.
    """
    # Parse the response that was actually requested, not a local file,
    # and close the connection deterministically.
    with urlopen(data_url) as response:
        return json.load(response)
68
+
69
+
70
def build_main_table(data):
    """Turn the raw mapping into the three-column display table.

    ``data`` is loaded into a DataFrame and transposed, so each top-level
    key becomes a row. Only the ``name``, ``category`` and ``article``
    columns are kept, renamed to their display headers.
    """
    display_names = {
        'name': 'Name',
        'category': 'Category',
        'article': 'Article Address',
    }
    per_entry = pd.DataFrame(data).T
    selected = per_entry[list(display_names)]
    return selected.rename(columns=display_names)
77
+
78
+
79
+ DATA_CATEGORY = ['med', 'law', 'code']
80
+
81
def filter_table1(df, data_category):
    """Return a copy of ``df`` restricted to the requested categories.

    A falsy ``data_category`` (``None`` or empty) means "no filtering": the
    full copy is returned. Otherwise a row is kept when its ``Category``
    value equals any entry of ``data_category``.
    """
    result = df.copy()
    if not data_category:
        return result

    # Start with nothing selected and OR in one category at a time.
    keep = pd.Series(False, index=result.index)
    for wanted in data_category:
        keep = keep | (result['Category'] == wanted)
    return result[keep]
90
+
91
+
92
def calculate_column_widths(df):
    """Estimate a width for every column of ``df``.

    Each width is the larger of 10 units per header character and 8 units
    per character of the longest stringified cell, plus 20 units, clamped
    to the range 160..400.

    Returns:
        A list with one width per column, in column order.
    """
    def _width_for(column):
        header_px = len(str(column)) * 10
        content_px = df[column].astype(str).map(len).max() * 8
        return max(160, min(400, max(header_px, content_px) + 20))

    return [_width_for(column) for column in df.columns]
101
+
102
+
103
class DataState:
    """Mutable holder for the most recently loaded table."""

    def __init__(self) -> None:
        # Stays None until a table has been stored here.
        self.current_df = None
106
+
107
+
108
+ data_state = DataState()
109
+
110
+
111
def create_interface():
    """Build the Gradio app that lists the datasets supported by OpenCompass.

    The data is fetched once while the interface is being assembled. The
    "Refresh Data" button fetches it again, and the category checkboxes
    filter the table cached in ``data_state``.

    Returns:
        The assembled ``gr.Blocks`` instance (not launched).
    """
    # NOTE(review): the nesting of the layout containers below was
    # reconstructed from a listing without indentation; confirm it against
    # the rendered page.
    empty_df = pd.DataFrame(columns=[
        'Name', 'Category', 'Article Address'
    ])

    def fetch_table(categories):
        """Fetch the latest data, cache it, and return (title, filtered table)."""
        data_url, update_time = get_latest_data()
        new_df = build_main_table(load_data(data_url))
        data_state.current_df = new_df
        filtered_df = filter_table1(new_df, categories)
        return get_leaderboard_title(update_time), filtered_df.sort_values("Name", ascending=True)

    def load_initial_data():
        """Initial load; fall back to an error title and an empty table."""
        try:
            return fetch_table(DATA_CATEGORY)
        except Exception as e:
            print(f"Error loading initial data: {e}")
            return "# Supported Datasets List (Error loading data)", empty_df

    def refresh_data(categories):
        """Re-fetch the data, keeping the currently selected categories."""
        try:
            return fetch_table(categories)
        except Exception as e:
            print(f"Error refreshing data: {e}")
            # Leave the title and table as they are instead of blanking
            # them when a refresh fails.
            return gr.update(), gr.update()

    def update_table(category):
        """Re-filter the cached table when the category selection changes."""
        if data_state.current_df is None:
            return empty_df
        filtered_df = filter_table1(data_state.current_df, category)
        return filtered_df.sort_values("Name", ascending=True)

    initial_title, initial_data = load_initial_data()

    with gr.Blocks() as demo:
        title_comp = gr.Markdown(initial_title)

        with gr.Tabs() as tabs:
            with gr.TabItem("Dataset List", elem_id='main'):
                gr.Markdown(MAIN_DESCRIPTION)

                with gr.Row():
                    with gr.Column():
                        category_filter = gr.CheckboxGroup(
                            choices=DATA_CATEGORY,
                            value=DATA_CATEGORY,
                            label='Category',
                            interactive=True,
                        )

                with gr.Column():
                    table = gr.DataFrame(
                        value=initial_data,
                        interactive=False,
                        wrap=False,
                        column_widths=calculate_column_widths(initial_data),
                    )

                refresh_button = gr.Button("Refresh Data")

                # Pass the checkbox state so a refresh does not silently
                # show categories the user has unticked.
                refresh_button.click(
                    fn=refresh_data,
                    inputs=[category_filter],
                    outputs=[title_comp, table],
                )

                category_filter.change(
                    fn=update_table,
                    inputs=[category_filter],
                    outputs=table,
                )

        with gr.Row():
            with gr.Accordion("Citation", open=False):
                citation_button = gr.Textbox(
                    value=CITATION_BUTTON_TEXT,
                    label=CITATION_BUTTON_LABEL,
                    elem_id='citation-button',
                    lines=6,  # show more rows by default
                    max_lines=8,  # cap the number of rows
                    show_copy_button=True,  # copy button for convenience
                )

    return demo
202
+
203
+
204
+ if __name__ == '__main__':
205
+ demo = create_interface()
206
+ demo.queue()
207
+ demo.launch(server_name='0.0.0.0')