dongsheng committed on
Commit
2262cad
·
verified ·
1 Parent(s): 96f681b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +130 -100
app.py CHANGED
@@ -1,9 +1,12 @@
1
  import gradio as gr
2
  import json
3
  import pandas as pd
 
 
4
  from urllib.request import urlopen, URLError
5
  import re
6
  from datetime import datetime
 
7
 
8
  # Constants
9
  CITATION_BUTTON_TEXT = r"""@misc{2023opencompass,
@@ -11,16 +14,23 @@ CITATION_BUTTON_TEXT = r"""@misc{2023opencompass,
11
  author={OpenCompass Contributors},
12
  howpublished = {\url{https://github.com/open-compass/opencompass}},
13
  year={2023}
 
14
  }"""
15
  CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
16
- # 开发环境
17
- # DATA_URL_BASE = "http://opencompass.oss-cn-shanghai.aliyuncs.com/dev-assets/research-rank/research-data.REALTIME."
18
- # 生产环境
 
 
 
 
 
19
  DATA_URL_BASE = "http://opencompass.oss-cn-shanghai.aliyuncs.com/assets/research-rank/research-data.REALTIME."
20
 
21
  def find_latest_data_url():
22
  """Find the latest available data URL by trying different dates."""
23
  today = datetime.now()
 
24
  for i in range(365):
25
  date = today.replace(day=today.day - i)
26
  date_str = date.strftime("%Y%m%d")
@@ -30,6 +40,7 @@ def find_latest_data_url():
30
  return url, date_str
31
  except URLError:
32
  continue
 
33
  return None, None
34
 
35
  def get_latest_data():
@@ -40,6 +51,7 @@ def get_latest_data():
40
  formatted_update_time = datetime.strptime(update_time, "%Y%m%d").strftime("%Y-%m-%d")
41
  return data_url, formatted_update_time
42
 
 
43
  def get_leaderboard_title(update_time):
44
  return f"# CompassAcademic Leaderboard (Last Updated: {update_time})"
45
 
@@ -50,36 +62,72 @@ The CompassAcademic currently focuses on the comprehensive reasoning abilities o
50
  - Prompts and reproduction scripts can be found in [**OpenCompass**: A Toolkit for Evaluation of LLMs](https://github.com/open-compass/opencompass)🏆.
51
  """
52
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  MODEL_SIZE = ['<10B', '10B-70B', '>70B', 'Unknown']
54
  MODEL_TYPE = ['API', 'OpenSource']
55
 
 
56
  def load_data(data_url):
57
  response = urlopen(data_url)
58
  data = json.loads(response.read().decode('utf-8'))
59
  return data
60
 
 
61
  def build_main_table(data):
62
  df = pd.DataFrame(data['globalData']['OverallTable'])
 
 
63
  models_data = data['models']
64
  df['OpenSource'] = df['model'].apply(
65
  lambda x: 'Yes' if models_data[x]['release'] == 'OpenSource' else 'No'
66
  )
 
 
67
  df['Rank'] = df['Average'].rank(ascending=False, method='min').astype(int)
68
-
69
  columns = {
70
- 'Rank': 'Rank', 'model': 'Model', 'org': 'Organization', 'num': 'Parameters',
71
- 'OpenSource': 'OpenSource', 'Average': 'Average Score', 'BBH': 'BBH',
72
- 'Math-500': 'Math-500', 'AIME': 'AIME', 'MMLU-Pro': 'MMLU-Pro',
73
- 'LiveCodeBench': 'LiveCodeBench', 'HumanEval': 'HumanEval',
74
- 'GQPA-Diamond': 'GQPA-Diamond', 'IFEval': 'IFEval',
 
 
 
 
 
 
 
 
 
75
  }
76
  df = df[list(columns.keys())].rename(columns=columns)
77
  return df
78
 
 
79
  def filter_table(df, size_ranges, model_types):
80
  filtered_df = df.copy()
81
-
 
82
  if size_ranges:
 
83
  def get_size_in_B(param):
84
  if param == 'N/A':
85
  return None
@@ -87,23 +135,30 @@ def filter_table(df, size_ranges, model_types):
87
  return float(param.replace('B', ''))
88
  except:
89
  return None
90
-
91
- filtered_df['size_in_B'] = filtered_df['Parameters'].apply(get_size_in_B)
 
 
 
92
  mask = pd.Series(False, index=filtered_df.index)
93
-
94
  for size_range in size_ranges:
95
  if size_range == '<10B':
96
- mask |= (filtered_df['size_in_B'] < 10) & (filtered_df['size_in_B'].notna())
 
 
97
  elif size_range == '10B-70B':
98
- mask |= (filtered_df['size_in_B'] >= 10) & (filtered_df['size_in_B'] < 70)
 
 
99
  elif size_range == '>70B':
100
  mask |= filtered_df['size_in_B'] >= 70
101
  elif size_range == 'Unknown':
102
  mask |= filtered_df['size_in_B'].isna()
103
-
104
  filtered_df = filtered_df[mask]
105
  filtered_df.drop('size_in_B', axis=1, inplace=True)
106
-
 
107
  if model_types:
108
  type_mask = pd.Series(False, index=filtered_df.index)
109
  for model_type in model_types:
@@ -112,79 +167,49 @@ def filter_table(df, size_ranges, model_types):
112
  elif model_type == 'OpenSource':
113
  type_mask |= filtered_df['OpenSource'] == 'Yes'
114
  filtered_df = filtered_df[type_mask]
115
-
116
  return filtered_df
117
 
 
118
  def calculate_column_widths(df):
 
119
  column_widths = []
 
120
  for column in df.columns:
 
121
  header_length = len(str(column))
122
  max_content_length = df[column].astype(str).map(len).max()
 
 
 
 
 
123
  width = max(header_length * 10, max_content_length * 8) + 20
124
- width = max(160, min(400, width))
 
 
 
 
 
 
125
  column_widths.append(width)
126
- return column_widths
127
 
128
- class DataState:
129
- def __init__(self):
130
- self.current_df = None
131
 
132
- data_state = DataState()
133
 
134
  def create_interface():
135
- empty_df = pd.DataFrame(columns=[
136
- 'Rank', 'Model', 'Organization', 'Parameters', 'OpenSource',
137
- 'Average Score', 'BBH', 'Math-500', 'AIME', 'MMLU-Pro',
138
- 'LiveCodeBench', 'HumanEval', 'GQPA-Diamond', 'IFEval'
139
- ])
140
 
141
- def load_initial_data():
142
- try:
143
- data_url, update_time = get_latest_data()
144
- data = load_data(data_url)
145
- new_df = build_main_table(data)
146
- data_state.current_df = new_df
147
- filtered_df = filter_table(new_df, MODEL_SIZE, MODEL_TYPE)
148
- return get_leaderboard_title(update_time), filtered_df.sort_values("Average Score", ascending=False)
149
- except Exception as e:
150
- print(f"Error loading initial data: {e}")
151
- return "# CompassAcademic Leaderboard (Error loading data)", empty_df
152
-
153
- def refresh_data():
154
- try:
155
- data_url, update_time = get_latest_data()
156
- data = load_data(data_url)
157
- new_df = build_main_table(data)
158
- data_state.current_df = new_df
159
- filtered_df = filter_table(new_df, MODEL_SIZE, MODEL_TYPE)
160
- return get_leaderboard_title(update_time), filtered_df.sort_values("Average Score", ascending=False)
161
- except Exception as e:
162
- print(f"Error refreshing data: {e}")
163
- return None, None
164
-
165
- def auto_refresh():
166
- """Single refresh function for automatic updates"""
167
- title, data = refresh_data()
168
- status = f"Last auto update: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
169
- if title and data is not None:
170
- return title, data, status
171
- return None, None, None
172
-
173
- def update_table(size_ranges, model_types):
174
- if data_state.current_df is None:
175
- return empty_df
176
- filtered_df = filter_table(data_state.current_df, size_ranges, model_types)
177
- return filtered_df.sort_values("Average Score", ascending=False)
178
-
179
- initial_title, initial_data = load_initial_data()
180
-
181
  with gr.Blocks() as demo:
182
- title_comp = gr.Markdown(initial_title)
183
-
184
  with gr.Tabs() as tabs:
185
  with gr.TabItem("🏅 Main Leaderboard", elem_id='main'):
186
  gr.Markdown(MAIN_LEADERBOARD_DESCRIPTION)
187
-
188
  with gr.Row():
189
  with gr.Column():
190
  size_filter = gr.CheckboxGroup(
@@ -200,47 +225,52 @@ def create_interface():
200
  label='Model Type',
201
  interactive=True,
202
  )
203
-
204
  with gr.Column():
205
  table = gr.DataFrame(
206
- value=initial_data,
207
  interactive=False,
208
- wrap=False,
209
- column_widths=calculate_column_widths(initial_data),
210
  )
211
-
212
- refresh_button = gr.Button("Refresh Data")
213
- update_status = gr.Markdown("Last update: " + datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
214
-
215
- def refresh_and_update():
216
- title, data = refresh_data()
217
- status = f"Last manual update: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
218
- return title, data, status
219
-
220
- refresh_button.click(
221
- fn=refresh_and_update,
222
- outputs=[title_comp, table, update_status],
223
- )
224
-
225
- # 添加自动更新功能
226
- demo.load(
227
- fn=auto_refresh,
228
- outputs=[title_comp, table, update_status],
229
- every=10 # 每10秒更新一次
230
- )
231
-
 
 
232
  size_filter.change(
233
  fn=update_table,
234
  inputs=[size_filter, type_filter],
235
  outputs=table,
236
  )
237
-
238
  type_filter.change(
239
  fn=update_table,
240
  inputs=[size_filter, type_filter],
241
  outputs=table,
242
  )
243
 
 
 
 
244
  with gr.Row():
245
  with gr.Accordion("Citation", open=False):
246
  citation_button = gr.Textbox(
@@ -251,7 +281,7 @@ def create_interface():
251
 
252
  return demo
253
 
 
254
  if __name__ == '__main__':
255
  demo = create_interface()
256
- demo.queue()
257
- demo.launch(server_name='0.0.0.0')
 
1
  import gradio as gr
2
  import json
3
  import pandas as pd
4
+ from collections import defaultdict
5
+ import copy as cp
6
  from urllib.request import urlopen, URLError
7
  import re
8
  from datetime import datetime
9
+ import time
10
 
11
  # Constants
12
  CITATION_BUTTON_TEXT = r"""@misc{2023opencompass,
 
14
  author={OpenCompass Contributors},
15
  howpublished = {\url{https://github.com/open-compass/opencompass}},
16
  year={2023}
17
+ },
18
  }"""
19
  CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
20
+ OPENCOMPASS_README = (
21
+ 'https://raw.githubusercontent.com/open-compass/opencompass/main/README.md'
22
+ )
23
+ GITHUB_REPO = 'https://github.com/open-compass/opencompass'
24
+ GITHUB_RAW = 'https://raw.githubusercontent.com/open-compass/opencompass'
25
+ GITHUB_BLOB = 'https://github.com/open-compass/opencompass/blob'
26
+
27
+ # Base URL for the JSON data
28
  DATA_URL_BASE = "http://opencompass.oss-cn-shanghai.aliyuncs.com/assets/research-rank/research-data.REALTIME."
29
 
30
  def find_latest_data_url():
31
  """Find the latest available data URL by trying different dates."""
32
  today = datetime.now()
33
+ # Try last 365 days
34
  for i in range(365):
35
  date = today.replace(day=today.day - i)
36
  date_str = date.strftime("%Y%m%d")
 
40
  return url, date_str
41
  except URLError:
42
  continue
43
+ # If no valid URL found, return None
44
  return None, None
45
 
46
  def get_latest_data():
 
51
  formatted_update_time = datetime.strptime(update_time, "%Y%m%d").strftime("%Y-%m-%d")
52
  return data_url, formatted_update_time
53
 
54
+ # Markdown content
55
def get_leaderboard_title(update_time):
    """Return the Markdown heading for the leaderboard, stamped with *update_time*."""
    heading = f"# CompassAcademic Leaderboard (Last Updated: {update_time})"
    return heading
57
 
 
62
  - Prompts and reproduction scripts can be found in [**OpenCompass**: A Toolkit for Evaluation of LLMs](https://github.com/open-compass/opencompass)🏆.
63
  """
64
 
65
+ def fix_image_urls(content):
66
+ """Fix image URLs in markdown content."""
67
+ # Handle the specific logo.svg path
68
+ content = content.replace(
69
+ 'docs/en/_static/image/logo.svg',
70
+ 'https://raw.githubusercontent.com/open-compass/opencompass/main/docs/en/_static/image/logo.svg',
71
+ )
72
+
73
+ # Replace other relative image paths with absolute GitHub URLs
74
+ content = re.sub(
75
+ r'!\[[^\]]*\]\((?!http)([^)]+)\)',
76
+ lambda m: f'![{m.group(0)}](https://raw.githubusercontent.com/open-compass/opencompass/main/{m.group(1)})',
77
+ content,
78
+ )
79
+
80
+ return content
81
+
82
+
83
# Checkbox choices for the UI filters: parameter-size buckets and
# release type ('API' or 'OpenSource'), consumed by filter_table().
MODEL_SIZE = ['<10B', '10B-70B', '>70B', 'Unknown']
MODEL_TYPE = ['API', 'OpenSource']
85
 
86
+
87
def load_data(data_url):
    """Fetch and parse the leaderboard JSON payload from *data_url*.

    Args:
        data_url: URL of a UTF-8 encoded JSON document.

    Returns:
        The decoded JSON object (typically a dict).

    Raises:
        urllib.error.URLError: if the URL cannot be opened.
        json.JSONDecodeError: if the payload is not valid JSON.
    """
    # Use the response as a context manager so the connection is always
    # closed (the original left it open — a resource leak).
    with urlopen(data_url) as response:
        return json.loads(response.read().decode('utf-8'))
91
 
92
+
93
def build_main_table(data):
    """Assemble the main leaderboard DataFrame from the raw JSON payload.

    Reads the score rows from ``data['globalData']['OverallTable']``, tags
    each model as open-source or not using ``data['models']``, ranks models
    by their ``Average`` score, and returns only the display columns,
    renamed for presentation.
    """
    table = pd.DataFrame(data['globalData']['OverallTable'])

    # Tag every row with its release type taken from the models metadata.
    release_info = data['models']
    table['OpenSource'] = [
        'Yes' if release_info[name]['release'] == 'OpenSource' else 'No'
        for name in table['model']
    ]

    # Rank by average score; ties share the lowest (best) rank.
    table['Rank'] = table['Average'].rank(ascending=False, method='min').astype(int)

    # Source-column -> display-column mapping; the key order also fixes
    # the column order of the returned frame.
    rename_map = {
        'Rank': 'Rank',
        'model': 'Model',
        'org': 'Organization',
        'num': 'Parameters',
        'OpenSource': 'OpenSource',
        'Average': 'Average Score',
        'BBH': 'BBH',
        'Math-500': 'Math-500',
        'AIME': 'AIME',
        'MMLU-Pro': 'MMLU-Pro',
        'LiveCodeBench': 'LiveCodeBench',
        'HumanEval': 'HumanEval',
        'GQPA-Diamond': 'GQPA-Diamond',
        'IFEval': 'IFEval',
    }
    selected = table[list(rename_map.keys())]
    return selected.rename(columns=rename_map)
123
 
124
+
125
  def filter_table(df, size_ranges, model_types):
126
  filtered_df = df.copy()
127
+
128
+ # Filter by size
129
  if size_ranges:
130
+
131
  def get_size_in_B(param):
132
  if param == 'N/A':
133
  return None
 
135
  return float(param.replace('B', ''))
136
  except:
137
  return None
138
+
139
+ filtered_df['size_in_B'] = filtered_df['Parameters'].apply(
140
+ get_size_in_B
141
+ )
142
+
143
  mask = pd.Series(False, index=filtered_df.index)
 
144
  for size_range in size_ranges:
145
  if size_range == '<10B':
146
+ mask |= (filtered_df['size_in_B'] < 10) & (
147
+ filtered_df['size_in_B'].notna()
148
+ )
149
  elif size_range == '10B-70B':
150
+ mask |= (filtered_df['size_in_B'] >= 10) & (
151
+ filtered_df['size_in_B'] < 70
152
+ )
153
  elif size_range == '>70B':
154
  mask |= filtered_df['size_in_B'] >= 70
155
  elif size_range == 'Unknown':
156
  mask |= filtered_df['size_in_B'].isna()
157
+
158
  filtered_df = filtered_df[mask]
159
  filtered_df.drop('size_in_B', axis=1, inplace=True)
160
+
161
+ # Filter by model type
162
  if model_types:
163
  type_mask = pd.Series(False, index=filtered_df.index)
164
  for model_type in model_types:
 
167
  elif model_type == 'OpenSource':
168
  type_mask |= filtered_df['OpenSource'] == 'Yes'
169
  filtered_df = filtered_df[type_mask]
170
+
171
  return filtered_df
172
 
173
+
174
  def calculate_column_widths(df):
175
+ """Dynamically calculate column widths based on content length."""
176
  column_widths = []
177
+
178
  for column in df.columns:
179
+ # Get max length of column name and values
180
  header_length = len(str(column))
181
  max_content_length = df[column].astype(str).map(len).max()
182
+
183
+ # Use the larger of header or content length
184
+ # Multiply by average character width (approximately 8 pixels)
185
+ # Add padding (20 pixels)
186
+ # Increase the multiplier for header length to ensure it fits
187
  width = max(header_length * 10, max_content_length * 8) + 20
188
+
189
+ # Set minimum width (200 pixels)
190
+ width = max(160, width)
191
+
192
+ # Set maximum width (400 pixels) to prevent extremely wide columns
193
+ width = min(400, width)
194
+
195
  column_widths.append(width)
 
196
 
197
+ return column_widths
 
 
198
 
 
199
 
200
  def create_interface():
201
+ data_url, update_time = get_latest_data()
202
+ data = load_data(data_url)
203
+ df = build_main_table(data)
204
+ title = gr.Markdown(get_leaderboard_title(update_time))
 
205
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
206
  with gr.Blocks() as demo:
207
+ title_comp = gr.Markdown(get_leaderboard_title(update_time))
208
+
209
  with gr.Tabs() as tabs:
210
  with gr.TabItem("🏅 Main Leaderboard", elem_id='main'):
211
  gr.Markdown(MAIN_LEADERBOARD_DESCRIPTION)
212
+
213
  with gr.Row():
214
  with gr.Column():
215
  size_filter = gr.CheckboxGroup(
 
225
  label='Model Type',
226
  interactive=True,
227
  )
228
+
229
  with gr.Column():
230
  table = gr.DataFrame(
231
+ value=df.sort_values("Average Score", ascending=False),
232
  interactive=False,
233
+ wrap=False, # 禁用自动换行
234
+ column_widths=calculate_column_widths(df),
235
  )
236
+
237
+ def update_data():
238
+ """Periodically check for new data and update the interface"""
239
+ while True:
240
+ time.sleep(300) # Check every 5 minutes
241
+ try:
242
+ new_data_url, new_update_time = get_latest_data()
243
+ if new_data_url != data_url:
244
+ new_data = load_data(new_data_url)
245
+ new_df = build_main_table(new_data)
246
+ filtered_df = filter_table(new_df, size_filter.value, type_filter.value)
247
+ title_comp.value = get_leaderboard_title(new_update_time)
248
+ table.value = filtered_df.sort_values("Average Score", ascending=False)
249
+ except Exception as e:
250
+ print(f"Error updating data: {e}")
251
+ continue
252
+
253
+ def update_table(size_ranges, model_types):
254
+ filtered_df = filter_table(df, size_ranges, model_types)
255
+ return filtered_df.sort_values(
256
+ "Average Score", ascending=False
257
+ )
258
+
259
  size_filter.change(
260
  fn=update_table,
261
  inputs=[size_filter, type_filter],
262
  outputs=table,
263
  )
264
+
265
  type_filter.change(
266
  fn=update_table,
267
  inputs=[size_filter, type_filter],
268
  outputs=table,
269
  )
270
 
271
+ # Set up periodic data update
272
+ demo.load(update_data)
273
+
274
  with gr.Row():
275
  with gr.Accordion("Citation", open=False):
276
  citation_button = gr.Textbox(
 
281
 
282
  return demo
283
 
284
+
285
if __name__ == '__main__':
    # Build the Gradio app and serve it on all network interfaces.
    app = create_interface()
    app.launch(server_name='0.0.0.0')