Koshti10 committed on
Commit
e406705
·
verified ·
1 Parent(s): 866fbd6

Upload 4 files

Browse files
src/leaderboard_utils.py CHANGED
@@ -5,11 +5,11 @@ import json
5
  from io import StringIO
6
  from datetime import datetime
7
 
8
- from src.assets.text_content import REPO
9
 
10
  def get_github_data():
11
  """
12
- Read and process data from CSV files hosted on GitHub. - https://github.com/clembench/clembench-runs
13
  Set the path in src/assets/text_content/REPO
14
 
15
  Returns:
@@ -18,74 +18,60 @@ def get_github_data():
18
  - "multimodal": List of DataFrames for each version's multimodal leaderboard data.
19
  - "date": Formatted date of the latest version in "DD Month YYYY" format.
20
  """
21
- base_repo = REPO
22
- json_url = base_repo + "benchmark_runs.json"
23
  response = requests.get(json_url)
24
 
25
  # Check if the JSON file request was successful
26
  if response.status_code != 200:
27
- print(f"Failed to read JSON file: Status Code: {response.status_code}")
28
  return None, None, None, None
29
 
30
  json_data = response.json()
31
  versions = json_data['versions']
32
 
 
33
  version_names = sorted(
34
  [ver['version'] for ver in versions],
35
  key=lambda v: list(map(int, v[1:].split('_')[0].split('.'))),
36
  reverse=True
37
  )
38
 
39
- # Get Leaderboard data - for text-only + multimodal
40
- github_data = {}
41
-
42
- # Collect Dataframes
43
- text_dfs = []
44
- mm_dfs = []
45
-
46
- text_flag = True
47
- text_date = ""
48
- mm_flag = True
49
- mm_date = ""
50
 
51
  for version in version_names:
52
- # Collect CSV data in descending order of clembench-runs versions
53
- # Collect Text-only data
54
- if len(version.split('_')) == 1:
55
- text_url = f"{base_repo}{version}/results.csv"
56
- csv_response = requests.get(text_url)
57
- if csv_response.status_code == 200:
58
- df = pd.read_csv(StringIO(csv_response.text))
59
- df = process_df(df)
60
- df = df.sort_values(by=df.columns[1], ascending=False) # Sort by clemscore column
61
- text_dfs.append(df)
62
- if text_flag:
63
- text_flag = False
64
- text_date = next(ver['last_updated'] for ver in versions if ver['version'] == version)
65
- text_date = datetime.strptime(text_date, "%Y-%m-%d").strftime("%d %b %Y")
66
 
 
 
 
 
 
 
 
 
 
67
  else:
68
- print(f"Failed to read Text-only leaderboard CSV file for version: {version}. Status Code: {csv_response.status_code}")
69
-
70
- # Check if version ends with 'multimodal' before constructing the URL
71
- mm_suffix = "_multimodal" if not version.endswith('multimodal') else ""
72
- mm_url = f"{base_repo}{version}{mm_suffix}/results.csv"
73
- mm_response = requests.get(mm_url)
74
- if mm_response.status_code == 200:
75
- df = pd.read_csv(StringIO(mm_response.text))
76
- df = process_df(df)
77
- df = df.sort_values(by=df.columns[1], ascending=False) # Sort by clemscore column
78
- mm_dfs.append(df)
79
- if mm_flag:
80
- mm_flag = False
81
- mm_date = next(ver['last_updated'] for ver in versions if ver['version'] == version)
82
- mm_date = datetime.strptime(mm_date, "%Y-%m-%d").strftime("%d %b %Y")
83
 
84
 
85
- github_data["text"] = text_dfs
86
- github_data["multimodal"] = mm_dfs
87
- github_data["date"] = text_date
88
- github_data["mm_date"] = mm_date
89
 
90
  return github_data
91
 
@@ -145,3 +131,7 @@ def query_search(df: pd.DataFrame, query: str) -> pd.DataFrame:
145
 
146
  return filtered_df
147
 
 
 
 
 
 
5
  from io import StringIO
6
  from datetime import datetime
7
 
8
+ from src.assets.text_content import REPO, BENCHMARK_FILE
9
 
10
  def get_github_data():
11
  """
12
+ Read and process data from CSV files hosted on GitHub. - https://github.com/clembench/clembench-runs (REPO)
13
  Set the path in src/assets/text_content/REPO
14
 
15
  Returns:
 
18
  - "multimodal": List of DataFrames for each version's multimodal leaderboard data.
19
  - "date": Formatted date of the latest version in "DD Month YYYY" format.
20
  """
21
+ json_url = REPO + BENCHMARK_FILE
 
22
  response = requests.get(json_url)
23
 
24
  # Check if the JSON file request was successful
25
  if response.status_code != 200:
26
+ print(f"Failed to read JSON file - {BENCHMARK_FILE} in repo {REPO}: Status Code: {response.status_code}")
27
  return None, None, None, None
28
 
29
  json_data = response.json()
30
  versions = json_data['versions']
31
 
32
+ # Sort the versions in benchmark by latest first
33
  version_names = sorted(
34
  [ver['version'] for ver in versions],
35
  key=lambda v: list(map(int, v[1:].split('_')[0].split('.'))),
36
  reverse=True
37
  )
38
 
39
+ # Collect Dataframes - Text and Multimodal Only - Ignoring _quantized, _backends, _ascii
40
+ text_data = {
41
+ 'version_data': [],
42
+ 'dataframes': []
43
+ }
44
+ multimodal_data = {
45
+ 'version_data': [],
46
+ 'dataframes': []
47
+ }
 
 
48
 
49
  for version in version_names:
50
+ results_url = f"{REPO}{version}/results.csv"
51
+ csv_response = requests.get(results_url)
52
+ if csv_response.status_code == 200:
53
+ df = pd.read_csv(StringIO(csv_response.text))
54
+ df = process_df(df)
55
+ df = df.sort_values(by=df.columns[1], ascending=False) # Sort by Clemscore
 
 
 
 
 
 
 
 
56
 
57
+ version_data = {
58
+ 'name': version,
59
+ 'last_updated': [datetime.strptime(v['last_updated'], '%Y-%m-%d').strftime("%d %b %Y") for v in versions if v['version'] == version],
60
+ 'release_date': [datetime.strptime(v['release_date'], '%Y-%m-%d').strftime("%d %b %Y") for v in versions if v['version'] == version]
61
+ }
62
+
63
+ if 'multimodal' in version:
64
+ multimodal_data['dataframes'].append(df)
65
+ multimodal_data['version_data'].append(version_data)
66
  else:
67
+ text_data['dataframes'].append(df)
68
+ text_data['version_data'].append(version_data)
 
 
 
 
 
 
 
 
 
 
 
 
 
69
 
70
 
71
+ github_data = {
72
+ 'text': text_data,
73
+ 'multimodal': multimodal_data
74
+ }
75
 
76
  return github_data
77
 
 
131
 
132
  return filtered_df
133
 
134
+ if __name__=='__main__':
135
+ data = get_github_data()
136
+ print(data['text']['version_data'])
137
+ print(data['multimodal']['version_data'])
src/plot_utils.py CHANGED
@@ -173,7 +173,7 @@ def update_open_models(leaderboard: str = TEXT_NAME):
173
  Updated checkbox group for Open Models, based on the leaderboard selected
174
  """
175
  github_data = get_github_data()
176
- leaderboard_data = github_data["text" if leaderboard == TEXT_NAME else "multimodal"][0]
177
  models = leaderboard_data.iloc[:, 0].unique().tolist()
178
  open_models, commercial_models = split_models(models)
179
  return gr.CheckboxGroup(
@@ -193,7 +193,7 @@ def update_closed_models(leaderboard: str = TEXT_NAME):
193
  Updated checkbox group for Closed Models, based on the leaderboard selected
194
  """
195
  github_data = get_github_data()
196
- leaderboard_data = github_data["text" if leaderboard == TEXT_NAME else "multimodal"][0]
197
  models = leaderboard_data.iloc[:, 0].unique().tolist()
198
  open_models, commercial_models = split_models(models)
199
  return gr.CheckboxGroup(
@@ -212,7 +212,7 @@ def get_plot_df(leaderboard: str = TEXT_NAME) -> pd.DataFrame:
212
  DataFrame with model data.
213
  """
214
  github_data = get_github_data()
215
- return github_data["text" if leaderboard == TEXT_NAME else "multimodal"][0]
216
 
217
 
218
  """
 
173
  Updated checkbox group for Open Models, based on the leaderboard selected
174
  """
175
  github_data = get_github_data()
176
+ leaderboard_data = github_data["text" if leaderboard == TEXT_NAME else "multimodal"]['dataframes'][0]
177
  models = leaderboard_data.iloc[:, 0].unique().tolist()
178
  open_models, commercial_models = split_models(models)
179
  return gr.CheckboxGroup(
 
193
  Updated checkbox group for Closed Models, based on the leaderboard selected
194
  """
195
  github_data = get_github_data()
196
+ leaderboard_data = github_data["text" if leaderboard == TEXT_NAME else "multimodal"]['dataframes'][0]
197
  models = leaderboard_data.iloc[:, 0].unique().tolist()
198
  open_models, commercial_models = split_models(models)
199
  return gr.CheckboxGroup(
 
212
  DataFrame with model data.
213
  """
214
  github_data = get_github_data()
215
+ return github_data["text" if leaderboard == TEXT_NAME else "multimodal"]['dataframes'][0]
216
 
217
 
218
  """
src/trend_utils.py CHANGED
@@ -79,10 +79,10 @@ def populate_list(df: pd.DataFrame, abs_diff: float) -> list:
79
  prev_clemscore = curr_clemscore
80
  prev_date = curr_date
81
 
82
- # Add the last model if the difference between the last and previous date is greater than 15 days
83
- last_date = df.iloc[-1]['release_date']
84
- if date_difference(last_date, prev_date) > 15:
85
- l.append(df.iloc[-1]['model'])
86
 
87
  return l
88
 
@@ -335,11 +335,14 @@ def get_final_trend_plot(benchmark: str = "Text", mobile_view: bool = False) ->
335
  else:
336
  height = 1000
337
 
338
- plot_kwargs = {'height': height, 'open_dip': -0.5, 'comm_dip': -5,
339
  'mobile_view': mobile_view}
340
 
 
 
 
341
  if benchmark == "Text":
342
- text_dfs = get_github_data()['text']
343
  text_result_df = get_trend_data(text_dfs, model_registry_data)
344
 
345
  ## Get benchmark tickvalues as dates for X-axis
@@ -349,7 +352,7 @@ def get_final_trend_plot(benchmark: str = "Text", mobile_view: bool = False) ->
349
  benchmark_ticks[pd.to_datetime(ver['release_date'])] = ver['version']
350
  fig = get_plot(text_result_df, start_date=START_DATE, end_date=datetime.now().strftime('%Y-%m-%d'), benchmark_ticks=benchmark_ticks, **plot_kwargs)
351
  else:
352
- mm_dfs = get_github_data()['multimodal']
353
  result_df = get_trend_data(mm_dfs, model_registry_data)
354
  df = result_df
355
 
@@ -357,9 +360,12 @@ def get_final_trend_plot(benchmark: str = "Text", mobile_view: bool = False) ->
357
  benchmark_ticks = {}
358
  for ver in versions:
359
  if 'multimodal' in ver['version']:
360
- ver['version'] = ver['version'].replace('_multimodal', '')
361
- if date_difference(ver['release_date'], '2024-07-15') >= 0:
362
- benchmark_ticks[pd.to_datetime(ver['release_date'])] = ver['version'] ## MM benchmark dates considered after v1.6 (incl.)
 
 
 
363
  fig = get_plot(df, start_date=START_DATE, end_date=datetime.now().strftime('%Y-%m-%d'), benchmark_ticks=benchmark_ticks, **plot_kwargs)
364
 
365
  return fig
 
79
  prev_clemscore = curr_clemscore
80
  prev_date = curr_date
81
 
82
+ # # Add the last model if the difference between the last and previous date is greater than 15 days
83
+ # last_date = df.iloc[-1]['release_date']
84
+ # if date_difference(last_date, prev_date) > 15:
85
+ # l.append(df.iloc[-1]['model'])
86
 
87
  return l
88
 
 
335
  else:
336
  height = 1000
337
 
338
+ plot_kwargs = {'height': height, 'open_dip': 0, 'comm_dip': 0,
339
  'mobile_view': mobile_view}
340
 
341
+ # plot_kwargs = {'height': height, 'open_dip': -0.5, 'comm_dip': -5,
342
+ # 'mobile_view': mobile_view}
343
+
344
  if benchmark == "Text":
345
+ text_dfs = get_github_data()['text']['dataframes']
346
  text_result_df = get_trend_data(text_dfs, model_registry_data)
347
 
348
  ## Get benchmark tickvalues as dates for X-axis
 
352
  benchmark_ticks[pd.to_datetime(ver['release_date'])] = ver['version']
353
  fig = get_plot(text_result_df, start_date=START_DATE, end_date=datetime.now().strftime('%Y-%m-%d'), benchmark_ticks=benchmark_ticks, **plot_kwargs)
354
  else:
355
+ mm_dfs = get_github_data()['multimodal']['dataframes']
356
  result_df = get_trend_data(mm_dfs, model_registry_data)
357
  df = result_df
358
 
 
360
  benchmark_ticks = {}
361
  for ver in versions:
362
  if 'multimodal' in ver['version']:
363
+ temp_ver = ver['version']
364
+ temp_ver = temp_ver.replace('_multimodal', '')
365
+ benchmark_ticks[pd.to_datetime(ver['release_date'])] = temp_ver ## MM benchmark dates considered after v1.6 (incl.)
366
+
367
+ print("benchmark_ticks")
368
+ print(benchmark_ticks)
369
  fig = get_plot(df, start_date=START_DATE, end_date=datetime.now().strftime('%Y-%m-%d'), benchmark_ticks=benchmark_ticks, **plot_kwargs)
370
 
371
  return fig
src/version_utils.py CHANGED
@@ -9,18 +9,20 @@ import json
9
  from io import StringIO
10
 
11
  from src.leaderboard_utils import process_df
12
- from src.assets.text_content import REPO
13
 
14
- def get_versions_data():
 
 
15
  """
16
  Read and process data from CSV files of all available versions hosted on GitHub. - https://github.com/clembench/clembench-runs
17
 
18
  Returns:
19
- versions_data:
20
  -
21
  """
22
  base_repo = REPO
23
- json_url = base_repo + "benchmark_runs.json"
24
  response = requests.get(json_url)
25
 
26
  # Check if the JSON file request was successful
@@ -37,51 +39,44 @@ def get_versions_data():
37
  reverse=True
38
  )
39
 
40
- # Get Last updated date of the latest version
41
- latest_version = version_names[0]
42
- latest_date = next(
43
- ver['last_updated'] for ver in versions if ver['version'] == latest_version
44
- )
45
- formatted_date = datetime.strptime(latest_date, "%Y-%m-%d").strftime("%d %b %Y")
46
-
47
- # Get Versions data
48
- versions_data = {"latest": latest_version, "date": formatted_date}
49
-
50
- # Collect Dataframes
51
- dfs = []
52
 
53
  for version in version_names:
54
- text_url = f"{base_repo}{version}/results.csv"
55
- mm_url = f"{base_repo}{version}_multimodal/results.csv"
56
- quant_url = f"{base_repo}{version}_quantized/results.csv"
57
-
58
- # Text Data
59
- response = requests.get(text_url)
60
  if response.status_code == 200:
61
  df = pd.read_csv(StringIO(response.text))
62
  df = process_df(df)
63
  df = df.sort_values(by=df.columns[1], ascending=False) # Sort by clemscore column
64
- versions_data[version] = df
65
-
66
- # Multimodal Data
67
- mm_response = requests.get(mm_url)
68
- if mm_response.status_code == 200:
69
- mm_df = pd.read_csv(StringIO(mm_response.text))
70
- mm_df = process_df(mm_df)
71
- mm_df = mm_df.sort_values(by=mm_df.columns[1], ascending=False) # Sort by clemscore column
72
- versions_data[version+"_multimodal"] = mm_df
73
-
74
- # Multimodal Data
75
- q_response = requests.get(quant_url)
76
- if q_response.status_code == 200:
77
- q_df = pd.read_csv(StringIO(q_response.text))
78
- q_df = process_df(q_df)
79
- q_df = q_df.sort_values(by=q_df.columns[1], ascending=False) # Sort by clemscore column
80
- versions_data[version + "_quantized"] = q_df
81
-
82
- return versions_data
 
 
 
 
 
83
 
84
 
85
  if __name__ == "__main__":
86
- versions_data = get_versions_data()
87
- print(versions_data.keys())
 
9
  from io import StringIO
10
 
11
  from src.leaderboard_utils import process_df
12
+ from src.assets.text_content import REPO, BENCHMARK_FILE
13
 
14
+ VARIANTS = ['ascii', 'backends', 'quantized'] # Include other variants if added in the main clembench-runs repo
15
+
16
+ def get_version_data():
17
  """
18
  Read and process data from CSV files of all available versions hosted on GitHub. - https://github.com/clembench/clembench-runs
19
 
20
  Returns:
21
+ version_data:
22
  -
23
  """
24
  base_repo = REPO
25
+ json_url = base_repo + BENCHMARK_FILE
26
  response = requests.get(json_url)
27
 
28
  # Check if the JSON file request was successful
 
39
  reverse=True
40
  )
41
 
42
+ version_data = {
43
+ 'versions': [],
44
+ 'dataframes': []
45
+ }
 
 
 
 
 
 
 
 
46
 
47
  for version in version_names:
48
+ base_url = f"{base_repo}{version}/results.csv"
49
+ response = requests.get(base_url)
 
 
 
 
50
  if response.status_code == 200:
51
  df = pd.read_csv(StringIO(response.text))
52
  df = process_df(df)
53
  df = df.sort_values(by=df.columns[1], ascending=False) # Sort by clemscore column
54
+ version_data['dataframes'].append(df)
55
+ metadata = {
56
+ 'name': version,
57
+ 'last_updated': [datetime.strptime(v['last_updated'], '%Y-%m-%d').strftime("%d %b %Y") for v in versions if v['version'] == version],
58
+ 'release_date': [datetime.strptime(v['release_date'], '%Y-%m-%d').strftime("%d %b %Y") for v in versions if v['version'] == version]
59
+ }
60
+ version_data['versions'].append(metadata)
61
+
62
+ # Look for variant results file
63
+ version = version.split('_')[0] # Remove _multimodal suffix, and check for other suffixes
64
+ for suffix in VARIANTS:
65
+ base_url = f"{base_repo}{version}_{suffix}/results.csv"
66
+ response = requests.get(base_url)
67
+ if response.status_code == 200:
68
+ df = pd.read_csv(StringIO(response.text))
69
+ df = process_df(df)
70
+ df = df.sort_values(by=df.columns[1], ascending=False) # Sort by clemscore column
71
+ version_data['dataframes'].append(df)
72
+ metadata = {
73
+ 'name': version + "_" + suffix # Skip Release date and last updated # Not included in benchmark_runs.json
74
+ }
75
+ version_data['versions'].append(metadata)
76
+
77
+ return version_data
78
 
79
 
80
  if __name__ == "__main__":
81
+ version_data = get_version_data()
82
+ print(version_data['versions'])