Spaces:
Sleeping
Sleeping
Upload 4 files
Browse files
- src/leaderboard_utils.py +39 -49
- src/plot_utils.py +3 -3
- src/trend_utils.py +16 -10
- src/version_utils.py +38 -43
src/leaderboard_utils.py
CHANGED
@@ -5,11 +5,11 @@ import json
|
|
5 |
from io import StringIO
|
6 |
from datetime import datetime
|
7 |
|
8 |
-
from src.assets.text_content import REPO
|
9 |
|
10 |
def get_github_data():
|
11 |
"""
|
12 |
-
Read and process data from CSV files hosted on GitHub. - https://github.com/clembench/clembench-runs
|
13 |
Set the path in src/assets/text_content/REPO
|
14 |
|
15 |
Returns:
|
@@ -18,74 +18,60 @@ def get_github_data():
|
|
18 |
- "multimodal": List of DataFrames for each version's multimodal leaderboard data.
|
19 |
- "date": Formatted date of the latest version in "DD Month YYYY" format.
|
20 |
"""
|
21 |
-
|
22 |
-
json_url = base_repo + "benchmark_runs.json"
|
23 |
response = requests.get(json_url)
|
24 |
|
25 |
# Check if the JSON file request was successful
|
26 |
if response.status_code != 200:
|
27 |
-
print(f"Failed to read JSON file: Status Code: {response.status_code}")
|
28 |
return None, None, None, None
|
29 |
|
30 |
json_data = response.json()
|
31 |
versions = json_data['versions']
|
32 |
|
|
|
33 |
version_names = sorted(
|
34 |
[ver['version'] for ver in versions],
|
35 |
key=lambda v: list(map(int, v[1:].split('_')[0].split('.'))),
|
36 |
reverse=True
|
37 |
)
|
38 |
|
39 |
-
#
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
mm_flag = True
|
49 |
-
mm_date = ""
|
50 |
|
51 |
for version in version_names:
|
52 |
-
|
53 |
-
|
54 |
-
if
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
df = pd.read_csv(StringIO(csv_response.text))
|
59 |
-
df = process_df(df)
|
60 |
-
df = df.sort_values(by=df.columns[1], ascending=False) # Sort by clemscore column
|
61 |
-
text_dfs.append(df)
|
62 |
-
if text_flag:
|
63 |
-
text_flag = False
|
64 |
-
text_date = next(ver['last_updated'] for ver in versions if ver['version'] == version)
|
65 |
-
text_date = datetime.strptime(text_date, "%Y-%m-%d").strftime("%d %b %Y")
|
66 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
67 |
else:
|
68 |
-
|
69 |
-
|
70 |
-
# Check if version ends with 'multimodal' before constructing the URL
|
71 |
-
mm_suffix = "_multimodal" if not version.endswith('multimodal') else ""
|
72 |
-
mm_url = f"{base_repo}{version}{mm_suffix}/results.csv"
|
73 |
-
mm_response = requests.get(mm_url)
|
74 |
-
if mm_response.status_code == 200:
|
75 |
-
df = pd.read_csv(StringIO(mm_response.text))
|
76 |
-
df = process_df(df)
|
77 |
-
df = df.sort_values(by=df.columns[1], ascending=False) # Sort by clemscore column
|
78 |
-
mm_dfs.append(df)
|
79 |
-
if mm_flag:
|
80 |
-
mm_flag = False
|
81 |
-
mm_date = next(ver['last_updated'] for ver in versions if ver['version'] == version)
|
82 |
-
mm_date = datetime.strptime(mm_date, "%Y-%m-%d").strftime("%d %b %Y")
|
83 |
|
84 |
|
85 |
-
github_data
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
|
90 |
return github_data
|
91 |
|
@@ -145,3 +131,7 @@ def query_search(df: pd.DataFrame, query: str) -> pd.DataFrame:
|
|
145 |
|
146 |
return filtered_df
|
147 |
|
|
|
|
|
|
|
|
|
|
5 |
from io import StringIO
|
6 |
from datetime import datetime
|
7 |
|
8 |
+
from src.assets.text_content import REPO, BENCHMARK_FILE
|
9 |
|
10 |
def get_github_data():
|
11 |
"""
|
12 |
+
Read and process data from CSV files hosted on GitHub. - https://github.com/clembench/clembench-runs (REPO)
|
13 |
Set the path in src/assets/text_content/REPO
|
14 |
|
15 |
Returns:
|
|
|
18 |
- "multimodal": List of DataFrames for each version's multimodal leaderboard data.
|
19 |
- "date": Formatted date of the latest version in "DD Month YYYY" format.
|
20 |
"""
|
21 |
+
json_url = REPO + BENCHMARK_FILE
|
|
|
22 |
response = requests.get(json_url)
|
23 |
|
24 |
# Check if the JSON file request was successful
|
25 |
if response.status_code != 200:
|
26 |
+
print(f"Failed to read JSON file - {BENCHMARK_FILE} in repo {REPO}: Status Code: {response.status_code}")
|
27 |
return None, None, None, None
|
28 |
|
29 |
json_data = response.json()
|
30 |
versions = json_data['versions']
|
31 |
|
32 |
+
# Sort the versions in benchmark by latest first
|
33 |
version_names = sorted(
|
34 |
[ver['version'] for ver in versions],
|
35 |
key=lambda v: list(map(int, v[1:].split('_')[0].split('.'))),
|
36 |
reverse=True
|
37 |
)
|
38 |
|
39 |
+
# Collect Dataframes - Text and Multimodal Only - Ignoring _quantized, _backends, _ascii
|
40 |
+
text_data = {
|
41 |
+
'version_data': [],
|
42 |
+
'dataframes': []
|
43 |
+
}
|
44 |
+
multimodal_data = {
|
45 |
+
'version_data': [],
|
46 |
+
'dataframes': []
|
47 |
+
}
|
|
|
|
|
48 |
|
49 |
for version in version_names:
|
50 |
+
results_url = f"{REPO}{version}/results.csv"
|
51 |
+
csv_response = requests.get(results_url)
|
52 |
+
if csv_response.status_code == 200:
|
53 |
+
df = pd.read_csv(StringIO(csv_response.text))
|
54 |
+
df = process_df(df)
|
55 |
+
df = df.sort_values(by=df.columns[1], ascending=False) # Sort by Clemscore
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
56 |
|
57 |
+
version_data = {
|
58 |
+
'name': version,
|
59 |
+
'last_updated': [datetime.strptime(v['last_updated'], '%Y-%m-%d').strftime("%d %b %Y") for v in versions if v['version'] == version],
|
60 |
+
'release_date': [datetime.strptime(v['release_date'], '%Y-%m-%d').strftime("%d %b %Y") for v in versions if v['version'] == version]
|
61 |
+
}
|
62 |
+
|
63 |
+
if 'multimodal' in version:
|
64 |
+
multimodal_data['dataframes'].append(df)
|
65 |
+
multimodal_data['version_data'].append(version_data)
|
66 |
else:
|
67 |
+
text_data['dataframes'].append(df)
|
68 |
+
text_data['version_data'].append(version_data)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
69 |
|
70 |
|
71 |
+
github_data = {
|
72 |
+
'text': text_data,
|
73 |
+
'multimodal': multimodal_data
|
74 |
+
}
|
75 |
|
76 |
return github_data
|
77 |
|
|
|
131 |
|
132 |
return filtered_df
|
133 |
|
134 |
+
if __name__=='__main__':
|
135 |
+
data = get_github_data()
|
136 |
+
print(data['text']['version_data'])
|
137 |
+
print(data['multimodal']['version_data'])
|
src/plot_utils.py
CHANGED
@@ -173,7 +173,7 @@ def update_open_models(leaderboard: str = TEXT_NAME):
|
|
173 |
Updated checkbox group for Open Models, based on the leaderboard selected
|
174 |
"""
|
175 |
github_data = get_github_data()
|
176 |
-
leaderboard_data = github_data["text" if leaderboard == TEXT_NAME else "multimodal"][0]
|
177 |
models = leaderboard_data.iloc[:, 0].unique().tolist()
|
178 |
open_models, commercial_models = split_models(models)
|
179 |
return gr.CheckboxGroup(
|
@@ -193,7 +193,7 @@ def update_closed_models(leaderboard: str = TEXT_NAME):
|
|
193 |
Updated checkbox group for Closed Models, based on the leaderboard selected
|
194 |
"""
|
195 |
github_data = get_github_data()
|
196 |
-
leaderboard_data = github_data["text" if leaderboard == TEXT_NAME else "multimodal"][0]
|
197 |
models = leaderboard_data.iloc[:, 0].unique().tolist()
|
198 |
open_models, commercial_models = split_models(models)
|
199 |
return gr.CheckboxGroup(
|
@@ -212,7 +212,7 @@ def get_plot_df(leaderboard: str = TEXT_NAME) -> pd.DataFrame:
|
|
212 |
DataFrame with model data.
|
213 |
"""
|
214 |
github_data = get_github_data()
|
215 |
-
return github_data["text" if leaderboard == TEXT_NAME else "multimodal"][0]
|
216 |
|
217 |
|
218 |
"""
|
|
|
173 |
Updated checkbox group for Open Models, based on the leaderboard selected
|
174 |
"""
|
175 |
github_data = get_github_data()
|
176 |
+
leaderboard_data = github_data["text" if leaderboard == TEXT_NAME else "multimodal"]['dataframes'][0]
|
177 |
models = leaderboard_data.iloc[:, 0].unique().tolist()
|
178 |
open_models, commercial_models = split_models(models)
|
179 |
return gr.CheckboxGroup(
|
|
|
193 |
Updated checkbox group for Closed Models, based on the leaderboard selected
|
194 |
"""
|
195 |
github_data = get_github_data()
|
196 |
+
leaderboard_data = github_data["text" if leaderboard == TEXT_NAME else "multimodal"]['dataframes'][0]
|
197 |
models = leaderboard_data.iloc[:, 0].unique().tolist()
|
198 |
open_models, commercial_models = split_models(models)
|
199 |
return gr.CheckboxGroup(
|
|
|
212 |
DataFrame with model data.
|
213 |
"""
|
214 |
github_data = get_github_data()
|
215 |
+
return github_data["text" if leaderboard == TEXT_NAME else "multimodal"]['dataframes'][0]
|
216 |
|
217 |
|
218 |
"""
|
src/trend_utils.py
CHANGED
@@ -79,10 +79,10 @@ def populate_list(df: pd.DataFrame, abs_diff: float) -> list:
|
|
79 |
prev_clemscore = curr_clemscore
|
80 |
prev_date = curr_date
|
81 |
|
82 |
-
# Add the last model if the difference between the last and previous date is greater than 15 days
|
83 |
-
last_date = df.iloc[-1]['release_date']
|
84 |
-
if date_difference(last_date, prev_date) > 15:
|
85 |
-
|
86 |
|
87 |
return l
|
88 |
|
@@ -335,11 +335,14 @@ def get_final_trend_plot(benchmark: str = "Text", mobile_view: bool = False) ->
|
|
335 |
else:
|
336 |
height = 1000
|
337 |
|
338 |
-
plot_kwargs = {'height': height, 'open_dip':
|
339 |
'mobile_view': mobile_view}
|
340 |
|
|
|
|
|
|
|
341 |
if benchmark == "Text":
|
342 |
-
text_dfs = get_github_data()['text']
|
343 |
text_result_df = get_trend_data(text_dfs, model_registry_data)
|
344 |
|
345 |
## Get benchmark tickvalues as dates for X-axis
|
@@ -349,7 +352,7 @@ def get_final_trend_plot(benchmark: str = "Text", mobile_view: bool = False) ->
|
|
349 |
benchmark_ticks[pd.to_datetime(ver['release_date'])] = ver['version']
|
350 |
fig = get_plot(text_result_df, start_date=START_DATE, end_date=datetime.now().strftime('%Y-%m-%d'), benchmark_ticks=benchmark_ticks, **plot_kwargs)
|
351 |
else:
|
352 |
-
mm_dfs = get_github_data()['multimodal']
|
353 |
result_df = get_trend_data(mm_dfs, model_registry_data)
|
354 |
df = result_df
|
355 |
|
@@ -357,9 +360,12 @@ def get_final_trend_plot(benchmark: str = "Text", mobile_view: bool = False) ->
|
|
357 |
benchmark_ticks = {}
|
358 |
for ver in versions:
|
359 |
if 'multimodal' in ver['version']:
|
360 |
-
|
361 |
-
|
362 |
-
benchmark_ticks[pd.to_datetime(ver['release_date'])] =
|
|
|
|
|
|
|
363 |
fig = get_plot(df, start_date=START_DATE, end_date=datetime.now().strftime('%Y-%m-%d'), benchmark_ticks=benchmark_ticks, **plot_kwargs)
|
364 |
|
365 |
return fig
|
|
|
79 |
prev_clemscore = curr_clemscore
|
80 |
prev_date = curr_date
|
81 |
|
82 |
+
# # Add the last model if the difference between the last and previous date is greater than 15 days
|
83 |
+
# last_date = df.iloc[-1]['release_date']
|
84 |
+
# if date_difference(last_date, prev_date) > 15:
|
85 |
+
# l.append(df.iloc[-1]['model'])
|
86 |
|
87 |
return l
|
88 |
|
|
|
335 |
else:
|
336 |
height = 1000
|
337 |
|
338 |
+
plot_kwargs = {'height': height, 'open_dip': 0, 'comm_dip': 0,
|
339 |
'mobile_view': mobile_view}
|
340 |
|
341 |
+
# plot_kwargs = {'height': height, 'open_dip': -0.5, 'comm_dip': -5,
|
342 |
+
# 'mobile_view': mobile_view}
|
343 |
+
|
344 |
if benchmark == "Text":
|
345 |
+
text_dfs = get_github_data()['text']['dataframes']
|
346 |
text_result_df = get_trend_data(text_dfs, model_registry_data)
|
347 |
|
348 |
## Get benchmark tickvalues as dates for X-axis
|
|
|
352 |
benchmark_ticks[pd.to_datetime(ver['release_date'])] = ver['version']
|
353 |
fig = get_plot(text_result_df, start_date=START_DATE, end_date=datetime.now().strftime('%Y-%m-%d'), benchmark_ticks=benchmark_ticks, **plot_kwargs)
|
354 |
else:
|
355 |
+
mm_dfs = get_github_data()['multimodal']['dataframes']
|
356 |
result_df = get_trend_data(mm_dfs, model_registry_data)
|
357 |
df = result_df
|
358 |
|
|
|
360 |
benchmark_ticks = {}
|
361 |
for ver in versions:
|
362 |
if 'multimodal' in ver['version']:
|
363 |
+
temp_ver = ver['version']
|
364 |
+
temp_ver = temp_ver.replace('_multimodal', '')
|
365 |
+
benchmark_ticks[pd.to_datetime(ver['release_date'])] = temp_ver ## MM benchmark dates considered after v1.6 (incl.)
|
366 |
+
|
367 |
+
print("benchmark_ticks")
|
368 |
+
print(benchmark_ticks)
|
369 |
fig = get_plot(df, start_date=START_DATE, end_date=datetime.now().strftime('%Y-%m-%d'), benchmark_ticks=benchmark_ticks, **plot_kwargs)
|
370 |
|
371 |
return fig
|
src/version_utils.py
CHANGED
@@ -9,18 +9,20 @@ import json
|
|
9 |
from io import StringIO
|
10 |
|
11 |
from src.leaderboard_utils import process_df
|
12 |
-
from src.assets.text_content import REPO
|
13 |
|
14 |
-
|
|
|
|
|
15 |
"""
|
16 |
Read and process data from CSV files of all available versions hosted on GitHub. - https://github.com/clembench/clembench-runs
|
17 |
|
18 |
Returns:
|
19 |
-
|
20 |
-
|
21 |
"""
|
22 |
base_repo = REPO
|
23 |
-
json_url = base_repo +
|
24 |
response = requests.get(json_url)
|
25 |
|
26 |
# Check if the JSON file request was successful
|
@@ -37,51 +39,44 @@ def get_versions_data():
|
|
37 |
reverse=True
|
38 |
)
|
39 |
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
)
|
45 |
-
formatted_date = datetime.strptime(latest_date, "%Y-%m-%d").strftime("%d %b %Y")
|
46 |
-
|
47 |
-
# Get Versions data
|
48 |
-
versions_data = {"latest": latest_version, "date": formatted_date}
|
49 |
-
|
50 |
-
# Collect Dataframes
|
51 |
-
dfs = []
|
52 |
|
53 |
for version in version_names:
|
54 |
-
|
55 |
-
|
56 |
-
quant_url = f"{base_repo}{version}_quantized/results.csv"
|
57 |
-
|
58 |
-
# Text Data
|
59 |
-
response = requests.get(text_url)
|
60 |
if response.status_code == 200:
|
61 |
df = pd.read_csv(StringIO(response.text))
|
62 |
df = process_df(df)
|
63 |
df = df.sort_values(by=df.columns[1], ascending=False) # Sort by clemscore column
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
|
|
|
|
|
|
|
|
|
|
83 |
|
84 |
|
85 |
if __name__ == "__main__":
|
86 |
-
|
87 |
-
print(
|
|
|
9 |
from io import StringIO
|
10 |
|
11 |
from src.leaderboard_utils import process_df
|
12 |
+
from src.assets.text_content import REPO, BENCHMARK_FILE
|
13 |
|
14 |
+
VARIANTS = ['ascii', 'backends', 'quantized'] # Include other variants if added in the main clembench-runs repo
|
15 |
+
|
16 |
+
def get_version_data():
|
17 |
"""
|
18 |
Read and process data from CSV files of all available versions hosted on GitHub. - https://github.com/clembench/clembench-runs
|
19 |
|
20 |
Returns:
|
21 |
+
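version_data: Dict with 'versions' (list of per-version metadata dicts) and 'dataframes' (list of processed DataFrames, appended in the same order as 'versions').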
|
22 |
-
|
23 |
"""
|
24 |
base_repo = REPO
|
25 |
+
json_url = base_repo + BENCHMARK_FILE
|
26 |
response = requests.get(json_url)
|
27 |
|
28 |
# Check if the JSON file request was successful
|
|
|
39 |
reverse=True
|
40 |
)
|
41 |
|
42 |
+
version_data = {
|
43 |
+
'versions': [],
|
44 |
+
'dataframes': []
|
45 |
+
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
|
47 |
for version in version_names:
|
48 |
+
base_url = f"{base_repo}{version}/results.csv"
|
49 |
+
response = requests.get(base_url)
|
|
|
|
|
|
|
|
|
50 |
if response.status_code == 200:
|
51 |
df = pd.read_csv(StringIO(response.text))
|
52 |
df = process_df(df)
|
53 |
df = df.sort_values(by=df.columns[1], ascending=False) # Sort by clemscore column
|
54 |
+
version_data['dataframes'].append(df)
|
55 |
+
metadata = {
|
56 |
+
'name': version,
|
57 |
+
'last_updated': [datetime.strptime(v['last_updated'], '%Y-%m-%d').strftime("%d %b %Y") for v in versions if v['version'] == version],
|
58 |
+
'release_date': [datetime.strptime(v['release_date'], '%Y-%m-%d').strftime("%d %b %Y") for v in versions if v['version'] == version]
|
59 |
+
}
|
60 |
+
version_data['versions'].append(metadata)
|
61 |
+
|
62 |
+
# Look for variant results file
|
63 |
+
version = version.split('_')[0] # Remove _multimodal suffix, and check for other suffixes
|
64 |
+
for suffix in VARIANTS:
|
65 |
+
base_url = f"{base_repo}{version}_{suffix}/results.csv"
|
66 |
+
response = requests.get(base_url)
|
67 |
+
if response.status_code == 200:
|
68 |
+
df = pd.read_csv(StringIO(response.text))
|
69 |
+
df = process_df(df)
|
70 |
+
df = df.sort_values(by=df.columns[1], ascending=False) # Sort by clemscore column
|
71 |
+
version_data['dataframes'].append(df)
|
72 |
+
metadata = {
|
73 |
+
'name': version + "_" + suffix # Release date and last updated are skipped: variants are not listed in benchmark_runs.json
|
74 |
+
}
|
75 |
+
version_data['versions'].append(metadata)
|
76 |
+
|
77 |
+
return version_data
|
78 |
|
79 |
|
80 |
if __name__ == "__main__":
|
81 |
+
version_data = get_version_data()
|
82 |
+
print(version_data['versions'])
|