sherzod-hakimov committed on
Commit
cac6844
·
1 Parent(s): 101e122

first commit

Browse files
README.md CHANGED
@@ -1,13 +1,12 @@
1
  ---
2
- title: Llm Calculator
3
- emoji: 📊
4
- colorFrom: gray
5
- colorTo: purple
6
  sdk: gradio
7
- sdk_version: 5.17.1
8
  app_file: app.py
9
  pinned: false
10
- short_description: find the best LLM from multiple configurations
11
  ---
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: LLM-Calculator
3
+ emoji: 🏆
4
+ colorFrom: red
5
+ colorTo: pink
6
  sdk: gradio
7
+ sdk_version: 4.44.1
8
  app_file: app.py
9
  pinned: false
 
10
  ---
11
 
12
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,368 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import gradio as gr
3
+ import os
4
+ from gradio_rangeslider import RangeSlider
5
+ import calendar
6
+ import datetime
7
+ import numpy as np
8
+ from huggingface_hub import HfApi
9
+ from apscheduler.schedulers.background import BackgroundScheduler
10
+
11
+ from src.filter_utils import filter, filter_cols
12
+ from src.process_data import merge_data
13
+ import assets.text_content as tc
14
+
15
+ """
16
+ CONSTANTS
17
+ """
18
+ # For restarting the gradio application every 24 Hrs
19
+ TIME = 86400 # in seconds # Reload will not work locally - requires HFToken # The app launches locally as expected - only without the reload utility
20
+
21
+ """
22
+ AUTO RESTART HF SPACE
23
+ """
24
+ HF_TOKEN = os.environ.get("H4_TOKEN", None)
25
+ api = HfApi()
26
+
27
def restart_space():
    """Ask the Hugging Face Hub to restart the Space named by ``tc.HF_REPO``.

    Registered below as a recurring scheduler job; authenticates with the
    module-level ``HF_TOKEN``.
    """
    api.restart_space(token=HF_TOKEN, repo_id=tc.HF_REPO)
29
+
30
+
31
+
32
# Main leaderboard containing every column, best clemscore first.
text_leaderboard = merge_data()
text_leaderboard = text_leaderboard.sort_values(by=tc.CLEMSCORE, ascending=False)

# Round the displayed metrics to one decimal place.
text_leaderboard[tc.LATENCY] = text_leaderboard[tc.LATENCY].round(1)
text_leaderboard[tc.CLEMSCORE] = text_leaderboard[tc.CLEMSCORE].round(1)

# Largest known parameter count among open-weight models; this becomes the
# upper bound of the parameter slider. Falls back to 0 when there is no
# open-weight row (or none with a known size) so the name is always defined.
open_weight_df = text_leaderboard[text_leaderboard[tc.OPEN_WEIGHT] == True]  # noqa: E712
open_weight_params = open_weight_df[tc.PARAMS].dropna()
max_parameter_size = open_weight_params.max() if not open_weight_params.empty else 0

# Short leaderboard containing only the fixed display columns.
short_leaderboard = filter_cols(text_leaderboard)

# Per-column value lists used to derive the filter widgets' choices and bounds.
ip_prices = text_leaderboard[tc.INPUT].tolist()
op_prices = text_leaderboard[tc.OUTPUT].tolist()
latencies = text_leaderboard[tc.LATENCY].tolist()
parameters = text_leaderboard[tc.PARAMS].tolist()
contexts = text_leaderboard[tc.CONTEXT].tolist()
dates = text_leaderboard[tc.RELEASE_DATE].tolist()

# Each row stores its languages as one comma-separated string; collect the
# distinct, whitespace-trimmed names in alphabetical order.
langs = sorted({
    lang.strip()
    for entry in text_leaderboard[tc.LANGS]
    for lang in entry.split(',')
})
licenses = sorted(set(text_leaderboard[tc.LICENSE_NAME]))

# pandas reductions skip NaN; the builtin max()/min() over a list containing
# NaN give an order-dependent answer.
max_input_price = text_leaderboard[tc.INPUT].max()
max_output_price = text_leaderboard[tc.OUTPUT].max()
max_latency = text_leaderboard[tc.LATENCY].max().round(3)

smallest_known_params = text_leaderboard[tc.PARAMS].min()
min_parameters = 0 if pd.isna(smallest_known_params) else smallest_known_params
max_parameter = max_parameter_size
parameter_step = 1

min_context = min(contexts)
max_context = max(contexts)
context_step = 8

min_date = min(dates)
max_date = max(dates)

# Date settings: the year dropdowns run from tc.START_YEAR to the current year.
today = datetime.date.today()
end_year = today.year
start_year = tc.START_YEAR

YEARS = [str(y) for y in range(int(start_year), int(end_year) + 1)]
MONTHS = list(calendar.month_name[1:])

TITLE = tc.TITLE
108
llm_calc_app = gr.Blocks()
with llm_calc_app:

    gr.HTML(TITLE)

    with gr.Row():

        #####################################
        # First column: language, release date, pricing and license filters
        #####################################
        with gr.Column(scale=2):

            # Language selection
            with gr.Row():
                lang_dropdown = gr.Dropdown(
                    choices=langs,
                    value=[],
                    multiselect=True,
                    label="Languages 🗣️",
                )

            # Release date range selection (start and end year/month)
            with gr.Row():
                start_year_dropdown = gr.Dropdown(
                    choices=YEARS,
                    value=[],
                    label="Model Release - Year 🗓️",
                )
                start_month_dropdown = gr.Dropdown(
                    choices=MONTHS,
                    value=[],
                    label="Month 📜",
                )

                end_year_dropdown = gr.Dropdown(
                    choices=YEARS,
                    value=[],
                    label="End - Year 🗓️",
                )
                end_month_dropdown = gr.Dropdown(
                    choices=MONTHS,
                    value=[],
                    label="Month 📜",
                )

            # Price selection
            with gr.Row():

                input_pricing_slider = RangeSlider(
                    minimum=0,
                    maximum=max_input_price,
                    value=(0, max_input_price),
                    label="💲/1M input tokens",
                    elem_id="double-slider-3",
                )

                output_pricing_slider = RangeSlider(
                    minimum=0,
                    maximum=max_output_price,
                    value=(0, max_output_price),
                    label="💲/1M output tokens",
                    elem_id="double-slider-4",
                )

            # License selection (all licenses ticked initially)
            with gr.Row():
                license_checkbox = gr.CheckboxGroup(
                    choices=licenses,
                    value=licenses,
                    label="License 🛡️",
                )

        #############################################################
        # Second column: parameters, context, modality and model type
        #############################################################
        with gr.Column(scale=1):

            # Parameter-count range
            with gr.Row():
                parameter_slider = RangeSlider(
                    minimum=0,
                    maximum=max_parameter,
                    label=f"Parameters 🔍 {int(min_parameters)}B - {int(max_parameter)}B+",
                    elem_id="double-slider-1",
                    step=parameter_step,
                )

            # Context-size range
            with gr.Row():
                context_slider = RangeSlider(
                    minimum=0,
                    maximum=max_context,
                    label="Context (k) 📏",
                    elem_id="double-slider-2",
                    step=context_step,
                )

            # Modality selection
            with gr.Row():
                multimodal_checkbox = gr.CheckboxGroup(
                    choices=[tc.TEXT, tc.SINGLE_IMG, tc.MULT_IMG, tc.AUDIO, tc.VIDEO],
                    value=[],
                    label="Modalities 📝📷🎧🎬",
                )

            # Model type selection (both types ticked initially)
            with gr.Row():
                open_weight_checkbox = gr.CheckboxGroup(
                    choices=[tc.OPEN, tc.COMM],
                    value=[tc.OPEN, tc.COMM],
                    label="Model Type 🔓 💼",
                )

    # Main leaderboard row
    with gr.Row():

        leaderboard_table = gr.Dataframe(
            value=short_leaderboard,
            elem_id="text-leaderboard-table",
            interactive=False,
            visible=True,
            datatype=['str', 'number', 'number', 'date', 'number', 'number', 'number', 'number', 'markdown'],
        )

        # Hidden copy of the full table; it is the data source handed to
        # `filter`, which then returns the visible column subset.
        dummy_leaderboard_table = gr.Dataframe(
            value=text_leaderboard,
            elem_id="dummy-leaderboard-table",
            interactive=False,
            visible=False,
        )

    # Every control re-runs the same filter with the same inputs, so register
    # the handler once per control instead of repeating the call twelve times.
    # The order of this list must match the positional parameters of `filter`.
    filter_inputs = [
        dummy_leaderboard_table, lang_dropdown, parameter_slider,
        input_pricing_slider, output_pricing_slider, multimodal_checkbox,
        context_slider, open_weight_checkbox,
        start_year_dropdown, start_month_dropdown,
        end_year_dropdown, end_month_dropdown,
        license_checkbox,
    ]
    for control in filter_inputs[1:]:  # skip the hidden source table
        control.change(
            filter,
            filter_inputs,
            [leaderboard_table],
            queue=True,
        )

    llm_calc_app.load()

llm_calc_app.queue()

# Add scheduler to auto-restart the HF space at every TIME interval and update every component each time
scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, 'interval', seconds=TIME)
scheduler.start()

# Log current start time and scheduled restart time
print(datetime.datetime.now())
print(f"Scheduled restart at {datetime.datetime.now() + datetime.timedelta(seconds=TIME)}")

llm_calc_app.launch()
assets/pricing.json ADDED
@@ -0,0 +1,212 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "model_id": "gpt-4-1106-vision-preview",
4
+ "input": "10$",
5
+ "output": "30$"
6
+ },
7
+ {
8
+ "model_id": "gpt-4o-2024-05-13",
9
+ "input": "5$",
10
+ "output": "15$"
11
+ },
12
+ {
13
+ "model_id": "gpt-4o-2024-08-06",
14
+ "input": "3.750$",
15
+ "output": "15$"
16
+ },
17
+ {
18
+ "model_id": "gpt-4o-mini-2024-07-18",
19
+ "input": "0.300$",
20
+ "output": "1.200$"
21
+ },
22
+ {
23
+ "model_id": "gpt-4-turbo-2024-04-09",
24
+ "input": "10$",
25
+ "output": "30$"
26
+ },
27
+ {
28
+ "model_id": "gpt-4-1106-preview",
29
+ "input": "",
30
+ "output": ""
31
+ },
32
+ {
33
+ "model_id": "gpt-4-0125-preview",
34
+ "input": "10$",
35
+ "output": "30$"
36
+ },
37
+ {
38
+ "model_id": "o1-preview-2024-09-12",
39
+ "input": "15$",
40
+ "output": "60$"
41
+ },
42
+ {
43
+ "model_id": "o1-mini-2024-09-12",
44
+ "input": "3$",
45
+ "output": "12$"
46
+ },
47
+ {
48
+ "model_id": "gpt-3.5-turbo-0125",
49
+ "input": "0.5$",
50
+ "output": "1.5$"
51
+ },
52
+ {
53
+ "model_id": "gpt-4-0613",
54
+ "input": "",
55
+ "output": ""
56
+ },
57
+ {
58
+ "model_id": "gpt-4-0314",
59
+ "input": "",
60
+ "output": ""
61
+ },
62
+ {
63
+ "model_id": "gpt-3.5-turbo-1106",
64
+ "input": "1$",
65
+ "output": "2$"
66
+ },
67
+ {
68
+ "model_id": "gpt-3.5-turbo-0613",
69
+ "input": "1.5$",
70
+ "output": "2$"
71
+ },
72
+ {
73
+ "model_id": "command",
74
+ "input": "",
75
+ "output": ""
76
+ },
77
+ {
78
+ "model_id": "command-light",
79
+ "input": "",
80
+ "output": ""
81
+ },
82
+ {
83
+ "model_id": "claude-v1.3",
84
+ "input": "",
85
+ "output": ""
86
+ },
87
+ {
88
+ "model_id": "claude-v1.3-100k",
89
+ "input": "",
90
+ "output": ""
91
+ },
92
+ {
93
+ "model_id": "claude-instant-1.2",
94
+ "input": "",
95
+ "output": ""
96
+ },
97
+ {
98
+ "model_id": "claude-2",
99
+ "input": "8$",
100
+ "output": "24$"
101
+ },
102
+ {
103
+ "model_id": "claude-2.1",
104
+ "input": "8$",
105
+ "output": "24$"
106
+ },
107
+ {
108
+ "model_id": "claude-3-opus-20240229",
109
+ "input": "15$",
110
+ "output": "75$"
111
+ },
112
+ {
113
+ "model_id": "claude-3-sonnet-20240229",
114
+ "input": "3$",
115
+ "output": "15$"
116
+ },
117
+ {
118
+ "model_id": "claude-3-haiku-20240307",
119
+ "input": "0.25$",
120
+ "output": "1.25$"
121
+ },
122
+ {
123
+ "model_id": "claude-3-5-sonnet-20240620",
124
+ "input": "3$",
125
+ "output": "15$"
126
+ },
127
+ {
128
+ "model_id": "claude-3-5-haiku-20241022",
129
+ "input": "0.8$",
130
+ "output": "4$"
131
+ },
132
+ {
133
+ "model_id": "claude-3-5-sonnet-20241022",
134
+ "input": "3$",
135
+ "output": "15$"
136
+ },
137
+ {
138
+ "model_id": "gemini-1.0-pro-001",
139
+ "input": "0.5$",
140
+ "output": "1.5$"
141
+ },
142
+ {
143
+ "model_id": "gemini-1.0-pro-002",
144
+ "input": "0.5$",
145
+ "output": "1.5$"
146
+ },
147
+ {
148
+ "model_id": "gemini-1.0-pro-vision-latest",
149
+ "input": "0.5$",
150
+ "output": "1.5$"
151
+ },
152
+ {
153
+ "model_id": "gemini-1.5-flash-001",
154
+ "input": "0.075$",
155
+ "output": "0.3$"
156
+ },
157
+ {
158
+ "model_id": "gemini-1.5-pro-001",
159
+ "input": "1.25$",
160
+ "output": "5$"
161
+ },
162
+ {
163
+ "model_id": "gemini-1.5-pro-002",
164
+ "input": "1.25$",
165
+ "output": "5$"
166
+ },
167
+ {
168
+ "model_id": "gemini-1.5-flash-002",
169
+ "input": "0.075$",
170
+ "output": "0.3$"
171
+ },
172
+ {
173
+ "model_id": "gemini-1.5-flash-8b-001",
174
+ "input": "0.0375$",
175
+ "output": "0.15$"
176
+ },
177
+ {
178
+ "model_id": "gemini-2.0-flash-exp",
179
+ "input": "0$",
180
+ "output": "0$"
181
+ },
182
+ {
183
+ "model_id": "luminous-supreme-control",
184
+ "input": "",
185
+ "output": ""
186
+ },
187
+ {
188
+ "model_id": "luminous-supreme",
189
+ "input": "",
190
+ "output": ""
191
+ },
192
+ {
193
+ "model_id": "luminous-extended",
194
+ "input": "",
195
+ "output": ""
196
+ },
197
+ {
198
+ "model_id": "luminous-base",
199
+ "input": "",
200
+ "output": ""
201
+ },
202
+ {
203
+ "model_id": "luminous-base",
204
+ "input": "",
205
+ "output": ""
206
+ },
207
+ {
208
+ "model_id": "luminous-base",
209
+ "input": "",
210
+ "output": ""
211
+ }
212
+ ]
assets/text_content.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+ # Data Sources
4
+ CLEMBENCH_RUNS_REPO = "https://raw.githubusercontent.com/clembench/clembench-runs/main/"
5
+ REGISTRY_URL = "https://raw.githubusercontent.com/clp-research/clembench/refs/heads/refactor_model_registry/backends/model_registry.json"
6
+ BENCHMARK_FILE = "benchmark_runs.json"
7
+
8
+ LATENCY_FOLDER = os.path.join("Addenda", "Latency")
9
+ RESULT_FILE = "results.csv"
10
+ LATENCY_SUFFIX = "_latency.csv"
11
+
12
+ # Setup Column Names
13
+ # Note - Changing this does not affect the already generated csv `merged_data.csv`
14
+ # Run `src/process_data.py` for this
15
+
16
+ DEFAULT_MODEL_NAME = "Unnamed: 0"
17
+ DEFAULT_CLEMSCORE = "-, clemscore"
18
+
19
+ MODEL_NAME = "Model Name"
20
+ CLEMSCORE = "Clemscore"
21
+ LATENCY = "Latency (s)"
22
+ PARAMS = "Parameters (B)"
23
+ DUMMY_PARAMS = "Parameters Dummy (B)"
24
+ RELEASE_DATE = 'Release Date'
25
+ OPEN_WEIGHT = 'Open Weight'
26
+ LANGS = "Languages"
27
+ CONTEXT = "Context Size (k)"
28
+ LICENSE_NAME = "License Name"
29
+ LICENSE_URL = "License URL"
30
+ SINGLE_IMG = "Single Image"
31
+ MULT_IMG = "Multi Image"
32
+ TEXT = "Text-Only"
33
+ AUDIO = "Audio"
34
+ VIDEO = "Video"
35
+ INPUT = "Input $/1M tokens"
36
+ OUTPUT = "Output $/1M tokens"
37
+ LICENSE = "License"
38
+ TEMP_DATE = "Temp Date"
39
+
40
+ # UI - HF Space
41
+ OPEN = "Open-Weight"
42
+ COMM = "Commercial"
43
+
44
# HTML header rendered at the top of the app (emoji restored from mis-decoded bytes).
TITLE = """<h1 align="center" id="space-title"> LLM Calculator ⚖️⚡ 📏💰</h1> <p align="center">Performance, latency metrics are based on <a href="https://clembench.github.io/" target="_blank">clembench</a> .</p>"""
45
+
46
+ HF_REPO = "colab-potsdam/llm-calculator"
47
+ # Date Picker (set as Dropdown until datetime object is fixed)
48
+ START_YEAR = "2020"
49
+ MONTH_MAP = {
50
+ "January": 1,
51
+ "February": 2,
52
+ "March": 3,
53
+ "April": 4,
54
+ "May": 5,
55
+ "June": 6,
56
+ "July": 7,
57
+ "August": 8,
58
+ "September": 9,
59
+ "October": 10,
60
+ "November": 11,
61
+ "December": 12
62
+ }
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ pandas==2.2.3
2
+ gradio_rangeslider==0.0.7
3
+ gradio==4.44.1
4
+ pycountry==24.6.1
5
+ apscheduler==3.10.4
src/collect_data.py ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Collect data from the multiple sources and create a base dataframe for the LLMCalculator table
3
+ Latency - https://github.com/clembench/clembench-runs/tree/main/Addenda/Latency
4
+ Pricing - pricing.json
5
+ Model info - https://github.com/kushal-10/clembench/blob/feat/registry/backends/model_registry_updated.json
6
+ """
7
+
8
+ import pandas as pd
9
+ import json
10
+ import requests
11
+ from assets.text_content import CLEMBENCH_RUNS_REPO, REGISTRY_URL, BENCHMARK_FILE, LATENCY_FOLDER, RESULT_FILE, LATENCY_SUFFIX
12
+ import os
13
+
14
def validate_request(url: str, response) -> bool:
    """
    Report whether an HTTP response signals success.

    Args:
        url (str): The URL that was requested (only used in the failure message)
        response (requests.Response): The response object from the request

    Returns:
        bool: True when the status code is exactly 200, False otherwise
    """
    succeeded = response.status_code == 200
    if not succeeded:
        print(f"Failed to read file - {url}. Status Code: {response.status_code}")
    return succeeded
30
+
31
def fetch_benchmark_data(benchmark: str = "text", version_names: "list | None" = None) -> tuple:
    """
    Fetch and parse benchmark results and latency data from CSV files.

    Versions are tried in the order given; the first one whose results and
    latency files both download with status 200 and parse is returned.

    Args:
        benchmark (str): Type of benchmark to fetch ('text' or 'multimodal')
        version_names (list | None): Version names to search through, sorted
            by latest first. ``None`` (the default) is treated as an empty list.

    Returns:
        tuple[pd.DataFrame, pd.DataFrame]: A tuple containing:
            - results_df: DataFrame with benchmark results
            - latency_df: DataFrame with latency measurements
        Returns (None, None) if no matching version is found or requests fail

    Note:
        Request and CSV errors are caught and printed per version; they are
        not propagated to the caller.
    """
    # Local import keeps the block self-contained; pandas reads CSV text
    # through any file-like object, so the stdlib StringIO is sufficient.
    from io import StringIO

    for v in version_names or ():
        # A version belongs to the multimodal benchmark iff its name says so.
        is_multimodal = 'multimodal' in v
        if (benchmark == "multimodal") != is_multimodal:
            continue

        # URLs always use forward slashes. os.path.join would emit
        # backslashes on Windows, and LATENCY_FOLDER is itself built with
        # os.path.join, so normalise its separator as well.
        base_url = CLEMBENCH_RUNS_REPO.rstrip("/")
        latency_folder = LATENCY_FOLDER.replace(os.sep, "/")
        results_url = f"{base_url}/{v}/{RESULT_FILE}"
        latency_url = f"{base_url}/{latency_folder}/{v}{LATENCY_SUFFIX}"

        try:
            # A timeout prevents a stalled connection from hanging startup.
            results = requests.get(results_url, timeout=30)
            latency = requests.get(latency_url, timeout=30)

            if validate_request(results_url, results) and validate_request(latency_url, latency):
                # Convert the CSV content to pandas DataFrames
                results_df = pd.read_csv(StringIO(results.text))
                latency_df = pd.read_csv(StringIO(latency.text))
                return results_df, latency_df

        except requests.RequestException as e:
            print(f"Error fetching data for version {v}: {e}")
        except pd.errors.EmptyDataError:
            print(f"Error: Empty CSV file found for version {v}")
        except pd.errors.ParserError:
            print(f"Error: Unable to parse CSV data for version {v}")

    return None, None
78
+
79
def fetch_version_metadata() -> tuple:
    """
    Fetch and process benchmark metadata from the Clembench GitHub repository.

    The data is sourced from: https://github.com/clembench/clembench-runs
    The repository base URL is the CLEMBENCH_RUNS_REPO constant imported from
    assets.text_content.

    Returns:
        tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame]: A tuple
        in this order (latency first, then results, for each benchmark):
            - mm_latency: Multimodal latency data
            - mm_result: Multimodal benchmark results
            - text_latency: Text latency data
            - text_result: Text benchmark results
        Returns (None, None, None, None) if the version list cannot be
        fetched or parsed. Individual elements are None when no usable
        version exists for that benchmark.
    """
    json_url = CLEMBENCH_RUNS_REPO + BENCHMARK_FILE

    try:
        response = requests.get(json_url, timeout=30)
    except requests.RequestException as e:
        # Honour the documented contract instead of letting a network error
        # propagate to the caller.
        print(f"Error fetching benchmark metadata - {json_url}: {e}")
        return None, None, None, None

    # Check if the JSON file request was successful
    if not validate_request(json_url, response):
        return None, None, None, None

    try:
        versions = response.json()['versions']
    except (ValueError, KeyError) as e:
        print(f"Error parsing benchmark metadata - {json_url}: {e}")
        return None, None, None, None

    # Sort the versions in benchmark by latest first. The key turns a name
    # such as "v1.6_multimodal" into [1, 6]: drop the leading "v", keep the
    # part before the first "_", compare the dotted numbers numerically.
    version_names = sorted(
        [ver['version'] for ver in versions],
        key=lambda v: list(map(int, v[1:].split('_')[0].split('.'))),
        reverse=True
    )

    # Latency is in seconds
    mm_result, mm_latency = fetch_benchmark_data("multimodal", version_names)
    text_result, text_latency = fetch_benchmark_data("text", version_names)

    return mm_latency, mm_result, text_latency, text_result
116
+
117
def fetch_registry_data() -> dict:
    """
    Fetch and parse model registry data from the Clembench registry URL.

    The data is downloaded from REGISTRY_URL and returned as decoded JSON.

    Returns:
        The decoded JSON document, or None if the request fails, the status
        code is not 200, or the body is not valid JSON.
        NOTE(review): annotated as ``dict``, but the ``__main__`` demo in this
        file indexes the result with ``[0]`` - confirm the actual shape.

    Note:
        Request and JSON errors are caught and printed; they are not
        propagated to the caller.
    """
    try:
        response = requests.get(REGISTRY_URL, timeout=30)
    except requests.RequestException as e:
        print(f"Error fetching registry data: {e}")
        return None

    if not validate_request(REGISTRY_URL, response):
        return None

    # Parse separately from the fetch: the decode error raised by
    # `response.json()` is also a RequestException, so a shared try block
    # would report malformed JSON as a fetch failure. ValueError covers the
    # json, simplejson and requests flavours of JSONDecodeError.
    try:
        return response.json()
    except ValueError as e:
        print(f"Error parsing registry JSON: {e}")
        return None
146
+
147
if __name__ == "__main__":
    # Manual smoke test: fetch both sources and show the first registry entry.
    fetch_version_metadata()
    registry_data = fetch_registry_data()
    # fetch_registry_data() returns None on failure; avoid indexing into it.
    if registry_data:
        print(registry_data[0])
    else:
        print("No registry data available.")
151
+
152
+
src/filter_utils.py ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Utility functions for filtering the dataframe
2
+
3
+ import pandas as pd
4
+ import assets.text_content as tc
5
+ import calendar
6
+ from typing import Union, List
7
+ from datetime import datetime
8
+
9
+ current_year = str(datetime.now().year)
10
+
11
def filter_cols(df):
    """Return `df` restricted to the fixed leaderboard display columns, in display order."""
    display_columns = [
        tc.MODEL_NAME,
        tc.CLEMSCORE,
        tc.INPUT,
        tc.OUTPUT,
        tc.LATENCY,
        tc.CONTEXT,
        tc.PARAMS,
        tc.RELEASE_DATE,
        tc.LICENSE,
    ]
    return df[display_columns]
26
+
27
+
28
def convert_date_components_to_timestamp(year: int, month: int) -> int:
    """Return the POSIX timestamp of the first day of the given month.

    Args:
        year: Calendar year, as an int or a numeric string (e.g. 2024 or "2024").
        month: Month number 1-12, as an int or a numeric string.

    Returns:
        Whole seconds since the epoch for ``YYYY-MM-01`` as parsed by pandas.

    Raises:
        ValueError: If either component is not numeric or the resulting date
            cannot be parsed.
    """
    # Normalise to int first: the zero-padded `d` format spec rejects strings.
    date_str = f"{int(year):04d}-{int(month):02d}-01"
    return int(pd.to_datetime(date_str).timestamp())
33
+
34
def filter_by_date(df: pd.DataFrame,
                   start_year, start_month,
                   end_year, end_month,
                   date_column: str = tc.RELEASE_DATE) -> pd.DataFrame:
    """
    Filter DataFrame by date range using separate year and month components.

    The range is inclusive of the whole start month and the whole end month.

    Args:
        df: Frame to filter.
        start_year, end_year: Year values; a falsy value (e.g. an empty list)
            falls back to tc.START_YEAR / the current year respectively.
        start_month, end_month: Month names that are keys of tc.MONTH_MAP; a
            falsy value falls back to "January" / "December" respectively.
        date_column: Column holding values parseable by ``pd.to_datetime``.

    Returns:
        The rows whose date falls inside the range, or `df` unchanged if the
        dates cannot be processed (ValueError / TypeError).
    """
    # The UI passes an empty list for unset dropdowns, which would override
    # argument defaults, so the defaults are applied here instead.
    if not start_year:
        start_year = tc.START_YEAR
    if not end_year:
        end_year = current_year

    if not start_month:
        start_month = "January"
    if not end_month:
        end_month = "December"

    try:
        start_timestamp = convert_date_components_to_timestamp(
            int(start_year),
            int(tc.MONTH_MAP[start_month])
        )

        # Upper bound is the first day of the month AFTER the end month and is
        # exclusive. Using the first day of the end month itself would drop
        # everything released later in that month.
        end_year_num = int(end_year)
        end_month_num = int(tc.MONTH_MAP[end_month])
        if end_month_num == 12:
            next_year, next_month = end_year_num + 1, 1
        else:
            next_year, next_month = end_year_num, end_month_num + 1
        end_timestamp = convert_date_components_to_timestamp(next_year, next_month)

        # Convert the DataFrame's date column to timestamps for comparison
        date_timestamps = pd.to_datetime(df[date_column]).apply(lambda x: int(x.timestamp()))

        # Filter the DataFrame
        return df[
            (date_timestamps >= start_timestamp) &
            (date_timestamps < end_timestamp)
        ]
    except (ValueError, TypeError) as e:
        print(f"Error processing dates: {e}")
        return df  # Return unfiltered DataFrame if there's an error
75
+
76
+
77
def filter(df, language_list, parameters, input_price, output_price, multimodal,
           context, open_weight,
           start_year, start_month, end_year, end_month,
           license ):
    """Apply every UI filter to the full leaderboard and return the display table.

    Args:
        df: Full leaderboard frame with the columns named in assets.text_content.
        language_list: Languages a model must ALL support (empty = no constraint).
        parameters: (low, high) bounds applied to the tc.DUMMY_PARAMS column.
        input_price: (low, high) bounds applied to tc.INPUT.
        output_price: (low, high) bounds applied to tc.OUTPUT.
        multimodal: Selected modality labels; each selected one narrows the result.
        context: (low, high) bounds applied to tc.CONTEXT.
        open_weight: Selected model types out of tc.OPEN / tc.COMM.
        start_year, start_month, end_year, end_month: Release-date range,
            forwarded to filter_by_date.
        license: License names to keep.

    Returns:
        The matching rows reduced to the display columns, sorted by clemscore
        descending.
    """
    if not df.empty:
        # Match whole language names. tc.LANGS is a comma-separated string, so
        # a plain substring test would let one name match inside another.
        required_langs = set(language_list or [])
        df = df[df[tc.LANGS].apply(
            lambda cell: required_langs <= {part.strip() for part in cell.split(',')}
        )]

    if not df.empty:
        df = df[(df[tc.DUMMY_PARAMS] >= parameters[0]) & (df[tc.DUMMY_PARAMS] <= parameters[1])]

    if not df.empty:
        df = df[(df[tc.INPUT] >= input_price[0]) & (df[tc.INPUT] <= input_price[1])]

    if not df.empty:
        df = df[(df[tc.OUTPUT] >= output_price[0]) & (df[tc.OUTPUT] <= output_price[1])]

    if not df.empty:
        # Explicit ==True/==False comparisons are kept on purpose: they behave
        # predictably even if a flag column is not a pure boolean dtype.
        if tc.TEXT in multimodal:
            df = df[(df[tc.SINGLE_IMG] == False) & (df[tc.MULT_IMG] == False)  # noqa: E712
                    & (df[tc.AUDIO] == False) & (df[tc.VIDEO] == False)]  # noqa: E712
        # Each selected modality narrows the result further (logical AND).
        for modality in (tc.SINGLE_IMG, tc.MULT_IMG, tc.AUDIO, tc.VIDEO):
            if modality in multimodal:
                df = df[df[modality] == True]  # noqa: E712

    if not df.empty:
        # Coerce the context column to numbers; unparseable values count as 0.
        context_size = pd.to_numeric(df[tc.CONTEXT], errors='coerce').fillna(0)
        df = df[(context_size >= context[0]) & (context_size <= context[1])]

    if not df.empty:
        wants_open = tc.OPEN in open_weight
        wants_commercial = tc.COMM in open_weight
        if wants_open and not wants_commercial:
            df = df[df[tc.OPEN_WEIGHT] == True]  # noqa: E712
        elif wants_commercial and not wants_open:
            df = df[df[tc.OPEN_WEIGHT] == False]  # noqa: E712
        elif not wants_open and not wants_commercial:
            # Nothing selected: return an empty frame with the same columns.
            df = pd.DataFrame(columns=df.columns)

    if not df.empty:
        # Exact match against the ticked license names (the original used a
        # substring test).
        df = df[df[tc.LICENSE_NAME].isin(license)]

    df = filter_by_date(df, start_year, start_month, end_year, end_month, tc.TEMP_DATE)

    df = filter_cols(df)
    df = df.sort_values(by=tc.CLEMSCORE, ascending=False)

    return df  # Return the filtered dataframe
132
+
133
+
src/process_data.py ADDED
@@ -0,0 +1,206 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import json
3
+ import os
4
+ import pycountry
5
+ import re
6
+
7
+ from src.collect_data import fetch_version_metadata, fetch_registry_data
8
+ import assets.text_content as tc
9
+
10
+ PRICING_PATH = os.path.join('assets', 'pricing.json')
11
+
12
+ # Convert parameters to float, handling both B and T suffixes
13
def convert_parameters(param):
    """Convert a parameter-count label such as ``"7B"`` or ``"1.5T"`` to billions.

    Args:
        param: Parameter count with an optional ``B`` (billions) or ``T``
            (trillions) suffix in either letter case, a plain number, or a
            missing value (``None``, NaN, empty or whitespace-only string).

    Returns:
        The parameter count in billions as a ``float``, or ``None`` when
        ``param`` is missing.

    Raises:
        ValueError: If the text left after removing the suffix is not a number.
    """
    if pd.isna(param) or param == '':
        return None
    # Trim and upper-case so "70b" and " 7B " parse exactly like "70B" / "7B".
    text = str(param).strip().upper()
    if not text:
        return None
    if 'T' in text:
        # Trillions are expressed in billions to share one unit with "B" values.
        return float(text.replace('T', '')) * 1000
    return float(text.replace('B', ''))
20
+
21
+ # Clean price strings by removing '$' and handling empty strings
22
def clean_price(price):
    """Convert a price such as ``"$0.50"`` to a ``float``.

    Args:
        price: Price as a string with an optional ``$`` sign, a plain number,
            or a missing value (``None``, NaN or an empty string).

    Returns:
        The numeric price, or ``None`` when ``price`` is missing or contains
        nothing besides the currency sign.

    Raises:
        ValueError: If the remaining text is not a valid number.
    """
    if pd.isna(price) or price == '':
        return None
    # Coerce to str first: numeric inputs have no ``.replace`` method.
    text = str(price).replace('$', '').strip()
    if not text:
        return None
    return float(text)
26
+
27
+ # Handle language mapping for both string and list inputs
28
def map_languages(languages):
    """Map language codes or names to display names joined by ``', '``.

    Args:
        languages: A list of language identifiers, a comma-separated string,
            any other iterable of identifiers, or a missing value
            (``None`` or NaN).

    Returns:
        ``None`` for a missing value; ``str(languages)`` when the input is
        not iterable; otherwise the resolved names joined with ``', '``.
        An identifier that cannot be resolved is kept in its stripped,
        lower-cased form.
    """
    # None must be treated like NaN; otherwise it would fall through to
    # ``list(None)`` below and come back as the literal string "None".
    if languages is None or (isinstance(languages, float) and pd.isna(languages)):
        return None

    def get_language_name(lang):
        # Normalise before lookup so " EN " and "en" resolve identically.
        lang = str(lang).strip().lower()
        try:
            # First try a two-letter code (en, fr, ...).
            language = pycountry.languages.get(alpha_2=lang)
            if not language:
                # Then try a capitalised language name (English, French, ...).
                language = pycountry.languages.get(name=lang.capitalize())
            return language.name if language else lang
        except (AttributeError, LookupError):
            return lang

    if isinstance(languages, list):
        lang_list = languages
    elif isinstance(languages, str):
        lang_list = [part.strip() for part in languages.split(',')]
    else:
        try:
            lang_list = list(languages)
        except TypeError:
            # Not iterable (e.g. a bare number): fall back to its text form.
            return str(languages)

    return ', '.join(get_language_name(lang) for lang in lang_list)
61
+
62
+ # Extract multimodality fields
63
def get_multimodality_field(model_data, field):
    """Read one flag from ``model_data['model_config']['multimodality']``.

    Args:
        model_data: Object exposing ``.get`` (a dict or a pandas row) that
            describes a single model.
        field: Name of the multimodality flag to read, e.g. ``'audio'``.

    Returns:
        The stored value for ``field``, or ``False`` when any level of the
        nested structure is absent or is not a mapping.
    """
    try:
        return model_data.get('model_config', {}).get('multimodality', {}).get(field, False)
    except (AttributeError, TypeError):
        # A level is None/NaN/non-mapping (no usable ``.get``) or the key is
        # unhashable: treat the capability as not declared.
        return False
68
+
69
def clean_model_name(model_name: str) -> str:
    """Return ``model_name`` cut off just before its first temperature marker.

    A marker is ``-t`` followed by ``0`` or ``1``, a dot, a single digit and
    ``--`` (for example ``-t0.0--`` or ``-t1.0--``). A name without such a
    marker is returned unchanged.
    """
    marker = re.search(r'-t[0-1]\.[0-9]--', model_name)
    if marker is None:
        return model_name
    return model_name[:marker.start()]
74
+
75
def merge_data():
    """Build the leaderboard table from benchmark, registry and pricing data.

    Steps:
        1. Fetch multimodal/text latency and result frames plus the model
           registry, and load the pricing JSON at ``PRICING_PATH``.
        2. Average latency and clemscore per model over both benchmarks.
        3. Inner-join those averages with the registry on the model name, so
           only models present in both remain.
        4. Left-join pricing; models without a price get ``0.0``.
        5. Derive the markdown license link, a copy of the release date,
           readable language names, an integer context size and a
           ``tc.DUMMY_PARAMS`` column.

    Returns:
        pandas.DataFrame: One row per model, sorted by ``tc.CLEMSCORE`` in
        descending order.
    """
    mm_latency_df, mm_result_df, text_latency_df, text_result_df = fetch_version_metadata()
    registry_data = fetch_registry_data()
    # Explicit encoding so parsing does not depend on the platform default.
    with open(PRICING_PATH, 'r', encoding='utf-8') as f:
        pricing_data = json.load(f)

    # Give both result frames the same 'model' / 'clemscore' column names and
    # strip the temperature suffix from the model identifiers.
    result_columns = {tc.DEFAULT_MODEL_NAME: 'model', tc.DEFAULT_CLEMSCORE: 'clemscore'}
    for result_df in (mm_result_df, text_result_df):
        result_df.rename(columns=result_columns, inplace=True)
        result_df['model'] = result_df['model'].apply(clean_model_name)

    # Average each metric per model across the multimodal and text benchmarks.
    avg_latency_df = (
        pd.concat([mm_latency_df, text_latency_df], axis=0)
        .groupby('model')['latency'].mean().reset_index()
    )
    avg_clemscore_df = (
        pd.concat([mm_result_df, text_result_df], axis=0)
        .groupby('model')['clemscore'].mean().reset_index()
    )

    # Outer join keeps models that have only one of the two metrics.
    lat_clem_df = pd.merge(avg_latency_df, avg_clemscore_df, on='model', how='outer')

    # Convert registry_data to a DataFrame for easier merging.
    registry_df = pd.DataFrame(registry_data)

    # Flatten the nested license record into two scalar columns.
    registry_df['license_name'] = registry_df['license'].apply(lambda x: x['name'])
    registry_df['license_url'] = registry_df['license'].apply(lambda x: x['url'])

    # One column per multimodality capability.
    for field in ('single_image', 'multiple_images', 'audio', 'video'):
        registry_df[field] = registry_df.apply(
            lambda row, field=field: get_multimodality_field(row, field), axis=1
        )

    registry_df = registry_df[[
        'model_name', 'parameters', 'release_date', 'open_weight',
        'languages', 'context_size', 'license_name', 'license_url',
        'single_image', 'multiple_images', 'audio', 'video'
    ]]

    # Inner join: keep only models that have both scores and a registry entry.
    merged_df = pd.merge(
        lat_clem_df,
        registry_df,
        left_on='model',
        right_on='model_name',
        how='inner'
    )

    # Switch to the display column names defined in text_content.
    merged_df = merged_df.rename(columns={
        'model': tc.MODEL_NAME,
        'latency': tc.LATENCY,
        'clemscore': tc.CLEMSCORE,
        'parameters': tc.PARAMS,
        'release_date': tc.RELEASE_DATE,
        'open_weight': tc.OPEN_WEIGHT,
        'languages': tc.LANGS,
        'context_size': tc.CONTEXT,
        'license_name': tc.LICENSE_NAME,
        'license_url': tc.LICENSE_URL,
        'single_image': tc.SINGLE_IMG,
        'multiple_images': tc.MULT_IMG,
        'audio': tc.AUDIO,
        'video': tc.VIDEO
    })

    # Convert the pricing list to a DataFrame with numeric prices.
    pricing_df = pd.DataFrame(pricing_data)
    pricing_df['input'] = pricing_df['input'].apply(clean_price)
    pricing_df['output'] = pricing_df['output'].apply(clean_price)

    # Left join so models without pricing information are kept.
    # Use tc.MODEL_NAME (the name assigned by the rename above) rather than a
    # hard-coded literal, so the join follows the constant if it changes.
    merged_df = pd.merge(
        merged_df,
        pricing_df,
        left_on=tc.MODEL_NAME,
        right_on='model_id',
        how='left'
    )

    # Drop the duplicate model key and rename the price columns.
    merged_df = merged_df.drop('model_id', axis=1)
    merged_df = merged_df.rename(columns={
        'input': tc.INPUT,
        'output': tc.OUTPUT
    })

    # Models without a pricing entry are given a price of 0.0.
    merged_df[tc.INPUT] = merged_df[tc.INPUT].fillna(0.0)
    merged_df[tc.OUTPUT] = merged_df[tc.OUTPUT].fillna(0.0)

    # Parameter counts are parsed for open-weight models only; every other
    # model gets None here.
    merged_df[tc.PARAMS] = merged_df.apply(
        lambda row: None if not row[tc.OPEN_WEIGHT] else convert_parameters(row[tc.PARAMS]),
        axis=1
    )

    # Markdown link combining the license name and URL.
    merged_df[tc.LICENSE] = merged_df.apply(
        lambda row: f'[{row[tc.LICENSE_NAME]}]({row[tc.LICENSE_URL]})', axis=1
    )
    merged_df[tc.TEMP_DATE] = merged_df[tc.RELEASE_DATE]

    merged_df[tc.LANGS] = merged_df[tc.LANGS].apply(map_languages)

    # Sort by clemscore in descending order.
    merged_df = merged_df.sort_values(by=tc.CLEMSCORE, ascending=False)

    # The registry join key duplicates tc.MODEL_NAME and is no longer needed.
    merged_df.drop(columns=['model_name'], inplace=True)

    # Strip the 'k' suffix from the context size and store it as an integer;
    # values that cannot be parsed become 0.
    merged_df[tc.CONTEXT] = merged_df[tc.CONTEXT].astype(str).str.replace('k', '', regex=False)
    merged_df[tc.CONTEXT] = pd.to_numeric(merged_df[tc.CONTEXT], errors='coerce').fillna(0).astype(int)

    # Models that are not open-weight have no parameter count; give them the
    # largest open-weight value in a separate column.
    max_params_value = merged_df.loc[merged_df[tc.OPEN_WEIGHT], tc.PARAMS].max()
    merged_df[tc.DUMMY_PARAMS] = merged_df.apply(
        lambda row: max_params_value if not row[tc.OPEN_WEIGHT] else row[tc.PARAMS],
        axis=1
    )

    return merged_df
201
+
202
if __name__ == '__main__':
    # Script entry point: rebuild the merged table and write it to
    # assets/merged_data.csv without the index column.
    csv_path = os.path.join('assets', 'merged_data.csv')
    merge_data().to_csv(csv_path, index=False)