Spaces:

colab-potsdam
/

llm-calculator

Running

App Files Files Community

sherzod-hakimov committed on Feb 24

Commit

cac6844

1 Parent(s): 101e122

first commit

Browse files

Files changed (8) hide show

README.md +5 -6
app.py +368 -0
assets/pricing.json +212 -0
assets/text_content.py +62 -0
requirements.txt +5 -0
src/collect_data.py +152 -0
src/filter_utils.py +133 -0
src/process_data.py +206 -0

README.md CHANGED Viewed

@@ -1,13 +1,12 @@
 ---
-title: Llm Calculator
-emoji: 📊
-colorFrom: gray
-colorTo: purple
 sdk: gradio
-sdk_version: 5.17.1
 app_file: app.py
 pinned: false
-short_description: find the best LLM from multiple configurations
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: LLM-Calculator
+emoji: 🏆
+colorFrom: red
+colorTo: pink
 sdk: gradio
+sdk_version: 4.44.1
 app_file: app.py
 pinned: false
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py ADDED Viewed

	@@ -0,0 +1,368 @@

+import pandas as pd
+import gradio as gr
+import os
+from gradio_rangeslider import RangeSlider
+import calendar
+import datetime
+import numpy as np
+from huggingface_hub import HfApi
+from apscheduler.schedulers.background import BackgroundScheduler
+from src.filter_utils import filter, filter_cols
+from src.process_data import merge_data
+import assets.text_content as tc
+"""
+CONSTANTS
+"""
+# For restarting the gradio application every 24 Hrs
+TIME = 86400  # in seconds # Reload will not work locally - requires HFToken # The app launches locally as expected - only without the reload utility
+"""
+AUTO RESTART HF SPACE
+"""
+HF_TOKEN = os.environ.get("H4_TOKEN", None)
+api = HfApi()
+def restart_space():
+    api.restart_space(repo_id=tc.HF_REPO, token=HF_TOKEN)
+# Main Leaderboard containing everything
+# text_leaderboard = pd.read_csv(os.path.join('assets', 'merged_data.csv'))
+text_leaderboard = merge_data()
+text_leaderboard = text_leaderboard.sort_values(by=tc.CLEMSCORE, ascending=False)
+# When displaying latency values
+text_leaderboard[tc.LATENCY] = text_leaderboard[tc.LATENCY].round(1)
+text_leaderboard[tc.CLEMSCORE] = text_leaderboard[tc.CLEMSCORE].round(1)
+open_weight_df = text_leaderboard[text_leaderboard[tc.OPEN_WEIGHT] == True]
+if not open_weight_df.empty:  # Check if filtered df is non-empty
+    # Get max parameter size, ignoring NaN values
+    params = open_weight_df[tc.PARAMS].dropna()
+    max_parameter_size = params.max() if not params.empty else 0
+# Short leaderboard containing fixed columns
+short_leaderboard = filter_cols(text_leaderboard)
+# html_table = short_leaderboard.to_html(escape=False, index=False)
+## Extract data
+langs = []
+licenses = []
+ip_prices = []
+op_prices = []
+latencies = []
+parameters = []
+contexts = []
+dates = []
+for i in range(len(text_leaderboard)):
+    lang_splits = text_leaderboard.iloc[i][tc.LANGS].split(',')
+    lang_splits = [s.strip() for s in lang_splits]
+    langs += lang_splits
+    license_name = text_leaderboard.iloc[i][tc.LICENSE_NAME]
+    licenses.append(license_name)
+    ip_prices.append(text_leaderboard.iloc[i][tc.INPUT])
+    op_prices.append(text_leaderboard.iloc[i][tc.OUTPUT])
+    latencies.append(text_leaderboard.iloc[i][tc.LATENCY])
+    parameters.append(text_leaderboard.iloc[i][tc.PARAMS])
+    contexts.append(text_leaderboard.iloc[i][tc.CONTEXT])
+    dates.append(text_leaderboard.iloc[i][tc.RELEASE_DATE])
+langs = list(set(langs))
+langs.sort()
+licenses = list(set(licenses))
+licenses.sort()
+max_input_price = max(ip_prices)
+max_output_price = max(op_prices)
+max_latency = text_leaderboard[tc.LATENCY].max().round(3)
+min_parameters = 0 if pd.isna(min(parameters)) else min(parameters)
+max_parameter = max_parameter_size
+parameter_step = 1
+min_context = min(contexts)
+max_context = max(contexts)
+context_step = 8
+min_date = min(dates)
+max_date = max(dates)
+# Date settings
+today = datetime.date.today()
+end_year = today.year
+start_year = tc.START_YEAR
+YEARS = list(range(int(start_year), int(end_year)+1))
+YEARS = [str(y) for y in YEARS]
+MONTHS = list(calendar.month_name[1:])
+TITLE = tc.TITLE
+llm_calc_app = gr.Blocks()
+with llm_calc_app:
+    gr.HTML(TITLE)
+    with gr.Row():
+        #####################################
+        # First Column
+        ####################################
+        ## Language Select
+        with gr.Column(scale=2):
+            with gr.Row():
+                lang_dropdown = gr.Dropdown(
+                    choices=langs,
+                    value=[],
+                    multiselect=True,
+                    label="Languages 🗣️"
+                )
+            ## Release Date range selection
+            with gr.Row():
+                start_year_dropdown = gr.Dropdown(
+                    choices = YEARS,
+                    value=[],
+                    label="Model Release - Year 🗓️"
+                )
+                start_month_dropdown = gr.Dropdown(
+                    choices = MONTHS,
+                    value=[],
+                    label="Month 📜"
+                )
+                end_year_dropdown = gr.Dropdown(
+                    choices = YEARS,
+                    value=[],
+                    label="End - Year 🗓️"
+                )
+                end_month_dropdown = gr.Dropdown(
+                    choices = MONTHS,
+                    value=[],
+                    label="Month 📜"
+                )
+            ## Price selection
+            with gr.Row():
+                input_pricing_slider = RangeSlider(
+                    minimum=0,
+                    maximum=max_input_price,
+                    value=(0, max_input_price),
+                    label="💲/1M input tokens",
+                    elem_id="double-slider-3"
+                )
+                output_pricing_slider = RangeSlider(
+                    minimum=0,
+                    maximum=max_output_price,
+                    value=(0, max_output_price),
+                    label="💲/1M output tokens",
+                    elem_id="double-slider-4"
+                )
+            # License selection
+            with gr.Row():
+                license_checkbox = gr.CheckboxGroup(
+                    choices=licenses,
+                    value=licenses,
+                    label="License 🛡️",
+                )
+        #############################################################
+        # Second Column
+        #############################################################
+        with gr.Column(scale=1):
+            ####### parameters ###########
+            with gr.Row():
+                parameter_slider = RangeSlider(
+                    minimum=0,
+                    maximum=max_parameter,
+                    label=f"Parameters 🔍 {int(min_parameters)}B - {int(max_parameter)}B+",
+                    elem_id="double-slider-1",
+                    step=parameter_step
+                )
+            ########### Context range ################
+            with gr.Row():
+                context_slider = RangeSlider(
+                    minimum=0,
+                    maximum=max_context,
+                    label="Context (k) 📏",
+                    elem_id="double-slider-2",
+                    step=context_step
+                )
+            ############# Modality selection checkbox ###############
+            with gr.Row():
+                multimodal_checkbox = gr.CheckboxGroup(
+                    choices=[tc.TEXT, tc.SINGLE_IMG, tc.MULT_IMG, tc.AUDIO, tc.VIDEO],
+                    value=[],
+                    label="Modalities 📝📷🎧🎬",
+                )
+            # ############### Model Type Checkbox ###############
+            with gr.Row():
+                open_weight_checkbox = gr.CheckboxGroup(
+                    choices=[tc.OPEN, tc.COMM],
+                    value=[tc.OPEN, tc.COMM],
+                    label="Model Type 🔓 💼",
+                )
+    with gr.Row():
+        """
+        Main Leaderboard Row
+        """
+        leaderboard_table = gr.Dataframe(
+                                value=short_leaderboard,
+                                elem_id="text-leaderboard-table",
+                                interactive=False,
+                                visible=True,
+                                datatype=['str', 'number', 'number', 'date', 'number', 'number', 'number', 'number', 'markdown']
+                            )
+        dummy_leaderboard_table = gr.Dataframe(
+                                value=text_leaderboard,
+                                elem_id="dummy-leaderboard-table",
+                                interactive=False,
+                                visible=False
+                            )
+        lang_dropdown.change(
+            filter,
+            [dummy_leaderboard_table, lang_dropdown, parameter_slider,
+             input_pricing_slider, output_pricing_slider, multimodal_checkbox,
+             context_slider, open_weight_checkbox, start_year_dropdown, start_month_dropdown, end_year_dropdown, end_month_dropdown, license_checkbox],
+            [leaderboard_table],
+            queue=True
+        )
+        parameter_slider.change(
+            filter,
+            [dummy_leaderboard_table, lang_dropdown, parameter_slider,
+             input_pricing_slider, output_pricing_slider, multimodal_checkbox,
+             context_slider, open_weight_checkbox, start_year_dropdown, start_month_dropdown, end_year_dropdown, end_month_dropdown, license_checkbox],
+            [leaderboard_table],
+            queue=True
+        )
+        input_pricing_slider.change(
+            filter,
+            [dummy_leaderboard_table, lang_dropdown, parameter_slider,
+             input_pricing_slider, output_pricing_slider, multimodal_checkbox,
+             context_slider, open_weight_checkbox, start_year_dropdown, start_month_dropdown, end_year_dropdown, end_month_dropdown, license_checkbox],
+            [leaderboard_table],
+            queue=True
+        )
+        output_pricing_slider.change(
+            filter,
+            [dummy_leaderboard_table, lang_dropdown, parameter_slider,
+             input_pricing_slider, output_pricing_slider, multimodal_checkbox,
+             context_slider, open_weight_checkbox, start_year_dropdown, start_month_dropdown, end_year_dropdown, end_month_dropdown, license_checkbox],
+            [leaderboard_table],
+            queue=True
+        )
+        multimodal_checkbox.change(
+            filter,
+            [dummy_leaderboard_table, lang_dropdown, parameter_slider,
+             input_pricing_slider, output_pricing_slider, multimodal_checkbox,
+             context_slider, open_weight_checkbox, start_year_dropdown, start_month_dropdown, end_year_dropdown, end_month_dropdown, license_checkbox],
+            [leaderboard_table],
+            queue=True
+        )
+        open_weight_checkbox.change(
+            filter,
+            [dummy_leaderboard_table, lang_dropdown, parameter_slider,
+             input_pricing_slider, output_pricing_slider, multimodal_checkbox,
+             context_slider, open_weight_checkbox, start_year_dropdown, start_month_dropdown, end_year_dropdown, end_month_dropdown, license_checkbox],
+            [leaderboard_table],
+            queue=True
+        )
+        context_slider.change(
+            filter,
+            [dummy_leaderboard_table, lang_dropdown, parameter_slider,
+             input_pricing_slider, output_pricing_slider, multimodal_checkbox,
+             context_slider, open_weight_checkbox, start_year_dropdown, start_month_dropdown, end_year_dropdown, end_month_dropdown, license_checkbox],
+            [leaderboard_table],
+            queue=True
+        )
+        start_year_dropdown.change(
+            filter,
+            [dummy_leaderboard_table, lang_dropdown, parameter_slider,
+             input_pricing_slider, output_pricing_slider, multimodal_checkbox,
+             context_slider, open_weight_checkbox, start_year_dropdown, start_month_dropdown, end_year_dropdown, end_month_dropdown, license_checkbox],
+            [leaderboard_table],
+            queue=True
+        )
+        start_month_dropdown.change(
+            filter,
+            [dummy_leaderboard_table, lang_dropdown, parameter_slider,
+             input_pricing_slider, output_pricing_slider, multimodal_checkbox,
+             context_slider, open_weight_checkbox, start_year_dropdown, start_month_dropdown, end_year_dropdown, end_month_dropdown, license_checkbox],
+            [leaderboard_table],
+            queue=True
+        )
+        end_year_dropdown.change(
+            filter,
+            [dummy_leaderboard_table, lang_dropdown, parameter_slider,
+             input_pricing_slider, output_pricing_slider, multimodal_checkbox,
+             context_slider, open_weight_checkbox, start_year_dropdown, start_month_dropdown, end_year_dropdown, end_month_dropdown, license_checkbox],
+            [leaderboard_table],
+            queue=True
+        )
+        end_month_dropdown.change(
+            filter,
+            [dummy_leaderboard_table, lang_dropdown, parameter_slider,
+             input_pricing_slider, output_pricing_slider, multimodal_checkbox,
+             context_slider, open_weight_checkbox, start_year_dropdown, start_month_dropdown, end_year_dropdown, end_month_dropdown, license_checkbox],
+            [leaderboard_table],
+            queue=True
+        )
+        license_checkbox.change(
+            filter,
+            [dummy_leaderboard_table, lang_dropdown, parameter_slider,
+             input_pricing_slider, output_pricing_slider, multimodal_checkbox,
+             context_slider, open_weight_checkbox, start_year_dropdown, start_month_dropdown, end_year_dropdown, end_month_dropdown, license_checkbox],
+            [leaderboard_table],
+            queue=True
+        )
+    llm_calc_app.load()
+llm_calc_app.queue()
+# Add scheduler to auto-restart the HF space at every TIME interval and update every component each time
+scheduler = BackgroundScheduler()
+scheduler.add_job(restart_space, 'interval', seconds=TIME)
+scheduler.start()
+# Log current start time and scheduled restart time
+print(datetime.datetime.now())
+print(f"Scheduled restart at {datetime.datetime.now() + datetime.timedelta(seconds=TIME)}")
+llm_calc_app.launch()

assets/pricing.json ADDED Viewed

	@@ -0,0 +1,212 @@

+[
+    {
+        "model_id": "gpt-4-1106-vision-preview",
+        "input": "10$",
+        "output": "30$"
+    },
+    {
+        "model_id": "gpt-4o-2024-05-13",
+        "input": "5$",
+        "output": "15$"
+    },
+    {
+        "model_id": "gpt-4o-2024-08-06",
+        "input": "3.750$",
+        "output": "15$"
+    },
+    {
+        "model_id": "gpt-4o-mini-2024-07-18",
+        "input": "0.300$",
+        "output": "1.200$"
+    },
+    {
+        "model_id": "gpt-4-turbo-2024-04-09",
+        "input": "10$",
+        "output": "30$"
+    },
+    {
+        "model_id": "gpt-4-1106-preview",
+        "input": "",
+        "output": ""
+    },
+    {
+        "model_id": "gpt-4-0125-preview",
+        "input": "10$",
+        "output": "30$"
+    },
+    {
+        "model_id": "o1-preview-2024-09-12",
+        "input": "15$",
+        "output": "60$"
+    },
+    {
+        "model_id": "o1-mini-2024-09-12",
+        "input": "3$",
+        "output": "12$"
+    },
+    {
+        "model_id": "gpt-3.5-turbo-0125",
+        "input": "0.5$",
+        "output": "1.5$"
+    },
+    {
+        "model_id": "gpt-4-0613",
+        "input": "",
+        "output": ""
+    },
+    {
+        "model_id": "gpt-4-0314",
+        "input": "",
+        "output": ""
+    },
+    {
+        "model_id": "gpt-3.5-turbo-1106",
+        "input": "1$",
+        "output": "2$"
+    },
+    {
+        "model_id": "gpt-3.5-turbo-0613",
+        "input": "1.5$",
+        "output": "2$"
+    },
+    {
+        "model_id": "command",
+        "input": "",
+        "output": ""
+    },
+    {
+        "model_id": "command-light",
+        "input": "",
+        "output": ""
+    },
+    {
+        "model_id": "claude-v1.3",
+        "input": "",
+        "output": ""
+    },
+    {
+        "model_id": "claude-v1.3-100k",
+        "input": "",
+        "output": ""
+    },
+    {
+        "model_id": "claude-instant-1.2",
+        "input": "",
+        "output": ""
+    },
+    {
+        "model_id": "claude-2",
+        "input": "8$",
+        "output": "24$"
+    },
+    {
+        "model_id": "claude-2.1",
+        "input": "8$",
+        "output": "24$"
+    },
+    {
+        "model_id": "claude-3-opus-20240229",
+        "input": "15$",
+        "output": "75$"
+    },
+    {
+        "model_id": "claude-3-sonnet-20240229",
+        "input": "3$",
+        "output": "15$"
+    },
+    {
+        "model_id": "claude-3-haiku-20240307",
+        "input": "0.25$",
+        "output": "1.25$"
+    },
+    {
+        "model_id": "claude-3-5-sonnet-20240620",
+        "input": "3$",
+        "output": "15$"
+    },
+    {
+        "model_id": "claude-3-5-haiku-20241022",
+        "input": "0.8$",
+        "output": "4$"
+    },
+    {
+        "model_id": "claude-3-5-sonnet-20241022",
+        "input": "3$",
+        "output": "15$"
+    },
+    {
+        "model_id": "gemini-1.0-pro-001",
+        "input": "0.5$",
+        "output": "1.5$"
+    },
+    {
+        "model_id": "gemini-1.0-pro-002",
+        "input": "0.5$",
+        "output": "1.5$"
+    },
+    {
+        "model_id": "gemini-1.0-pro-vision-latest",
+        "input": "0.5$",
+        "output": "1.5$"
+    },
+    {
+        "model_id": "gemini-1.5-flash-001",
+        "input": "0.075$",
+        "output": "0.3$"
+    },
+    {
+        "model_id": "gemini-1.5-pro-001",
+        "input": "1.25$",
+        "output": "5$"
+    },
+    {
+        "model_id": "gemini-1.5-pro-002",
+        "input": "1.25$",
+        "output": "5$"
+    },
+    {
+        "model_id": "gemini-1.5-flash-002",
+        "input": "0.075$",
+        "output": "0.3$"
+    },
+    {
+        "model_id": "gemini-1.5-flash-8b-001",
+        "input": "0.0375$",
+        "output": "0.15$"
+    },
+    {
+        "model_id": "gemini-2.0-flash-exp",
+        "input": "0$",
+        "output": "0$"
+    },
+    {
+        "model_id": "luminous-supreme-control",
+        "input": "",
+        "output": ""
+    },
+    {
+        "model_id": "luminous-supreme",
+        "input": "",
+        "output": ""
+    },
+    {
+        "model_id": "luminous-extended",
+        "input": "",
+        "output": ""
+    },
+    {
+        "model_id": "luminous-base",
+        "input": "",
+        "output": ""
+    },
+    {
+        "model_id": "luminous-base",
+        "input": "",
+        "output": ""
+    },
+    {
+        "model_id": "luminous-base",
+        "input": "",
+        "output": ""
+    }
+]

assets/text_content.py ADDED Viewed

	@@ -0,0 +1,62 @@

+import os
+# Data Sources
+# Base URL of the raw clembench-runs repository (ends with a slash)
+CLEMBENCH_RUNS_REPO = "https://raw.githubusercontent.com/clembench/clembench-runs/main/"
+# Raw URL of the model registry JSON
+REGISTRY_URL = "https://raw.githubusercontent.com/clp-research/clembench/refs/heads/refactor_model_registry/backends/model_registry.json"
+# File names / folders looked up below CLEMBENCH_RUNS_REPO
+BENCHMARK_FILE = "benchmark_runs.json"
+LATENCY_FOLDER = os.path.join("Addenda", "Latency")
+RESULT_FILE = "results.csv"
+LATENCY_SUFFIX = "_latency.csv"
+# Setup Column Names
+# Note - Changing this does not affect the already generated csv `merged_data.csv`
+# Run `src/process_data.py` for this
+# NOTE(review): the two DEFAULT_* names look like the column headers of the
+# fetched results CSV before renaming - confirm against src/process_data.py
+DEFAULT_MODEL_NAME = "Unnamed: 0"
+DEFAULT_CLEMSCORE = "-, clemscore"
+MODEL_NAME = "Model Name"
+CLEMSCORE = "Clemscore"
+LATENCY = "Latency (s)"
+PARAMS = "Parameters (B)"
+DUMMY_PARAMS = "Parameters Dummy (B)"
+RELEASE_DATE = 'Release Date'
+OPEN_WEIGHT = 'Open Weight'
+LANGS = "Languages"
+CONTEXT = "Context Size (k)"
+LICENSE_NAME = "License Name"
+LICENSE_URL = "License URL"
+SINGLE_IMG = "Single Image"
+MULT_IMG = "Multi Image"
+TEXT = "Text-Only"
+AUDIO = "Audio"
+VIDEO = "Video"
+INPUT = "Input $/1M tokens"
+OUTPUT = "Output $/1M tokens"
+LICENSE = "License"
+TEMP_DATE = "Temp Date"
+# UI - HF Space
+# Labels of the "Model Type" checkbox group
+OPEN = "Open-Weight"
+COMM = "Commercial"
+TITLE = """<h1 align="center" id="space-title"> LLM Calculator ⚖️⚡ 📏💰</h1> <p align="center">Performance, latency metrics are based on <a href="https://clembench.github.io/" target="_blank">clembench</a> .</p>"""
+# Repo id of the Space that the app restarts
+HF_REPO = "colab-potsdam/llm-calculator"
+# Date Picker (set as Dropdown until datetime object is fixed)
+# First year offered in the year dropdowns (kept as a string)
+START_YEAR = "2020"
+# Month name -> month number, used when building the date filter bounds
+MONTH_MAP = {
+    "January": 1,
+    "February": 2,
+    "March": 3,
+    "April": 4,
+    "May": 5,
+    "June": 6,
+    "July": 7,
+    "August": 8,
+    "September": 9,
+    "October": 10,
+    "November": 11,
+    "December": 12
+}

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+pandas==2.2.3
+gradio_rangeslider==0.0.7
+gradio==4.44.1
+pycountry==24.6.1
+apscheduler==3.10.4

src/collect_data.py ADDED Viewed

	@@ -0,0 +1,152 @@

+"""
+Collect data from multiple sources and create a base dataframe for the LLMCalculator table
+Latency - https://github.com/clembench/clembench-runs/tree/main/Addenda/Latency
+Pricing - pricing.json
+Model info - https://github.com/kushal-10/clembench/blob/feat/registry/backends/model_registry_updated.json
+"""
+import pandas as pd
+import json
+import requests
+from assets.text_content import CLEMBENCH_RUNS_REPO, REGISTRY_URL, BENCHMARK_FILE, LATENCY_FOLDER, RESULT_FILE, LATENCY_SUFFIX
+import os
+def validate_request(url: str, response) -> bool:
+    """
+    Validate if an HTTP request was successful.
+    Args:
+        url (str): The URL that was requested
+        response (requests.Response): The response object from the request
+    Returns:
+        bool: True if request was successful (status code 200), False otherwise
+    """
+    if response.status_code != 200:
+        print(f"Failed to read file - {url}. Status Code: {response.status_code}")
+        return False
+    return True
+def fetch_benchmark_data(benchmark: str = "text", version_names: list = []) -> tuple:
+    """
+    Fetch and parse benchmark results and latency data from CSV files.
+    Args:
+        benchmark (str): Type of benchmark to fetch ('text' or 'multimodal')
+        version_names (list): List of version names to search through, sorted by latest first
+    Returns:
+        tuple[pd.DataFrame, pd.DataFrame]: A tuple containing:
+            - results_df: DataFrame with benchmark results
+            - latency_df: DataFrame with latency measurements
+            Returns (None, None) if no matching version is found or requests fail
+    Raises:
+        requests.RequestException: If there's an error fetching the data
+        pd.errors.EmptyDataError: If CSV file is empty
+        pd.errors.ParserError: If CSV parsing fails
+    """
+    for v in version_names:
+        # Check if version matches benchmark type
+        is_multimodal = 'multimodal' in v
+        if (benchmark == "multimodal") != is_multimodal:
+            continue
+        # Construct URLs
+        results_url = os.path.join(CLEMBENCH_RUNS_REPO, v, RESULT_FILE)
+        latency_url = os.path.join(CLEMBENCH_RUNS_REPO, LATENCY_FOLDER, v + LATENCY_SUFFIX)
+        try:
+            results = requests.get(results_url)
+            latency = requests.get(latency_url)
+            if validate_request(results_url, results) and validate_request(latency_url, latency):
+                # Convert the CSV content to pandas DataFrames
+                results_df = pd.read_csv(pd.io.common.StringIO(results.text))
+                latency_df = pd.read_csv(pd.io.common.StringIO(latency.text))
+                return results_df, latency_df
+        except requests.RequestException as e:
+            print(f"Error fetching data for version {v}: {e}")
+        except pd.errors.EmptyDataError:
+            print(f"Error: Empty CSV file found for version {v}")
+        except pd.errors.ParserError:
+            print(f"Error: Unable to parse CSV data for version {v}")
+    return None, None
+def fetch_version_metadata() -> tuple:
+    """
+    Fetch and process benchmark metadata from the Clembench GitHub repository.
+    The data is sourced from: https://github.com/clembench/clembench-runs
+    Configure the repository path in src/assets/text_content/CLEMBENCH_RUNS_REPO
+    Returns:
+        tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame]: A tuple containing:
+            - mm_result: Multimodal benchmark results
+            - mm_latency: Multimodal latency data
+            - text_result: Text benchmark results
+            - text_latency: Text latency data
+            Returns (None, None, None, None) if the request fails
+    """
+    json_url = CLEMBENCH_RUNS_REPO + BENCHMARK_FILE
+    response = requests.get(json_url)
+    # Check if the JSON file request was successful
+    if not validate_request(json_url, response):
+        return None, None, None, None
+    json_data = response.json()
+    versions = json_data['versions']
+    # Sort the versions in benchmark by latest first
+    version_names = sorted(
+        [ver['version'] for ver in versions],
+        key=lambda v: list(map(int, v[1:].split('_')[0].split('.'))),
+        reverse=True
+    )
+    # Latency is in  seconds
+    mm_result, mm_latency = fetch_benchmark_data("multimodal", version_names)
+    text_result, text_latency = fetch_benchmark_data("text", version_names)
+    return mm_latency, mm_result, text_latency, text_result
+def fetch_registry_data() -> dict:
+    """
+    Fetch and parse model registry data from the Clembench registry URL.
+    The data is sourced from the model registry defined in REGISTRY_URL.
+    Contains information about various LLM models including their specifications
+    and capabilities.
+    Returns:
+        dict: Dictionary containing model registry data.
+        Returns None if the request fails or the JSON is invalid.
+    Raises:
+        requests.RequestException: If there's an error fetching the data
+        json.JSONDecodeError: If the response cannot be parsed as JSON
+    """
+    try:
+        response = requests.get(REGISTRY_URL)
+        if not validate_request(REGISTRY_URL, response):
+            return None
+        return response.json()
+    except requests.RequestException as e:
+        print(f"Error fetching registry data: {e}")
+    except json.JSONDecodeError as e:
+        print(f"Error parsing registry JSON: {e}")
+    return None
+if __name__=="__main__":
+    fetch_version_metadata()
+    registry_data = fetch_registry_data()
+    print(registry_data[0])

src/filter_utils.py ADDED Viewed

	@@ -0,0 +1,133 @@

+# Utility functions for filtering the dataframe
+import pandas as pd
+import assets.text_content as tc
+import calendar
+from typing import Union, List
+from datetime import datetime
+current_year = str(datetime.now().year)
+def filter_cols(df):
+    df = df[[
+    tc.MODEL_NAME,
+    tc.CLEMSCORE,
+    tc.INPUT,
+    tc.OUTPUT,
+    tc.LATENCY,
+    tc.CONTEXT,
+    tc.PARAMS,
+    tc.RELEASE_DATE,
+    tc.LICENSE
+    ]]
+    return df
+def convert_date_components_to_timestamp(year: str, month: str) -> int:
+    """Convert year and month strings to timestamp."""
+    # Create a datetime object for the first day of the month
+    date_str = f"{year}-{month:02d}-01"
+    return int(pd.to_datetime(date_str).timestamp())
+def filter_by_date(df: pd.DataFrame,
+                  start_year, start_month,
+                  end_year, end_month,
+                  date_column: str = tc.RELEASE_DATE) -> pd.DataFrame:
+    """
+    Filter DataFrame by date range using separate year and month components.
+    """
+    # All lists are passed at once, so set default values here instead of passing them in args- Overwritten by empty lists
+    if not start_year:
+        start_year = tc.START_YEAR
+    if not end_year:
+        end_year = current_year
+    if not start_month:
+        start_month = "January"
+    if not end_month:
+        end_month = "December"
+    try:
+        # Convert string inputs to integers for date creation
+        start_timestamp = convert_date_components_to_timestamp(
+            int(start_year),
+            int(tc.MONTH_MAP[start_month])
+        )
+        end_timestamp = convert_date_components_to_timestamp(
+            int(end_year),
+            int(tc.MONTH_MAP[end_month])
+        )
+        # Convert the DataFrame's date column to timestamps for comparison
+        date_timestamps = pd.to_datetime(df[date_column]).apply(lambda x: int(x.timestamp()))
+        # Filter the DataFrame
+        return df[
+            (date_timestamps >= start_timestamp) &
+            (date_timestamps <= end_timestamp)
+        ]
+    except (ValueError, TypeError) as e:
+        print(f"Error processing dates: {e}")
+        return df  # Return unfiltered DataFrame if there's an error
+def filter(df, language_list, parameters, input_price, output_price, multimodal,
+           context, open_weight,
+           start_year, start_month, end_year, end_month,
+           license ):
+    if not df.empty:  # Check if df is non-empty
+        df = df[df[tc.LANGS].apply(lambda x: all(lang in x for lang in language_list))]
+    if not df.empty:
+        df = df[(df[tc.DUMMY_PARAMS] >= parameters[0]) & (df[tc.DUMMY_PARAMS] <= parameters[1])]
+    if not df.empty:  # Check if df is non-empty
+        df = df[(df[tc.INPUT] >= input_price[0]) & (df[tc.INPUT] <= input_price[1])]
+    if not df.empty:  # Check if df is non-empty
+        df = df[(df[tc.OUTPUT] >= output_price[0]) & (df[tc.OUTPUT] <= output_price[1])]
+    if not df.empty:  # Check if df is non-empty
+        if tc.TEXT in multimodal:
+            df = df[(df[tc.SINGLE_IMG] == False) & (df[tc.MULT_IMG] == False) & (df[tc.AUDIO] == False) & (df[tc.VIDEO] == False) ]
+        if tc.SINGLE_IMG in multimodal:
+            df = df[df[tc.SINGLE_IMG] == True]
+        if tc.MULT_IMG in multimodal:
+            df = df[df[tc.MULT_IMG] == True]
+        if tc.AUDIO in multimodal:
+            df = df[df[tc.AUDIO] == True]
+        if tc.VIDEO in multimodal:
+            df = df[df[tc.VIDEO] == True]
+    if not df.empty:  # Check if df is non-empty
+        # Convert 'Context Size (k)' to numeric, coercing errors to NaN
+        context_size = pd.to_numeric(df['Context Size (k)'], errors='coerce').fillna(0)
+        # Apply the filter
+        df = df[(context_size >= context[0]) & (context_size <= context[1])]
+    if not df.empty:  # Check if df is non-empty
+        if tc.OPEN in open_weight and tc.COMM not in open_weight:
+            df = df[df[tc.OPEN_WEIGHT] == True]
+        elif tc.COMM in open_weight and tc.OPEN not in open_weight:
+            df = df[df[tc.OPEN_WEIGHT] == False]
+        elif tc.OPEN not in open_weight and tc.COMM not in open_weight:
+            # Return empty DataFrame with same columns
+            df = pd.DataFrame(columns=df.columns)
+    if not df.empty:  # Check if df is non-empty
+        df = df[df[tc.LICENSE_NAME].apply(lambda x: any(lic in x for lic in license))]
+    df = filter_by_date(df, start_year, start_month, end_year, end_month, tc.TEMP_DATE)
+    df = filter_cols(df)
+    df = df.sort_values(by=tc.CLEMSCORE, ascending=False)
+    return df  # Return the filtered dataframe

src/process_data.py ADDED Viewed

	@@ -0,0 +1,206 @@

+import pandas as pd
+import json
+import os
+import pycountry
+import re
+from src.collect_data import fetch_version_metadata, fetch_registry_data
+import assets.text_content as tc
+PRICING_PATH = os.path.join('assets', 'pricing.json')
+# Convert parameters to float, handling both B and T suffixes
+def convert_parameters(param):
+    if pd.isna(param) or param == '':
+        return None
+    param = str(param)
+    if 'T' in param:
+        return float(param.replace('T', '')) * 1000
+    return float(param.replace('B', ''))
def clean_price(price):
    """Convert a price such as ``'$0.15'`` to a float.

    Args:
        price: The raw price -- a string with an optional ``$`` sign, a bare
            number, or a missing value (``None``/NaN).

    Returns:
        The numeric price, or ``None`` when *price* is missing or contains
        nothing besides the ``$`` sign and whitespace.

    Raises:
        ValueError: If the text left after removing ``$`` is not a number.
    """
    if pd.isna(price):
        return None
    # Go through str() so that prices already stored as numbers are accepted
    # too, mirroring how convert_parameters treats its input.
    text = str(price).replace('$', '').strip()
    if not text:
        return None
    return float(text)
def map_languages(languages):
    """Turn language codes/names into a comma-separated string of names.

    Each entry is stripped and lower-cased, then looked up with
    ``pycountry.languages.get`` -- first as an ``alpha_2`` code, then as a
    capitalised language name.  Entries that cannot be resolved are kept in
    their stripped, lower-cased form.

    Args:
        languages: A list of entries, a comma-separated string, any other
            iterable of entries, or a missing value (``None``/NaN).

    Returns:
        ``None`` for a missing value; ``str(languages)`` when the input is
        neither a list, a string, nor iterable; otherwise the resolved names
        joined with ``', '``.
    """
    # A missing value may arrive either as None or as a float NaN.
    if languages is None or (isinstance(languages, float) and pd.isna(languages)):
        return None

    def get_language_name(lang):
        """Resolve one entry to a language name, falling back to the entry itself."""
        lang = str(lang).strip().lower()
        try:
            # First try as a two-letter language code (en, fr, ...).
            language = pycountry.languages.get(alpha_2=lang)
            if not language:
                # Then try as a language name (English, French, ...).
                language = pycountry.languages.get(name=lang.capitalize())
            return language.name if language else lang
        except (AttributeError, LookupError):
            return lang

    if isinstance(languages, list):
        lang_list = languages
    elif isinstance(languages, str):
        lang_list = [part.strip() for part in languages.split(',')]
    else:
        try:
            lang_list = list(languages)
        except TypeError:
            # Not iterable (e.g. a plain number): nothing to map.
            return str(languages)

    return ', '.join(get_language_name(lang) for lang in lang_list)
def get_multimodality_field(model_data, field):
    """Read one flag from ``model_data['model_config']['multimodality']``.

    Args:
        model_data: A mapping-like object exposing ``.get`` (the caller in
            this file passes a registry row).
        field: Name of the multimodality flag to read, e.g. ``'audio'``.

    Returns:
        The stored value of *field*, or ``False`` when ``model_config``,
        ``multimodality`` or the field itself is absent, or when any level of
        the lookup is not mapping-like (for example ``None`` or NaN).
    """
    try:
        return model_data.get('model_config', {}).get('multimodality', {}).get(field, False)
    except (AttributeError, TypeError):
        # An intermediate level has no usable ``.get``; treat the flag as unset
        # without also swallowing KeyboardInterrupt/SystemExit.
        return False
def clean_model_name(model_name: str) -> str:
    """Return *model_name* cut off just before its first temperature suffix.

    A temperature suffix has the form ``-t0.0--``, ``-t0.7--``, ``-t1.0--``
    and so on.  A name without such a suffix is returned unchanged.
    """
    suffix = re.search(r'-t[0-1]\.[0-9]--', model_name)
    if suffix is None:
        return model_name
    return model_name[:suffix.start()]
def merge_data():
    """Build the leaderboard table from results, latency, registry and pricing.

    Steps, in order:

    1. Fetch the multimodal/text latency and result frames and the model
       registry, and load the pricing list from ``PRICING_PATH``.
    2. Rename the result columns to ``model``/``clemscore`` and strip the
       temperature suffix from the result model names.
    3. Average latency and clemscore per model across both benchmarks and
       outer-join the two averages.
    4. Inner-join with the registry on the model name, so models without a
       registry entry are dropped.
    5. Left-join pricing; models without a price get ``0.0`` for both the
       input and output price.
    6. Derive the display columns: numeric parameters (``None`` for models
       that are not open-weight), a markdown license link, a copy of the
       release date, mapped language names, an integer context size and a
       dummy parameter column in which non-open-weight models take the
       maximum parameter count found among open-weight models.

    Returns:
        A ``pandas.DataFrame`` sorted by ``tc.CLEMSCORE`` in descending order.
    """
    mm_latency_df, mm_result_df, text_latency_df, text_result_df = fetch_version_metadata()
    registry_data = fetch_registry_data()
    with open(PRICING_PATH, 'r') as f:
        pricing_data = json.load(f)

    # Rename the default result columns to the names used for merging.
    mm_result_df.rename(columns={tc.DEFAULT_MODEL_NAME: 'model', tc.DEFAULT_CLEMSCORE: 'clemscore'}, inplace=True)
    text_result_df.rename(columns={tc.DEFAULT_MODEL_NAME: 'model', tc.DEFAULT_CLEMSCORE: 'clemscore'}, inplace=True)
    mm_result_df['model'] = mm_result_df['model'].apply(clean_model_name)
    text_result_df['model'] = text_result_df['model'].apply(clean_model_name)

    # Average each metric per model across the multimodal and text benchmarks.
    avg_latency_df = pd.concat([mm_latency_df, text_latency_df], axis=0).groupby('model')['latency'].mean().reset_index()
    avg_clemscore_df = pd.concat([mm_result_df, text_result_df], axis=0).groupby('model')['clemscore'].mean().reset_index()

    # Outer join keeps models that have only one of the two metrics.
    lat_clem_df = pd.merge(avg_latency_df, avg_clemscore_df, on='model', how='outer')

    # Convert registry_data to a DataFrame for easier merging.
    registry_df = pd.DataFrame(registry_data)

    # Extract license info.
    registry_df['license_name'] = registry_df['license'].apply(lambda x: x['name'])
    registry_df['license_url'] = registry_df['license'].apply(lambda x: x['url'])

    # Add individual multimodality columns.
    registry_df['single_image'] = registry_df.apply(lambda x: get_multimodality_field(x, 'single_image'), axis=1)
    registry_df['multiple_images'] = registry_df.apply(lambda x: get_multimodality_field(x, 'multiple_images'), axis=1)
    registry_df['audio'] = registry_df.apply(lambda x: get_multimodality_field(x, 'audio'), axis=1)
    registry_df['video'] = registry_df.apply(lambda x: get_multimodality_field(x, 'video'), axis=1)

    # Keep only the registry columns that end up in the leaderboard.
    registry_df = registry_df[[
        'model_name', 'parameters', 'release_date', 'open_weight',
        'languages', 'context_size', 'license_name', 'license_url',
        'single_image', 'multiple_images', 'audio', 'video'
    ]]

    # Inner join: only models present in both the metrics and the registry.
    merged_df = pd.merge(
        lat_clem_df,
        registry_df,
        left_on='model',
        right_on='model_name',
        how='inner'
    )

    # Switch to the display column names defined in text_content.
    merged_df = merged_df.rename(columns={
        'model': tc.MODEL_NAME,
        'latency': tc.LATENCY,
        'clemscore': tc.CLEMSCORE,
        'parameters': tc.PARAMS,
        'release_date': tc.RELEASE_DATE,
        'open_weight': tc.OPEN_WEIGHT,
        'languages': tc.LANGS,
        'context_size': tc.CONTEXT,
        'license_name': tc.LICENSE_NAME,
        'license_url': tc.LICENSE_URL,
        'single_image': tc.SINGLE_IMG,
        'multiple_images': tc.MULT_IMG,
        'audio': tc.AUDIO,
        'video': tc.VIDEO
    })

    # Convert the pricing_data list to a DataFrame with numeric prices.
    pricing_df = pd.DataFrame(pricing_data)
    pricing_df['input'] = pricing_df['input'].apply(clean_price)
    pricing_df['output'] = pricing_df['output'].apply(clean_price)

    # Left join so models without pricing stay in the table.  The key is the
    # column that was just renamed to tc.MODEL_NAME, so use the constant
    # rather than a hard-coded copy of its value.
    merged_df = pd.merge(
        merged_df,
        pricing_df,
        left_on=tc.MODEL_NAME,
        right_on='model_id',
        how='left'
    )

    # Drop the duplicate model column and rename the price columns.
    merged_df = merged_df.drop('model_id', axis=1)
    merged_df = merged_df.rename(columns={
        'input': tc.INPUT,
        'output': tc.OUTPUT
    })

    # Fill NaN values with 0.0 for pricing columns.
    merged_df[tc.INPUT] = merged_df[tc.INPUT].fillna(0.0)
    merged_df[tc.OUTPUT] = merged_df[tc.OUTPUT].fillna(0.0)

    # Convert parameters and set them to None for commercial models.
    merged_df[tc.PARAMS] = merged_df.apply(
        lambda row: None if not row[tc.OPEN_WEIGHT] else convert_parameters(row[tc.PARAMS]),
        axis=1
    )

    # Markdown link combining the license name and URL.
    merged_df[tc.LICENSE] = merged_df.apply(
        lambda row: f'[{row[tc.LICENSE_NAME]}]({row[tc.LICENSE_URL]})', axis=1
    )
    merged_df[tc.TEMP_DATE] = merged_df[tc.RELEASE_DATE]
    merged_df[tc.LANGS] = merged_df[tc.LANGS].apply(map_languages)

    # Sort by Clemscore in descending order.
    merged_df = merged_df.sort_values(by=tc.CLEMSCORE, ascending=False)

    # Drop the registry's model_name column (duplicate of the merge key).
    merged_df.drop(columns=['model_name'], inplace=True)

    # Clean up context ('128k' -> 128); unparseable values become 0.
    merged_df[tc.CONTEXT] = merged_df[tc.CONTEXT].astype(str).str.replace('k', '', regex=False)
    merged_df[tc.CONTEXT] = pd.to_numeric(merged_df[tc.CONTEXT], errors='coerce').fillna(0).astype(int)

    # Commercial models have no parameter count; give them the maximum value
    # found among open-weight models in a separate dummy column.
    max_params_value = merged_df.loc[merged_df[tc.OPEN_WEIGHT], tc.PARAMS].max()
    merged_df[tc.DUMMY_PARAMS] = merged_df.apply(
        lambda row: max_params_value if not row[tc.OPEN_WEIGHT] else row[tc.PARAMS],
        axis=1
    )
    return merged_df
if __name__ == '__main__':
    # When run as a script, rebuild the merged table and save it as a CSV
    # in the assets directory.
    output_path = os.path.join('assets', 'merged_data.csv')
    merged_df = merge_data()
    merged_df.to_csv(output_path, index=False)