Hannah committed on
Commit
ff17adc
·
1 Parent(s): cce0ed9

Revert "initial"

Browse files

This reverts commit cce0ed9c2ee4e552d2f504d06f28ed0a29c10cb8.

Files changed (7) hide show
  1. README copy.md +0 -75
  2. api.py +0 -250
  3. hf.svg +0 -10
  4. requirements.txt +0 -30
  5. run.py +0 -280
  6. run_all.py +0 -68
  7. style.css +0 -44
README copy.md DELETED
@@ -1,75 +0,0 @@
1
- # LLM Leaderboard Demo
2
-
3
- This demo showcases a modern LLM leaderboard application built with Gradio and FastAPI.
4
-
5
- ## Features
6
-
7
- - Displays a sortable, filterable table of LLM models and their performance
8
- - Shows scores across multiple benchmarks
9
- - External links to model documentation for both open and closed models
10
- - Dynamic filtering by model performance categories
11
- - Real-time data refresh functionality
12
- - Last-updated timestamp display
13
- - Automatic startup of both API and frontend
14
-
15
- ## Installation
16
-
17
- First, install the required dependencies:
18
-
19
- ```bash
20
- pip install -r requirements.txt
21
- ```
22
-
23
- ## Usage
24
-
25
- There are two ways to run the application:
26
-
27
- ### Option 1: Using run.py (all-in-one)
28
-
29
- ```bash
30
- python run.py
31
- ```
32
-
33
- This starts the Gradio interface and launches the FastAPI backend (`api.py`) as a child process, so a single command brings up both.
34
-
35
- ### Option 2: Using run_all.py (separate processes)
36
-
37
- ```bash
38
- python run_all.py
39
- ```
40
-
41
- This runs the API server and Gradio interface in separate processes, making it easier to debug each component.
42
-
43
- With either option, the application will be available at:
44
- - Frontend: http://localhost:7860
45
- - API: http://localhost:8000
46
-
47
- ## Architecture
48
-
49
- The application consists of two main components:
50
-
51
- 1. **FastAPI Backend** (`api.py`):
52
- - Provides API endpoints for leaderboard data
53
- - Caches data to improve performance
54
- - Handles data processing and filtering
55
-
56
- 2. **Gradio Frontend** (`run.py`):
57
- - Creates an interactive web interface
58
- - Fetches data from the API
59
- - Provides filtering and search capabilities
60
-
61
- ## API Endpoints
62
-
63
- - `GET /api/leaderboard` - Get the full leaderboard data
64
- - `GET /api/leaderboard?refresh=true` - Force refresh data from source
65
- - `GET /api/models` - Get a list of all model names
66
- - `GET /api/model/{model_name}` - Get details for a specific model
67
- - `GET /api/filters` - Get counts for different filter categories
68
-
69
- ## Customization
70
-
71
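- You can modify the appearance by editing `style.css`, which `run.py` loads at startup via `load_css()` (a small built-in fallback stylesheet is used if the file cannot be read). The application uses Gradio's theme system for styling.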
72
-
73
- ## License
74
-
75
- This project is licensed under the MIT License.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
api.py DELETED
@@ -1,250 +0,0 @@
1
- import json
2
- from pathlib import Path
3
- from typing import Dict, List, Optional
4
-
5
- import numpy as np
6
- import requests
7
- from fastapi import FastAPI, HTTPException, Query
8
- from fastapi.middleware.cors import CORSMiddleware
9
- from pydantic import BaseModel
10
-
11
-
12
- class LeaderboardModel(BaseModel):
13
- model_name: str
14
- type: str
15
- model_link: Optional[str] = None
16
- scores: Dict[str, float]
17
- co2_cost: Optional[float] = None
18
-
19
- class LeaderboardData(BaseModel):
20
- models: List[LeaderboardModel]
21
- updated_at: str
22
-
23
- app = FastAPI(
24
- title="LLM Leaderboard API",
25
- description="API for serving Open LLM Leaderboard data",
26
- version="1.0.0"
27
- )
28
-
29
- # Add CORS middleware to allow requests from your Gradio app
30
- app.add_middleware(
31
- CORSMiddleware,
32
- allow_origins=["*"], # For production, specify your exact frontend URL
33
- allow_credentials=True,
34
- allow_methods=["*"],
35
- allow_headers=["*"],
36
- )
37
-
38
- # Cache for leaderboard data
39
- cached_data = None
40
- cache_file = Path("leaderboard_cache.json")
41
-
42
- def fetch_external_leaderboard_data(refresh: bool = False) -> Optional[Dict]:
43
- """
44
- Fetch leaderboard data from external sources like HuggingFace.
45
- Uses local cache if available and refresh is False.
46
- """
47
- global cached_data
48
-
49
- if not refresh and cached_data:
50
- return cached_data
51
-
52
- if not refresh and cache_file.exists():
53
- try:
54
- with open(cache_file) as f:
55
- cached_data = json.load(f)
56
- return cached_data
57
- except:
58
- pass # Fall back to fetching if cache read fails
59
-
60
- try:
61
- # Try different endpoints that might contain leaderboard data
62
- endpoints = [
63
- "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/raw/main/leaderboard_data.json",
64
- "https://huggingface.co/api/spaces/HuggingFaceH4/open_llm_leaderboard/api/get_results",
65
- ]
66
-
67
- for url in endpoints:
68
- response = requests.get(url)
69
- if response.status_code == 200:
70
- data = response.json()
71
- cached_data = data
72
- with open(cache_file, "w") as f:
73
- json.dump(data, f)
74
- return data
75
-
76
- # If all endpoints fail, return None
77
- return None
78
- except Exception as e:
79
- print(f"Error fetching external leaderboard data: {e}")
80
- return None
81
-
82
- def generate_sample_data() -> Dict:
83
- """
84
- Generate sample leaderboard data when external data can't be fetched.
85
- """
86
- models = [
87
- {"model_name": "meta-llama/llama-3-70b-instruct", "type": "open"},
88
- {"model_name": "mistralai/Mistral-7B-Instruct-v0.3", "type": "open"},
89
- {"model_name": "google/gemma-7b-it", "type": "open"},
90
- {"model_name": "Qwen/Qwen2-7B-Instruct", "type": "open"},
91
- {"model_name": "anthropic/claude-3-opus", "type": "closed", "external_link": "https://www.anthropic.com/claude"},
92
- {"model_name": "OpenAI/gpt-4o", "type": "closed", "external_link": "https://openai.com/gpt-4"},
93
- {"model_name": "01-ai/Yi-1.5-34B-Chat", "type": "open"},
94
- {"model_name": "google/gemma-2b", "type": "open"},
95
- {"model_name": "microsoft/phi-3-mini-4k-instruct", "type": "open"},
96
- {"model_name": "microsoft/phi-3-mini-128k-instruct", "type": "open"},
97
- {"model_name": "stabilityai/stable-beluga-7b", "type": "open"},
98
- {"model_name": "togethercomputer/RedPajama-INCITE-7B-Instruct", "type": "open"},
99
- {"model_name": "databricks/dbrx-instruct", "type": "closed", "external_link": "https://www.databricks.com/product/machine-learning/large-language-models"},
100
- {"model_name": "mosaicml/mpt-7b-instruct", "type": "open"},
101
- {"model_name": "01-ai/Yi-1.5-9B-Chat", "type": "open"},
102
- {"model_name": "anthropic/claude-3-sonnet", "type": "closed", "external_link": "https://www.anthropic.com/claude"},
103
- {"model_name": "cohere/command-r-plus", "type": "closed", "external_link": "https://cohere.com/models/command-r-plus"},
104
- {"model_name": "meta-llama/llama-3-8b-instruct", "type": "open"}
105
- ]
106
-
107
- np.random.seed(42) # For reproducibility
108
-
109
- model_data = []
110
- for model_info in models:
111
- model_name = model_info["model_name"]
112
- model_type = model_info["type"]
113
- external_link = model_info.get("external_link", None)
114
-
115
- # Generate random scores
116
- average = round(np.random.uniform(40, 90), 2)
117
- ifeval = round(np.random.uniform(30, 90), 2)
118
- bbhi = round(np.random.uniform(40, 85), 2)
119
- math = round(np.random.uniform(20, 80), 2)
120
- gpqa = round(np.random.uniform(10, 70), 2)
121
- mujb = round(np.random.uniform(10, 70), 2)
122
- mmlu = round(np.random.uniform(40, 85), 2)
123
- co2_cost = round(np.random.uniform(1, 100), 2)
124
-
125
- # If it's an open model, it should have a link to Hugging Face
126
- model_link = None
127
- if external_link:
128
- model_link = external_link
129
- elif "/" in model_name:
130
- model_link = f"https://huggingface.co/{model_name}"
131
- else:
132
- model_link = f"https://huggingface.co/models?search={model_name}"
133
-
134
- model_data.append({
135
- "model_name": model_name,
136
- "type": model_type,
137
- "model_link": model_link,
138
- "scores": {
139
- "average": average,
140
- "ifeval": ifeval,
141
- "bbhi": bbhi,
142
- "math": math,
143
- "gpqa": gpqa,
144
- "mujb": mujb,
145
- "mmlu": mmlu
146
- },
147
- "co2_cost": co2_cost
148
- })
149
-
150
- # Sort by average score
151
- model_data.sort(key=lambda x: x["scores"]["average"], reverse=True)
152
-
153
- # Create the final data structure
154
- from datetime import datetime
155
- leaderboard_data = {
156
- "models": model_data,
157
- "updated_at": datetime.now().isoformat()
158
- }
159
-
160
- return leaderboard_data
161
-
162
- @app.get("/")
163
- def read_root():
164
- return {"message": "Welcome to the LLM Leaderboard API"}
165
-
166
- @app.get("/api/leaderboard", response_model=LeaderboardData)
167
- def get_leaderboard(refresh: bool = Query(False, description="Force refresh data from source")):
168
- """
169
- Get the full leaderboard data.
170
- If refresh is True, force fetch from source instead of using cache.
171
- """
172
- external_data = fetch_external_leaderboard_data(refresh=refresh)
173
-
174
- if external_data:
175
- # Process external data to match our expected format
176
- try:
177
- # Here you would transform the external data to match LeaderboardData model
178
- # This is a simplified example - you'd need to adapt this to the actual structure
179
- return external_data
180
- except Exception as e:
181
- print(f"Error processing external data: {e}")
182
-
183
- # Fall back to sample data if external data can't be processed
184
- return generate_sample_data()
185
-
186
- @app.get("/api/models", response_model=List[str])
187
- def get_models():
188
- """Get a list of all model names in the leaderboard"""
189
- data = fetch_external_leaderboard_data() or generate_sample_data()
190
- return [model["model_name"] for model in data["models"]]
191
-
192
- @app.get("/api/model/{model_name}", response_model=LeaderboardModel)
193
- def get_model_details(model_name: str):
194
- """Get detailed information about a specific model"""
195
- data = fetch_external_leaderboard_data() or generate_sample_data()
196
-
197
- for model in data["models"]:
198
- if model["model_name"] == model_name:
199
- return model
200
-
201
- raise HTTPException(status_code=404, detail=f"Model {model_name} not found")
202
-
203
- @app.get("/api/filters")
204
- def get_filter_counts():
205
- """
206
- Get counts for different filter categories to display in the UI.
207
- This matches what's shown in the 'Quick Filters' section of the leaderboard.
208
- """
209
- data = fetch_external_leaderboard_data() or generate_sample_data()
210
-
211
- # Count models by different categories
212
- edge_count = 0
213
- consumer_count = 0
214
- midrange_count = 0
215
- gpu_rich_count = 0
216
- official_count = 0
217
-
218
- for model in data["models"]:
219
- # Edge devices (typically small models)
220
- if "scores" in model and model["scores"].get("average", 0) < 45:
221
- edge_count += 1
222
-
223
- # Consumer (moderate size/performance)
224
- if "scores" in model and 45 <= model["scores"].get("average", 0) < 55:
225
- consumer_count += 1
226
-
227
- # Mid-range
228
- if "scores" in model and 55 <= model["scores"].get("average", 0) < 65:
229
- midrange_count += 1
230
-
231
- # GPU-rich (high-end models)
232
- if "scores" in model and model["scores"].get("average", 0) >= 65:
233
- gpu_rich_count += 1
234
-
235
- # Official providers
236
- # This is just a placeholder logic - adapt to your actual criteria
237
- if "/" not in model["model_name"] or model["model_name"].startswith("meta/") or model["model_name"].startswith("google/"):
238
- official_count += 1
239
-
240
- return {
241
- "edge_devices": edge_count,
242
- "consumers": consumer_count,
243
- "midrange": midrange_count,
244
- "gpu_rich": gpu_rich_count,
245
- "official_providers": official_count
246
- }
247
-
248
- if __name__ == "__main__":
249
- import uvicorn
250
- uvicorn.run(app, host="0.0.0.0", port=8000)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
hf.svg DELETED
requirements.txt DELETED
@@ -1,30 +0,0 @@
1
- aiofiles>=22.0,<24.0
2
- anyio>=3.0,<5.0
3
- audioop-lts<1.0; python_version >= "3.13" # provides the 'audioop' module, which was removed from the standard library in Python 3.13 and is still required by pydub
4
- fastapi>=0.95.0
5
- ffmpy
6
- groovy~=0.1
7
- gradio>=5.0.0
8
- httpx>=0.24.1
9
- huggingface_hub>=0.28.1
10
- Jinja2<4.0
11
- markupsafe>=2.0,<4.0
12
- numpy>=1.20.0
13
- orjson~=3.0
14
- packaging
15
- pandas>=1.3.0
16
- pillow>=8.0,<12.0
17
- pydantic>=1.10.0
18
- python-multipart>=0.0.18 # required for fastapi forms.
19
- pydub
20
- pyyaml>=5.0,<7.0
21
- ruff>=0.9.3; sys.platform != 'emscripten' # needed here for custom component docs generation
22
- safehttpx>=0.1.6,<0.2.0
23
- semantic_version~=2.0
24
- starlette>=0.40.0,<1.0; sys.platform != 'emscripten'
25
- tomlkit>=0.12.0,<0.14.0
26
- typer>=0.12,<1.0; sys.platform != 'emscripten'
27
- typing_extensions~=4.0
28
- urllib3~=2.0; sys.platform == 'emscripten' # urllib3 is used for Lite support. Version spec can be omitted because urllib3==2.1.0 is prebuilt for Pyodide and urllib3>=2.2.0 supports Pyodide as well.
29
- uvicorn>=0.21.0
30
- requests>=2.28.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
run.py DELETED
@@ -1,280 +0,0 @@
1
- import pandas as pd
2
- import requests
3
- from urllib.parse import quote
4
- import subprocess
5
- import os
6
- import time
7
- import sys
8
- from datetime import datetime
9
-
10
- import gradio as gr
11
-
12
- def style_dataframe(df):
13
- if len(df) == 0:
14
- return df
15
-
16
- # Define the columns to highlight based on the screenshot
17
- highlight_cols = ["Average", "IFEval", "BBHI", "MATH", "GPQA", "MUJB", "MMLU-PRO"]
18
-
19
- # Initialize the styler
20
- styled = df.style
21
-
22
- # Function to create gradient background based on value
23
- def highlight_green(val):
24
- try:
25
- # Extract numeric value from string (remove % if present)
26
- val_float = float(str(val).replace('%', '').replace(' kg', ''))
27
-
28
- # Create gradient background filling based on the value percentage
29
- # Use the exact colors from the example
30
- return f'background: linear-gradient(90deg, rgba(46, 125, 50, 0.5) {val_float}%, rgba(46, 125, 50, 0.1) {val_float}%); color: white;'
31
- except:
32
- return 'background-color: #121212; color: white;'
33
-
34
- # Apply the highlighting to performance metric columns
35
- for col in highlight_cols:
36
- styled = styled.applymap(highlight_green, subset=[col])
37
-
38
- styled = styled.set_properties(
39
- subset=["Model"],
40
- **{'color': '#4da6ff'}
41
- )
42
-
43
- return styled
44
-
45
- def increment_counter(counter):
46
- return counter + 1
47
-
48
- def handle_select(evt: gr.SelectData, counter):
49
- return counter + 1, f"{evt.index}", f"{evt.value}"
50
-
51
- def start_api_server():
52
- api_process = subprocess.Popen(
53
- [sys.executable, "api.py"],
54
- cwd=os.path.dirname(os.path.abspath(__file__))
55
- )
56
- # Give the API server a moment to start
57
- time.sleep(2)
58
- return api_process
59
-
60
- def apply_filters(filters, models_data):
61
- if not filters or len(filters) == 0:
62
- # No filters selected, return all data
63
- return models_data
64
-
65
- filtered_models = []
66
-
67
- for model in models_data:
68
- scores = model.get("scores", {})
69
- average_score = scores.get("average", 0)
70
- model_name = model.get("model_name", "")
71
-
72
- # Check which filters are selected and apply them
73
- should_include = False
74
- for filter_option in filters:
75
- if "Edge Devices" in filter_option and average_score < 45 or "Consumers" in filter_option and 45 <= average_score < 55 or "Mid-range" in filter_option and 55 <= average_score < 65 or "GPU-rich" in filter_option and average_score >= 65 or "Official Providers" in filter_option and ("/" not in model_name or
76
- model_name.startswith("meta/") or
77
- model_name.startswith("google/") or
78
- model_name.startswith("openai/") or
79
- model_name.startswith("microsoft/")):
80
- should_include = True
81
- break
82
-
83
- if should_include:
84
- filtered_models.append(model)
85
-
86
- return filtered_models
87
-
88
- def format_timestamp(timestamp_str):
89
- try:
90
- # Parse ISO format timestamp
91
-
92
- # Try to parse ISO format with timezone
93
- try:
94
- dt = datetime.fromisoformat(timestamp_str)
95
- except:
96
- # Fallback for different timestamp formats
97
- dt = datetime.strptime(timestamp_str, "%Y-%m-%dT%H:%M:%S.%f")
98
-
99
- # Format nicely for display
100
- return dt.strftime("%B %d, %Y at %I:%M %p")
101
- except Exception as e:
102
- print(f"Error formatting timestamp: {e}")
103
- return timestamp_str
104
-
105
- def create_leaderboard_data(selected_filters=None):
106
- try:
107
- response = requests.get("http://localhost:8000/api/leaderboard")
108
- if response.status_code == 200:
109
- data = response.json()
110
- models_data = data.get("models", [])
111
- updated_at = data.get("updated_at", "Unknown")
112
- formatted_time = format_timestamp(updated_at)
113
-
114
- # Apply filters if any are selected
115
- if selected_filters:
116
- models_data = apply_filters(selected_filters, models_data)
117
-
118
- rows = []
119
- for i, model in enumerate(models_data, 1):
120
- model_name = model["model_name"]
121
- model_type = model["type"]
122
- scores = model["scores"]
123
- co2_cost = model.get("co2_cost", "N/A")
124
-
125
- # Only use green for open and red for closed
126
- emoji = "🟢" if model_type.lower() == "open" else "🔴"
127
- type_with_emoji = f"{emoji} {model_type.upper()}"
128
-
129
- # Use model_link from API if available, otherwise create one
130
- if "model_link" in model and model["model_link"]:
131
- model_link = f"[{model_name}]({model['model_link']})"
132
- # Format model name with link
133
- elif "/" in model_name:
134
- org, name = model_name.split("/", 1)
135
- model_link = f"[{model_name}](https://huggingface.co/{quote(model_name)})"
136
- else:
137
- model_link = f"[{model_name}](https://huggingface.co/models?search={quote(model_name)})"
138
-
139
- rows.append([
140
- i, # Rank
141
- type_with_emoji,
142
- model_link,
143
- f"{scores.get('average', 0):.2f}",
144
- f"{scores.get('ifeval', 0):.2f}",
145
- f"{scores.get('bbhi', 0):.2f}",
146
- f"{scores.get('math', 0):.2f}",
147
- f"{scores.get('gpqa', 0):.2f}",
148
- f"{scores.get('mujb', 0):.2f}",
149
- f"{scores.get('mmlu', 0):.2f}",
150
- f"{co2_cost}" if isinstance(co2_cost, (int, float)) else co2_cost
151
- ])
152
-
153
- df = pd.DataFrame(rows, columns=["Rank", "Type", "Model", "Average", "IFEval", "BBHI", "MATH", "GPQA", "MUJB", "MMLU-PRO", "CO_Cost"])
154
- styled_df = style_dataframe(df)
155
- return styled_df, formatted_time
156
- else:
157
- # Return an empty dataframe with proper columns if API fails
158
- empty_df = pd.DataFrame(columns=["Rank", "Type", "Model", "Average", "IFEval", "BBHI", "MATH", "GPQA", "MUJB", "MMLU-PRO", "CO_Cost"])
159
- return empty_df, "Unknown"
160
- except Exception as e:
161
- print(f"Error fetching leaderboard data: {e}")
162
- # Return an empty dataframe with proper columns if API fails
163
- empty_df = pd.DataFrame(columns=["Rank", "Type", "Model", "Average", "IFEval", "BBHI", "MATH", "GPQA", "MUJB", "MMLU-PRO", "CO_Cost"])
164
- return empty_df, "Unknown"
165
-
166
- def load_svg(file_path="hf.svg"):
167
- with open(file_path) as f:
168
- svg_content = f.read()
169
- return svg_content
170
-
171
- def get_filter_data():
172
- try:
173
- response = requests.get("http://localhost:8000/api/filters")
174
- if response.status_code == 200:
175
- filter_data = response.json()
176
- return [
177
- f"For Edge Devices · {filter_data.get('edge_devices', 0)}",
178
- f"For Consumers · {filter_data.get('consumers', 0)}",
179
- f"Mid-range · {filter_data.get('midrange', 0)}",
180
- f"For the GPU-rich · {filter_data.get('gpu_rich', 0)}",
181
- f"Only Official Providers · {filter_data.get('official_providers', 0)}"
182
- ]
183
- else:
184
- return [
185
- "For Edge Devices · 0",
186
- "For Consumers · 0",
187
- "Mid-range · 0",
188
- "For the GPU-rich · 0",
189
- "Only Official Providers · 0"
190
- ]
191
- except Exception as e:
192
- print(f"Error fetching filter data: {e}")
193
- return [
194
- "For Edge Devices · 0",
195
- "For Consumers · 0",
196
- "Mid-range · 0",
197
- "For the GPU-rich · 0",
198
- "Only Official Providers · 0"
199
- ]
200
-
201
- def refresh_leaderboard(selected_filters=None):
202
- try:
203
- # Request a refresh from the API
204
- requests.get("http://localhost:8000/api/leaderboard?refresh=true")
205
- # Get updated data
206
- df, timestamp = create_leaderboard_data(selected_filters)
207
- filter_choices = get_filter_data()
208
- return df, filter_choices, f"Last updated: {timestamp}"
209
- except Exception as e:
210
- print(f"Error refreshing data: {e}")
211
- return None, None, "Error refreshing data"
212
-
213
- def update_table(filters):
214
- df, timestamp = create_leaderboard_data(filters)
215
- return df, f"Last updated: {timestamp}"
216
-
217
- def load_css(file_path="style.css"):
218
- try:
219
- current_dir = os.path.dirname(os.path.abspath(__file__))
220
- css_path = os.path.join(current_dir, file_path)
221
- with open(css_path) as f:
222
- css_content = f.read()
223
- return css_content
224
- except Exception as e:
225
- print(f"Error loading CSS file: {e}")
226
- # Return a basic CSS if file not found
227
- return """
228
- .dataframe-container {
229
- border-radius: 8px;
230
- box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1);
231
- }
232
- """
233
-
234
- with gr.Blocks(css=load_css()) as demo:
235
- df, timestamp = create_leaderboard_data()
236
-
237
- with gr.Row():
238
- svg_content = load_svg()
239
- gr.HTML(svg_content)
240
- gr.HTML("""
241
- <div style="display: flex; align-items: center; justify-content: center; margin-bottom: 10px;">
242
- <div class="leaderboard-title">Open LLM Leaderboard</div>
243
- </div>
244
- <div class="leaderboard-subtitle">Comparing Large Language Models in an open and reproducible way</div>
245
- """)
246
- status_text = gr.HTML(f"""<div style="text-align: center; margin-bottom: 10px;">Last updated: {timestamp}</div>""")
247
-
248
- with gr.Row(elem_classes="filters-container"):
249
- filter_choices = get_filter_data()
250
- filters = gr.CheckboxGroup(
251
- label="Quick Filters",
252
- choices=filter_choices,
253
- )
254
-
255
- # Create and display the dataframe
256
-
257
- leaderboard_table = gr.Dataframe(
258
- value=df,
259
- headers=["Rank", "Type", "Model", "Average", "IFEval", "BBHI", "MATH", "GPQA", "MUJB", "MMLU-PRO", "CO_Cost"],
260
- datatype=["number", "str", "markdown", "str", "str", "str", "str", "str", "str", "str", "str"],
261
- elem_id="leaderboard-table",
262
- elem_classes="dataframe-container",
263
- interactive=False,
264
- max_height=600,
265
- show_search="search",
266
- show_copy_button=True,
267
- show_fullscreen_button=True,
268
- pinned_columns=2,
269
- column_widths=["5%", "10%", "35%", "7%", "7%", "7%", "7%", "7%", "7%", "7%", "6%"]
270
- )
271
-
272
- refresh_btn = gr.Button("Refresh Data", elem_classes="refresh-btn")
273
- refresh_btn.click(refresh_leaderboard, inputs=[filters], outputs=[leaderboard_table, filters, status_text])
274
-
275
- filters.change(update_table, inputs=[filters], outputs=[leaderboard_table, status_text])
276
-
277
- if __name__ == "__main__":
278
- api_process = start_api_server()
279
- demo.launch()
280
- api_process.terminate()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
run_all.py DELETED
@@ -1,68 +0,0 @@
1
- #!/usr/bin/env python3
2
- import subprocess
3
- import sys
4
- import os
5
- import time
6
- import signal
7
- import atexit
8
-
9
- def start_api_server():
10
- api_process = subprocess.Popen(
11
- [sys.executable, "api.py"],
12
- cwd=os.path.dirname(os.path.abspath(__file__))
13
- )
14
- print("API server started (PID:", api_process.pid, ")")
15
- return api_process
16
-
17
- def start_gradio_app():
18
- gradio_process = subprocess.Popen(
19
- [sys.executable, "-c", "import gradio as gr; import run; run.demo.launch()"],
20
- cwd=os.path.dirname(os.path.abspath(__file__))
21
- )
22
- print("Gradio interface started (PID:", gradio_process.pid, ")")
23
- return gradio_process
24
-
25
- def cleanup_processes(api_process, gradio_process):
26
- print("\nShutting down services...")
27
-
28
- if api_process and api_process.poll() is None:
29
- api_process.terminate()
30
- print("API server terminated")
31
-
32
- if gradio_process and gradio_process.poll() is None:
33
- gradio_process.terminate()
34
- print("Gradio interface terminated")
35
-
36
- def main():
37
- api_process = start_api_server()
38
- # Give the API server a moment to start
39
- time.sleep(2)
40
-
41
- gradio_process = start_gradio_app()
42
-
43
- # Register cleanup function to be called on exit
44
- atexit.register(cleanup_processes, api_process, gradio_process)
45
-
46
- # Handle keyboard interrupts
47
- def signal_handler(sig, frame):
48
- print("\nReceived termination signal")
49
- cleanup_processes(api_process, gradio_process)
50
- sys.exit(0)
51
-
52
- signal.signal(signal.SIGINT, signal_handler)
53
- signal.signal(signal.SIGTERM, signal_handler)
54
-
55
- print("\nLeaderboard application started!")
56
- print("- API server running at http://localhost:8000")
57
- print("- Gradio interface running at http://localhost:7860")
58
- print("\nPress Ctrl+C to stop all services")
59
-
60
- # Keep the main process running
61
- try:
62
- while True:
63
- time.sleep(1)
64
- except KeyboardInterrupt:
65
- pass
66
-
67
- if __name__ == "__main__":
68
- main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
style.css DELETED
@@ -1,44 +0,0 @@
1
- .html-container {
2
- text-align: center;
3
- display: flex;
4
- justify-content: center;
5
- width: 100%;
6
- }
7
-
8
- .dataframe-container {
9
- margin-top: 0.5rem;
10
- margin-bottom: 0.5rem;
11
- }
12
-
13
- .leaderboard-title {
14
- font-size: 1.5rem;
15
- font-weight: bold;
16
- margin-bottom: 0.25rem;
17
- color: #f0f0f0;
18
- }
19
-
20
- .leaderboard-subtitle {
21
- font-size: 0.9rem;
22
- margin-bottom: 1rem;
23
- color: #a0a0a0;
24
- }
25
-
26
- .filters-container fieldset {
27
- display: flex;
28
- flex-direction: row;
29
- justify-content: center;
30
- align-items: center;
31
- gap: 0.5rem;
32
- }
33
-
34
- .refresh-btn {
35
- margin-top: 0.5rem;
36
- }
37
-
38
- .status-container {
39
- display: flex;
40
- justify-content: flex-end;
41
- font-size: 0.75rem;
42
- color: #a0a0a0;
43
- }
44
-