"""Gradio demo that renders a mock "Open LLM Leaderboard" table.

The scores are synthetic: they are drawn from a random generator seeded with
a fixed value, so every call to ``create_leaderboard_data`` produces the same
table.
"""

from urllib.parse import quote

import gradio as gr
import numpy as np
import pandas as pd

# Column order of the leaderboard table; shared by the DataFrame and the
# Gradio component so the two cannot drift apart.
COLUMNS = [
    "Rank", "Type", "Model", "Average", "IFEval", "BBHI",
    "MATH", "GPQA", "MUJB", "MMLU-PRO", "CO_Cost",
]

# Score columns that are rendered with a green progress-bar background.
HIGHLIGHT_COLUMNS = ["Average", "IFEval", "BBHI", "MATH", "GPQA", "MUJB", "MMLU-PRO"]

# (column, low, high) bounds for the synthetic scores. The order matters: the
# values are drawn sequentially from one seeded generator.
_SCORE_RANGES = (
    ("Average", 40, 90),
    ("IFEval", 30, 90),
    ("BBHI", 40, 85),
    ("MATH", 20, 80),
    ("GPQA", 10, 70),
    ("MUJB", 10, 70),
    ("MMLU-PRO", 40, 85),
)

# Style used for cells whose content cannot be read as a finite number.
_FALLBACK_CELL_STYLE = 'background-color: #121212; color: white;'

_MODELS = [
    {"model_name": "meta-llama/llama-3-70b-instruct", "type": "open"},
    {"model_name": "mistralai/Mistral-7B-Instruct-v0.3", "type": "open"},
    {"model_name": "google/gemma-7b-it", "type": "open"},
    {"model_name": "Qwen/Qwen2-7B-Instruct", "type": "open"},
    {"model_name": "anthropic/claude-3-opus", "type": "closed"},
    {"model_name": "OpenAI/gpt-4o", "type": "closed"},
    {"model_name": "01-ai/Yi-1.5-34B-Chat", "type": "open"},
    {"model_name": "google/gemma-2b", "type": "open"},
    {"model_name": "microsoft/phi-3-mini-4k-instruct", "type": "open"},
    {"model_name": "microsoft/phi-3-mini-128k-instruct", "type": "open"},
    {"model_name": "stabilityai/stable-beluga-7b", "type": "open"},
    {"model_name": "togethercomputer/RedPajama-INCITE-7B-Instruct", "type": "open"},
    {"model_name": "databricks/dbrx-instruct", "type": "closed"},
    {"model_name": "mosaicml/mpt-7b-instruct", "type": "open"},
    {"model_name": "01-ai/Yi-1.5-9B-Chat", "type": "open"},
]


def _highlight_green(val):
    """Return the CSS for one score cell: a green bar filled to ``val`` percent.

    ``val`` may be a number or a string such as ``"58.73"``, ``"58.73%"`` or
    ``"58.73 kg"``. Anything that does not parse to a finite number gets the
    plain dark fallback style instead.
    """
    try:
        val_float = float(str(val).replace('%', '').replace(' kg', ''))
    except ValueError:
        return _FALLBACK_CELL_STYLE
    # NaN / inf would otherwise be interpolated into the CSS as "nan%".
    if not np.isfinite(val_float):
        return _FALLBACK_CELL_STYLE
    return (
        f'background: linear-gradient(90deg, rgba(46, 125, 50, 0.5) {val_float}%, '
        f'rgba(46, 125, 50, 0.1) {val_float}%); color: white;'
    )


def style_dataframe(df: pd.DataFrame):
    """Apply the leaderboard cell styling to ``df``.

    Args:
        df: Table to style. Columns listed in ``HIGHLIGHT_COLUMNS`` get the
            green bar background and the ``Model`` column gets a blue text
            colour; columns that are absent are skipped.

    Returns:
        ``df`` itself when it is empty, otherwise a pandas ``Styler``
        wrapping it.
    """
    if df.empty:
        return df

    styled = df.style

    score_cols = [col for col in HIGHLIGHT_COLUMNS if col in df.columns]
    if score_cols:
        # Styler.applymap was renamed to Styler.map in pandas 2.1; prefer the
        # new name and fall back for older versions.
        apply_elementwise = getattr(styled, "map", None) or styled.applymap
        styled = apply_elementwise(_highlight_green, subset=score_cols)

    if "Model" in df.columns:
        styled = styled.set_properties(subset=["Model"], **{'color': '#4da6ff'})

    return styled


def create_leaderboard_data():
    """Build the styled mock leaderboard, ranked by descending ``Average``.

    Scores come from a local generator seeded with 42, so the result is the
    same on every call and the process-wide NumPy random state is untouched.

    Returns:
        The value returned by ``style_dataframe`` for the generated table.
    """
    # RandomState(42) yields the same stream as np.random.seed(42) followed by
    # np.random.uniform calls, without reseeding the global generator.
    rng = np.random.RandomState(42)

    rows = []
    for model in _MODELS:
        model_name = model["model_name"]
        model_type = model["type"]

        emoji = "🟢" if model_type.lower() == "open" else "🔴"
        type_with_emoji = f"{emoji} {model_type.upper()}"

        # "org/name" identifiers link straight to the repo page; bare names
        # fall back to a hub search.
        if "/" in model_name:
            model_link = f"[{model_name}](https://huggingface.co/{quote(model_name)})"
        else:
            model_link = f"[{model_name}](https://huggingface.co/models?search={quote(model_name)})"

        scores = [round(rng.uniform(low, high), 2) for _, low, high in _SCORE_RANGES]
        co2_cost = round(rng.uniform(1, 100), 2)

        rows.append([
            0,  # rank placeholder, assigned after sorting
            type_with_emoji,
            model_link,
            *(f"{score}" for score in scores),
            f"{co2_cost} kg",
        ])

    # Index 3 is the "Average" column (stored as a string).
    rows.sort(key=lambda row: float(row[3]), reverse=True)
    for rank, row in enumerate(rows, 1):
        row[0] = rank

    df = pd.DataFrame(rows, columns=COLUMNS)
    return style_dataframe(df)


def get_filter_data():
    """Return the quick-filter labels mapped to the count shown beside each."""
    return {
        "For Edge Devices": 5,
        "For Consumers": 4,
        "Mid-range": 4,
        "For the GPU-rich": 3,
        "Only Official Providers": 8,
    }


css = """
.html-container {
    text-align: center;
    display: flex;
    justify-content: center;
    width: 100%;
}
.dataframe-container {
    margin-top: 0.5rem;
    margin-bottom: 0.5rem;
}
.leaderboard-title {
    font-size: 1.5rem;
    font-weight: bold;
    margin-bottom: 0.25rem;
    color: #f0f0f0;
}
.leaderboard-subtitle {
    font-size: 0.9rem;
    margin-bottom: 1rem;
    color: #a0a0a0;
}
.filters-container {
    margin-bottom: 0.5rem;
}
.hf-logo {
    height: 1.5rem;
    margin-right: 0.5rem;
}
.header-container {
    display: flex;
    align-items: center;
    justify-content: center;
    margin-bottom: 10px;
}
.header-container svg {
    margin-right: 0.5rem;
    flex-shrink: 0;
}
"""

filter_data = get_filter_data()
filter_choices = [f"{key} · {value}" for key, value in filter_data.items()]

# HF logo SVG content to embed directly
# NOTE(review): the string holds no SVG markup here - confirm whether the
# logo content was lost and needs to be restored.
hf_logo_svg = """ """

with gr.Blocks(css=css) as demo:
    # NOTE(review): the header text carries no HTML tags, so the
    # .header-container / .leaderboard-title / .leaderboard-subtitle rules in
    # `css` are not referenced by it - confirm the intended markup.
    gr.HTML(f"""
{hf_logo_svg}

Open LLM Leaderboard
Comparing Large Language Models in an open and reproducible way
""")

    with gr.Row():
        # NOTE(review): the selection is not connected to any callback here.
        filters = gr.CheckboxGroup(
            label="Quick Filters",
            choices=filter_choices,
        )

    with gr.Row():
        status_text = gr.HTML("""
Last updated: June 25, 2024 at 10:30 AM
""")

    leaderboard_df = create_leaderboard_data()

    leaderboard_table = gr.Dataframe(
        value=leaderboard_df,
        headers=COLUMNS,
        datatype=["number", "str", "markdown", "str", "str", "str", "str", "str", "str", "str", "str"],
        elem_id="leaderboard-table",
        interactive=False,
        max_height=600,
        show_search="search",
        show_copy_button=True,
        show_fullscreen_button=True,
        pinned_columns=2,
        column_widths=["5%", "7%", "35%", "7%", "7%", "7%", "7%", "7%", "7%", "7%", "6%"],
    )

    refresh_btn = gr.Button("Refresh Data")
    refresh_btn.click(fn=create_leaderboard_data, outputs=leaderboard_table)

if __name__ == "__main__":
    demo.launch()