import { Box, Typography } from "@mui/material"; const createTooltipContent = (title, items) => ( {title} {items.map(({ label, description, subItems }, index) => (
  • {label}: {description} {subItems && ( {subItems.map((item, subIndex) => (
  • {item}
  • ))}
    )} ))}
    ); export const COLUMN_TOOLTIPS = { AVERAGE: createTooltipContent("Average score across all benchmarks:", [ { label: "Calculation", description: "Weighted average of normalized scores from all benchmarks", subItems: [ "Each benchmark is normalized to a 0-100 scale", "All normalised benchmarks are then averaged together", ], }, ]), IFEVAL: createTooltipContent("Instruction-Following Evaluation (IFEval):", [ { label: "Purpose", description: "Tests model's ability to follow explicit formatting instructions", subItems: ["Instruction following", "Formatting", "Generation"], }, { label: "Scoring", description: "Accuracy: was the format asked for strictly respected.", }, ]), BBH: createTooltipContent("Big Bench Hard (BBH):", [ { label: "Overview", description: "Collection of challenging for LLM tasks across domains", subItems: [ "Language understanding", "Mathematical reasoning", "Common sense and world knowledge", ], }, { label: "Scoring", description: "Accuracy: was the correct choice selected among the options.", }, ]), MATH: createTooltipContent( "Mathematics Aptitude Test of Heuristics (MATH), level 5:", [ { label: "Content", description: "High school level competitions mathematical problems", subItems: ["Complex algebra", "Geometry problems", "Advanced calculus"], }, { label: "Evaluation", description: "Accuracy: is the solution generated correct and in the expected format", }, ] ), GPQA: createTooltipContent("Graduate-Level Google-Proof Q&A (GPQA):", [ { label: "Focus", description: "PhD-level knowledge multiple choice questions in science", subItems: [ "PhD-level chemistry", "PhD-level biology", "PhD-level physics", ], }, { label: "Methodology", description: "Accuracy: was the correct choice selected among the options.", }, ]), MUSR: createTooltipContent("Multistep Soft Reasoning (MUSR):", [ { label: "Scope", description: "Reasoning and understanding on/of long texts", subItems: [ "Language understanding", "Reasoning capabilities", "Long context reasoning", ], }, { label: "Scoring", description: "Accuracy: was the correct choice selected among the options.", }, ]), MMLU_PRO: createTooltipContent( "Massive Multitask Language Understanding - Professional (MMLU-Pro):", [ { label: "Coverage", description: "Expertly reviewed multichoice questions across domains", subItems: [ "Medicine and healthcare", "Law and ethics", "Engineering", "Mathematics", ], }, { label: "Evaluation", description: "Accuracy: was the correct choice selected among the options.", }, ] ), ARCHITECTURE: createTooltipContent("Model Architecture Information:", [ { label: "Definition", description: "The fundamental structure and design of the model", subItems: [ "Base architecture type (e.g., Llama, Mistral, GPT-J)", "Specific architectural innovations and improvements", "Model family and version information", "Core design principles and techniques used", ], }, { label: "Impact", description: "How architecture affects model capabilities", subItems: [ "Influences model's learning capacity and efficiency", "Determines hardware compatibility and requirements", "Affects inference speed and memory usage", ], }, ]), PRECISION: createTooltipContent("Numerical Precision Format:", [ { label: "Overview", description: "Data format used to store model weights and perform computations", subItems: [ "BFloat16: Brain Float format, good for training stability", "Float16: Half precision, balances accuracy and speed", "Int8/Int4: Quantized formats for efficiency", "GPTQ/AWQ: Advanced quantization techniques", ], }, { label: "Impact", description: "How precision affects model deployment", subItems: [ "Higher precision = better accuracy but more memory usage", "Lower precision = faster inference and smaller size", "Different hardware compatibility requirements", "Trade-off between model quality and resource usage", ], }, { label: "Use Cases", description: "Choosing the right precision format", subItems: [ "Production deployment optimization", "Resource-constrained environments", "High-performance computing scenarios", ], }, ]), FLAGS: createTooltipContent("Model Flags and Special Features:", [ { label: "Purpose", description: "Special indicators and capabilities of the model", subItems: [ "Safeguards and content filtering features", "Specialized training techniques used", "Hardware optimization flags", "Deployment-specific configurations", ], }, { label: "Common Flags", description: "Frequently used model indicators", subItems: [ "RLHF: Reinforcement Learning from Human Feedback", "DPO: Direct Preference Optimization", "MoE: Mixture of Experts architecture", "Flash Attention: Optimized attention implementation", ], }, ]), PARAMETERS: createTooltipContent("Model Parameters:", [ { label: "Measurement", description: "Total number of trainable parameters in billions", subItems: [ "Indicates model capacity and complexity", "Correlates with computational requirements", "Influences memory usage and inference speed", ], }, ]), LICENSE: createTooltipContent("Model License Information:", [ { label: "Importance", description: "Legal terms governing model usage and distribution", subItems: [ "Commercial vs non-commercial use", "Attribution requirements", "Modification and redistribution rights", "Liability and warranty terms", ], }, ]), CO2_COST: createTooltipContent("Carbon Dioxide Emissions:", [ { label: "What is it?", description: "CO₂ emissions of the model evaluation ", subItems: [ "Only focuses on model inference for our specific setup", "Considers data center location and energy mix", "Allows equivalent comparision of models on our use case", ], }, { label: "Why it matters", description: "Environmental impact of AI model training", subItems: [ "Large models can have significant carbon footprints", "Helps make informed choices about model selection", "Promotes awareness of AI's environmental impact", ], }, { label: "Learn more", description: "For detailed information about our CO₂ calculation methodology, visit:", subItems: [ Carbon Emissions Documentation ↗ , ], }, ]), }; export const UI_TOOLTIPS = { COLUMN_SELECTOR: "Choose which columns to display in the table", DISPLAY_OPTIONS: createTooltipContent("Table Display Options", [ { label: "Overview", description: "Configure how the table displays data and information", subItems: [ "Row size and layout", "Score display format", "Ranking calculation", "Average score computation", ], }, ]), SEARCH_BAR: createTooltipContent("Advanced Model Search", [ { label: "Name Search", description: "Search directly by model name", subItems: [ "Supports regular expressions (e.g., ^mistral.*7b)", "Case sensitive", ], }, { label: "Field Search", description: "Use @field:value syntax for precise filtering", subItems: [ "@architecture:llama - Filter by architecture", "@license:mit - Filter by license", "@precision:float16 - Filter by precision", "@type:chat - Filter by model type", ], }, { label: "Multiple Searches", description: "Combine multiple criteria using semicolons", subItems: [ "meta @license:mit; @architecture:llama", "^mistral.*7b; @precision:float16", ], }, ]), QUICK_FILTERS: createTooltipContent( "Filter models based on their size and capabilities:", [ { label: "Small Models (1.7B-7B)", description: "Efficient models for consumer hardware and edge devices, optimized for fast inference.", }, { label: "Medium Models (7B-70B)", description: "Balanced performance and resource usage, ideal for most production use cases.", }, { label: "Large Models (70B+)", description: "State-of-the-art performance for complex tasks, requires significant computing power.", }, { label: "Official Providers", description: "Models directly maintained by their original creators, ensuring reliability and up-to-date performance.", }, ] ), ROW_SIZE: { title: "Row Size", description: "Adjust the height of table rows. Compact is ideal for viewing more data at once, while Large provides better readability and touch targets.", }, SCORE_DISPLAY: { title: "Score Display", description: "Choose between normalized scores (0-100% scale for easy comparison) or raw scores (actual benchmark results). Normalized scores help compare performance across different benchmarks, while raw scores show actual benchmark outputs.", }, RANKING_MODE: { title: "Ranking Mode", description: "Choose between static ranking (original position in the full leaderboard) or dynamic ranking (position based on current filters and sorting).", }, AVERAGE_SCORE: { title: "Average Score Calculation", description: "Define how the average score is calculated. 'All Scores' uses all benchmarks, while 'Visible Only' calculates the average using only the visible benchmark columns.", }, }; export const getTooltipStyle = {}; export const TABLE_TOOLTIPS = { HUB_LINK: (modelName) => `View ${modelName} on Hugging Face Hub`, EVAL_RESULTS: (modelName) => `View detailed evaluation results for ${modelName}`, POSITION_CHANGE: (change) => `${Math.abs(change)} position${Math.abs(change) > 1 ? "s" : ""} ${ change > 0 ? "up" : "down" }`, METADATA: { TYPE: (type) => type || "-", ARCHITECTURE: (arch) => arch || "-", PRECISION: (precision) => precision || "-", LICENSE: (license) => license || "-", UPLOAD_DATE: (date) => date || "-", SUBMISSION_DATE: (date) => date || "-", BASE_MODEL: (model) => model || "-", }, };