# config.py — settings for the MLRC-Bench leaderboard app
# Theme and configuration settings for the Model Capability Leaderboard application

# Theme colors - using dark mode by default.
# Every value is a CSS color/gradient string injected into the UI's styles.
dark_theme = {
    # Page and card surfaces
    'bg_color': '#1a202c',     # overall page background
    'text_color': '#e2e8f0',   # default body text
    'card_bg': '#2d3748',      # card/panel background
    # Accent palette (indigo/violet pair, also used in the gradient below)
    'primary': '#818cf8',
    'secondary': '#a78bfa',
    'border': '#4a5568',
    'hover': '#4a5568',
    'table_header': '#2d3748',
    'table_border': '#4a5568',
    'heading_color': '#e2e8f0',
    'gradient': 'linear-gradient(135deg, #818cf8 0%, #a78bfa 100%)',
    # Callout boxes (warning = orange-on-rust, info = blue-on-navy)
    'warning_bg': '#7c2d12',
    'warning_border': '#f97316',
    'info_bg': '#1e3a8a',
    'info_border': '#3b82f6',
    'footer_color': '#a0aec0',
    'title_color': 'white',
    'subtitle_color': 'rgba(255, 255, 255, 0.9)',
    'footer_border': '#4a5568',
    'task_title': '#a5b4fc',
    'task_border': '#818cf8',
    # Table-specific colors for the custom table
    'table_bg': '#0a0a0a',
    'table_border_color': '#333',
    'table_header_bg': '#191919',
    'table_subheader_bg': '#141414',
    'table_average_column_bg': '#202020',   # highlights the "Average" column
    'table_row_odd': '#0a0a0a',             # zebra striping, odd rows
    'table_row_even': '#111111',            # zebra striping, even rows
    'table_hover_bg': '#1a1a1a',
    'positive_value_color': '#4ade80',      # green for positive metric values
    'negative_value_color': '#f87171'       # red for negative metric values
}
# Application settings.
# NOTE(review): key names match Streamlit's st.set_page_config() arguments
# (page title, layout, sidebar state) — confirm against the app entry point.
app_config = {
    'title': 'MLRC-Bench Leaderboard',
    'description': 'Machine Learning Research Challenges Benchmark for AI Agents',
    # 'wide' uses the full browser width instead of the centered column
    'layout': 'wide',
    # sidebar starts hidden; user can expand it manually
    'initial_sidebar_state': 'collapsed'
}
# Metrics configuration.
# Maps a display name to its data file and rendering hints:
#   file      - path (relative to the app root) of the JSON with per-model scores
#   min/max   - expected score range, used to normalize the color scale
#   color_map - colormap name for cell coloring (e.g. matplotlib's "RdYlGn")
metrics_config = {
    "Margin to Human": {
        "file": "src/data/metrics/margin_to_human.json",
        "description": "Performance on Machine Learning Research Challenges. Higher values indicate better research capabilities.",
        "min_value": -100,  # Approximate, adjust as needed
        "max_value": 50,    # Approximate, adjust as needed
        "color_map": "RdYlGn"
    }
    # Future metrics can be added here
    # "Another Metric": {
    #     "file": "src/data/metrics/another_metric.json",
    #     "description": "Description of another metric",
    #     "min_value": 0,
    #     "max_value": 100,
    #     "color_map": "viridis"
    # }
}
# Model type categories.
# Maps each leaderboard entry (agent scaffold + backbone model) to whether the
# backbone is "Closed Source" or "Open Weights"; used for filtering/labeling.
model_categories = {
    "MLAB (claude-3-5-sonnet-v2)": "Closed Source",
    "MLAB (gemini-exp-1206)": "Closed Source",
    "MLAB (o3-mini)": "Closed Source",
    "MLAB (gpt-4o)": "Closed Source",
    "MLAB (llama3-1-405b-instruct)": "Open Weights",
    "CoI-Agent (o1) + MLAB (gpt-4o)": "Closed Source"
    # More models would be added here as needed
}
# Task descriptions.
# Maps each benchmark task name (as shown in the leaderboard columns) to a
# one-sentence human-readable description displayed in the UI.
tasks_info = {
    "Perception Temporal Action Loc": "Testing the model's ability to understand and localize actions within temporal sequences of events.",
    "Llm Merging": "Assessing the capability to effectively merge knowledge from multiple language models.",
    "Meta Learning": "Evaluating the model's ability to learn how to learn - adapting quickly to new tasks.",
    "Product Recommendation": "Testing the model's ability to recommend relevant products based on user preferences and behavior.",
    "Machine Unlearning": "Evaluating how well models can 'unlearn' specific information when required.",
    "Backdoor Trigger Recovery": "Testing resilience against backdoor attacks and ability to recover from triggered behaviors."
}