# config.py — settings for the MLRC-Bench leaderboard app
# Theme and configuration settings for the Model Capability Leaderboard application

# Theme colors - using dark mode by default.
# Every value is a CSS color/gradient string injected into the UI's styles.
dark_theme = {
    # Page and card surfaces
    'bg_color': '#1a202c',     # overall page background
    'text_color': '#e2e8f0',   # default body text
    'card_bg': '#2d3748',      # card/panel background
    # Accent palette (indigo/violet pair, also used in the gradient below)
    'primary': '#818cf8',
    'secondary': '#a78bfa',
    'border': '#4a5568',
    'hover': '#4a5568',
    'table_header': '#2d3748',
    'table_border': '#4a5568',
    'heading_color': '#e2e8f0',
    'gradient': 'linear-gradient(135deg, #818cf8 0%, #a78bfa 100%)',
    # Callout boxes (warning = orange-on-rust, info = blue-on-navy)
    'warning_bg': '#7c2d12',
    'warning_border': '#f97316',
    'info_bg': '#1e3a8a',
    'info_border': '#3b82f6',
    'footer_color': '#a0aec0',
    'title_color': 'white',
    'subtitle_color': 'rgba(255, 255, 255, 0.9)',
    'footer_border': '#4a5568',
    'task_title': '#a5b4fc',
    'task_border': '#818cf8',
    # Table-specific colors for the custom table
    'table_bg': '#0a0a0a',
    'table_border_color': '#333',
    'table_header_bg': '#191919',
    'table_subheader_bg': '#141414',
    'table_average_column_bg': '#202020',   # highlights the "Average" column
    'table_row_odd': '#0a0a0a',             # zebra striping, odd rows
    'table_row_even': '#111111',            # zebra striping, even rows
    'table_hover_bg': '#1a1a1a',
    'positive_value_color': '#4ade80',      # green for positive metric values
    'negative_value_color': '#f87171'       # red for negative metric values
}
# Application settings.
# NOTE(review): key names match Streamlit's st.set_page_config() arguments
# (page title, layout, sidebar state) — confirm against the app entry point.
app_config = {
    'title': 'MLRC-Bench Leaderboard',
    'description': 'Machine Learning Research Challenges Benchmark for AI Agents',
    # 'wide' uses the full browser width instead of the centered column
    'layout': 'wide',
    # sidebar starts hidden; user can expand it manually
    'initial_sidebar_state': 'collapsed'
}
# Metrics configuration.
# Maps a display name to its data file and rendering hints:
#   file      - path (relative to the app root) of the JSON with per-model scores
#   min/max   - expected score range, used to normalize the color scale
#   color_map - colormap name for cell coloring (e.g. matplotlib's "RdYlGn")
metrics_config = {
    "Margin to Human": {
        "file": "src/data/metrics/margin_to_human.json",
        "description": "Performance on Machine Learning Research Challenges. Higher values indicate better research capabilities.",
        "min_value": -100,  # Approximate, adjust as needed
        "max_value": 50,    # Approximate, adjust as needed
        "color_map": "RdYlGn"
    }
    # Future metrics can be added here
    # "Another Metric": {
    #     "file": "src/data/metrics/another_metric.json",
    #     "description": "Description of another metric",
    #     "min_value": 0,
    #     "max_value": 100,
    #     "color_map": "viridis"
    # }
}
# Model type categories.
# Maps each leaderboard entry (agent scaffold + backbone model) to whether the
# backbone is "Closed Source" or "Open Weights"; used for filtering/labeling.
model_categories = {
    "MLAB (claude-3-5-sonnet-v2)": "Closed Source",
    "MLAB (gemini-exp-1206)": "Closed Source",
    "MLAB (o3-mini)": "Closed Source",
    "MLAB (gpt-4o)": "Closed Source",
    "MLAB (llama3-1-405b-instruct)": "Open Weights",
    "CoI-Agent (o1) + MLAB (gpt-4o)": "Closed Source"
    # More models would be added here as needed
}
# Task descriptions.
# Maps each benchmark task name (as shown in the leaderboard columns) to a
# one-sentence human-readable description displayed in the UI.
tasks_info = {
    "Perception Temporal Action Loc": "Testing the model's ability to understand and localize actions within temporal sequences of events.",
    "Llm Merging": "Assessing the capability to effectively merge knowledge from multiple language models.",
    "Meta Learning": "Evaluating the model's ability to learn how to learn - adapting quickly to new tasks.",
    "Product Recommendation": "Testing the model's ability to recommend relevant products based on user preferences and behavior.",
    "Machine Unlearning": "Evaluating how well models can 'unlearn' specific information when required.",
    "Backdoor Trigger Recovery": "Testing resilience against backdoor attacks and ability to recover from triggered behaviors."
}