import gradio as gr
import pandas as pd
import os
import re
from datetime import datetime
from huggingface_hub import hf_hub_download
from huggingface_hub import HfApi, HfFolder
from constants import CITATION_TEXT
LEADERBOARD_FILE = "leaderboard.csv"
GROUND_TRUTH_FILE = "ground_truth.csv"
LAST_UPDATED = datetime.now().strftime("%B %d, %Y")
# Ensure authentication and suppress warnings
os.environ["HF_HUB_DISABLE_SYMLINKS_WARNING"] = "1"
HF_TOKEN = os.getenv("HF_TOKEN")
if not HF_TOKEN:
raise ValueError("HF_TOKEN environment variable is not set or invalid.")
def initialize_leaderboard_file():
"""
Ensure the leaderboard file exists and has the correct headers.
"""
if not os.path.exists(LEADERBOARD_FILE):
pd.DataFrame(columns=[
"Model Name", "Overall Accuracy", "Correct Predictions",
"Total Questions", "Timestamp", "Team Name"
]).to_csv(LEADERBOARD_FILE, index=False)
elif os.stat(LEADERBOARD_FILE).st_size == 0:
pd.DataFrame(columns=[
"Model Name", "Overall Accuracy", "Correct Predictions",
"Total Questions", "Timestamp", "Team Name"
]).to_csv(LEADERBOARD_FILE, index=False)
def clean_answer(answer):
if pd.isna(answer):
return None
answer = str(answer)
clean = re.sub(r'[^A-Da-d]', '', answer)
return clean[0].upper() if clean else None
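# Illustrative behaviour of clean_answer (examples added for clarity, not in the original):
#   clean_answer(" b) ")        -> "B"   # first A-D letter, uppercased
#   clean_answer("C")           -> "C"
#   clean_answer("42")          -> None  # no A-D letter present
#   clean_answer(float("nan"))  -> None  # missing predictions are ignored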
def update_leaderboard(results):
"""
Append new submission results to the leaderboard file and push updates to the Hugging Face repository.
"""
new_entry = {
"Model Name": results['model_name'],
"Overall Accuracy": round(results['overall_accuracy'] * 100, 2),
"Correct Predictions": results['correct_predictions'],
"Total Questions": results['total_questions'],
"Timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
"Team Name": results['Team_name']
}
try:
# Update the local leaderboard file
new_entry_df = pd.DataFrame([new_entry])
file_exists = os.path.exists(LEADERBOARD_FILE)
new_entry_df.to_csv(
LEADERBOARD_FILE,
mode='a', # Append mode
index=False,
header=not file_exists # Write header only if the file is new
)
print(f"Leaderboard updated successfully at {LEADERBOARD_FILE}")
# Push the updated file to the Hugging Face repository using HTTP API
api = HfApi()
token = HfFolder.get_token()
api.upload_file(
path_or_fileobj=LEADERBOARD_FILE,
path_in_repo="leaderboard.csv",
repo_id="SondosMB/Mobile-MMLU", # Your Space repository
repo_type="space",
token=token
)
print("Leaderboard changes pushed to Hugging Face repository.")
except Exception as e:
print(f"Error updating leaderboard file: {e}")
def load_leaderboard():
if not os.path.exists(LEADERBOARD_FILE) or os.stat(LEADERBOARD_FILE).st_size == 0:
return pd.DataFrame({
"Model Name": [],
"Overall Accuracy": [],
"Correct Predictions": [],
"Total Questions": [],
"Timestamp": [],
"Team Name": [],
})
return pd.read_csv(LEADERBOARD_FILE)
def evaluate_predictions(prediction_file, model_name, Team_name, add_to_leaderboard):
try:
ground_truth_path = hf_hub_download(
repo_id="SondosMB/ground-truth-dataset",
filename="ground_truth.csv",
repo_type="dataset",
token=HF_TOKEN
)
ground_truth_df = pd.read_csv(ground_truth_path)
except FileNotFoundError:
return "Ground truth file not found in the dataset repository.", load_leaderboard()
except Exception as e:
return f"Error loading ground truth: {e}", load_leaderboard()
if not prediction_file:
return "Prediction file not uploaded.", load_leaderboard()
try:
# Load the prediction file
predictions_df = pd.read_csv(prediction_file.name)
# Validate required columns in prediction file
required_columns = ['question_id', 'predicted_answer']
missing_columns = [col for col in required_columns if col not in predictions_df.columns]
if missing_columns:
return (f"Error: Missing required columns in prediction file: {', '.join(missing_columns)}.",
load_leaderboard())
# Validate 'Answer' column in ground truth file
if 'Answer' not in ground_truth_df.columns:
return "Error: 'Answer' column is missing in the ground truth dataset.", load_leaderboard()
merged_df = pd.merge(predictions_df, ground_truth_df, on='question_id', how='inner')
merged_df['pred_answer'] = merged_df['predicted_answer'].apply(clean_answer)
valid_predictions = merged_df.dropna(subset=['pred_answer'])
correct_predictions = (valid_predictions['pred_answer'] == valid_predictions['Answer']).sum()
total_predictions = len(merged_df)
overall_accuracy = correct_predictions / total_predictions if total_predictions > 0 else 0
results = {
'model_name': model_name if model_name else "Unknown Model",
'overall_accuracy': overall_accuracy,
'correct_predictions': correct_predictions,
'total_questions': total_predictions,
'Team_name': Team_name if Team_name else "Unknown Team",
}
if add_to_leaderboard:
update_leaderboard(results)
return "Evaluation completed and added to leaderboard.", load_leaderboard()
else:
return "Evaluation completed but not added to leaderboard.", load_leaderboard()
except Exception as e:
return f"Error during evaluation: {str(e)}", load_leaderboard()
initialize_leaderboard_file()
# Custom CSS for the page theme and layout
css_tech_theme = """
body {
font-family: 'Roboto', sans-serif;
background-color: #f4f6fa;
color: #333333;
margin: 0;
padding: 0;
}
/* Header Styling */
header {
text-align: center;
padding: 60px 20px;
background: linear-gradient(135deg, #6a1b9a, #64b5f6);
color: #ffffff;
border-radius: 12px;
margin-bottom: 30px;
box-shadow: 0 6px 20px rgba(0, 0, 0, 0.2);
}
header h1 {
font-size: 3.5em;
font-weight: bold;
margin-bottom: 10px;
}
header h2 {
font-size: 2em;
margin-bottom: 15px;
}
header p {
font-size: 1em;
line-height: 1.8;
}
.header-buttons {
display: flex;
justify-content: center;
gap: 15px;
margin-top: 20px;
}
.header-buttons a {
text-decoration: none;
font-size: 1.5em;
padding: 15px 30px;
border-radius: 30px;
font-weight: bold;
background: #ffffff;
color: #6a1b9a;
transition: transform 0.3s, background 0.3s;
box-shadow: 0 4px 10px rgba(0, 0, 0, 0.1);
}
.header-buttons a:hover {
background: #64b5f6;
color: #ffffff;
transform: scale(1.05);
}
/* Pre-Tabs Section */
#pre-tabs {
text-align: left !important;
color: #6a1b9a;
}
#pre-tabs h2 {
font-size: 3em;
color: #6a1b9a;
margin-bottom: 15px;
}
#pre-tabs p {
color: #555555;
line-height: 1.5;
font-size: 1.5em;
}
/* Tabs Section */
.tabs {
margin: 0 auto;
padding: 20px;
background: #ffffff;
border-radius: 12px;
box-shadow: 0 4px 15px rgba(0, 0, 0, 0.1);
/* max-width: 1300px; (change 1) */
}
/* Post-Tabs Section */
.post-tabs {
text-align: center;
padding: 40px 20px;
background: linear-gradient(135deg, #64b5f6, #6a1b9a);
color: #ffffff;
border-radius: 12px;
margin-top: 30px;
}
.post-tabs h2 {
color: blue;
font-size: 3.4em;
margin-bottom: 15px;
}
.post-tabs p {
font-size: 2em;
line-height: 1.8;
margin-bottom: 20px;
}
.post-tabs a {
text-decoration: none;
font-size: 1.1em;
padding: 15px 30px;
border-radius: 30px;
font-weight: bold;
background: #ffffff;
color: #6a1b9a;
transition: transform 0.3s, background 0.3s;
box-shadow: 0 4px 10px rgba(0, 0, 0, 0.1);
}
.post-tabs a:hover {
background: #6a1b9a;
color: #ffffff;
transform: scale(1.05);
}
/* Footer */
#custom-footer {
background: linear-gradient(135deg, #6a1b9a, #8e44ad);
color: #ffffff;
text-align: center;
padding: 40px 20px;
margin-top: 30px;
border-radius: 12px;
box-shadow: 0 4px 10px rgba(0, 0, 0, 0.2);
}
#custom-footer h2 {
font-size: 1.5em;
margin-bottom: 15px;
}
#custom-footer p {
font-size: 0.8em;
line-height: 1.6;
margin-bottom: 20px;
}
/* Link Styling */
.social-links {
display: flex;
justify-content: center;
gap: 15px; /* Space between links */
}
.social-link {
display: inline-block;
text-decoration: none;
color: #ffffff;
background-color: #6a1b9a; /* Purple button background */
padding: 10px 20px;
border-radius: 30px;
font-size: 16px;
font-weight: bold;
transition: all 0.3s ease;
box-shadow: 0 4px 10px rgba(0, 0, 0, 0.1);
}
.social-link:hover {
background-color: #8c52d3; /* Darker shade on hover */
box-shadow: 0 6px 15px rgba(0, 0, 0, 0.2);
transform: translateY(-2px);
}
.social-link:active {
transform: translateY(1px);
box-shadow: 0 3px 8px rgba(0, 0, 0, 0.1);
}
#submission-buttons {
display: flex;
justify-content: center;
gap: 15px;
margin-top: 20px;
}
/* Buttons Styling */
#submission-buttons button {
padding: 12px 25px;
font-size: 1.1em;
color: #ffffff;
background: #6a1b9a;
border: none;
border-radius: 30px;
cursor: pointer;
font-weight: bold;
transition: all 0.3s ease;
box-shadow: 0 4px 12px rgba(0, 0, 0, 0.1);
}
#submission-buttons button:hover {
background: #8c52d3; /* Slightly lighter purple */
transform: scale(1.05);
box-shadow: 0 6px 15px rgba(0, 0, 0, 0.2);
}
#submission-buttons button:active {
background: #5e1287; /* Darker purple */
transform: scale(0.98);
box-shadow: 0 3px 10px rgba(0, 0, 0, 0.1);
}
.gradio-container {
padding-bottom: 0 !important;
margin-bottom: 0 !important;
}
/* overview */
#overview {
border-radius: 12px;
}
#overview h2 {
font-size: 2.5em;
color: #6a1b9a !important;
text-align: left;
margin-bottom: 10px;
}
#overview h3 {
font-size: 2.2em;
color: #6a1b9a !important;
text-align: left;
margin-bottom: 20px;
}
#overview p {
font-size: 1.2em;
color: #333333;
line-height: 1.8;
margin-bottom: 15px;
}
#overview ul, #overview ol {
font-size: 1.2em;
color: #555555;
margin: 20px 0;
padding-left: 40px;
}
#overview ul li, #overview ol li {
margin-bottom: 10px;
font-size: 1.2em;
}
#overview ul li::marker, #overview ol li::marker {
color: #6a1b9a;
font-size: 1.2em;
}
#overview a {
color: #6a1b9a;
text-decoration: underline;
}
#overview a:hover {
color: #8c52d3;
}
footer {
margin-top: 0; /* Reduce space above the footer */
padding: 10px; /* Optional: Adjust padding inside the footer */
}
"""
# Create the Gradio Interface
with gr.Blocks(css=css_tech_theme) as demo:
# Header Section
gr.Markdown("""
The Mobile-MMLU Benchmark Competition provides an exceptional platform to showcase your
skills in mobile AI. Compete with innovators worldwide, drive technological advancements, and contribute
to shaping the future of mobile intelligence.
# The Mobile-MMLU Benchmark Competition provides an exceptional platform to showcase your
# skills in mobile AI. Compete with innovators worldwide, drive technological advancements, and contribute
# to shaping the future of mobile intelligence.
# π Mobile-MMLU Challenge
π Pushing the Limits of Mobile LLMs
π Why Participate? π
π Why Participate? π
#
The Mobile-MMLU Benchmark Competition is a premier challenge designed to evaluate and advance mobile-optimized Large Language Models (LLMs). This competition is an excellent opportunity to showcase your model's ability to handle real-world scenarios and excel in mobile intelligence.
With a dataset spanning 80 distinct fields and featuring 16,186 questions, the competition emphasizes practical applications, from education and healthcare to technology and daily life.
Participating in this competition allows you to:
Upload your prediction file and provide your model name to evaluate and optionally submit your results to the leaderboard.
Don't miss this opportunity to showcase your expertise in mobile AI! Participate in the competition, submit your predictions, and compare your results with the best in the field.