import os
import re
from datetime import datetime

import gradio as gr
import pandas as pd
from huggingface_hub import hf_hub_download, HfApi, HfFolder

from constants import CITATION_TEXT

LEADERBOARD_FILE = "leaderboard.csv"
GROUND_TRUTH_FILE = "ground_truth.csv"
LAST_UPDATED = datetime.now().strftime("%B %d, %Y")

# Ensure authentication and suppress warnings
os.environ["HF_HUB_DISABLE_SYMLINKS_WARNING"] = "1"
HF_TOKEN = os.getenv("HF_TOKEN")
if not HF_TOKEN:
    raise ValueError("HF_TOKEN environment variable is not set or invalid.")


def initialize_leaderboard_file():
    """
    Ensure the leaderboard file exists and has the correct headers.
    """
    if not os.path.exists(LEADERBOARD_FILE) or os.stat(LEADERBOARD_FILE).st_size == 0:
        pd.DataFrame(columns=[
            "Model Name", "Overall Accuracy", "Correct Predictions",
            "Total Questions", "Timestamp", "Team Name"
        ]).to_csv(LEADERBOARD_FILE, index=False)


def clean_answer(answer):
    """Normalize a predicted answer to a single letter A-D, or None if unusable."""
    if pd.isna(answer):
        return None
    answer = str(answer)
    clean = re.sub(r'[^A-Da-d]', '', answer)
    return clean[0].upper() if clean else None


def update_leaderboard(results):
    """
    Append new submission results to the leaderboard file and push updates
    to the Hugging Face repository.
    """
    new_entry = {
        "Model Name": results['model_name'],
        "Overall Accuracy": round(results['overall_accuracy'] * 100, 2),
        "Correct Predictions": results['correct_predictions'],
        "Total Questions": results['total_questions'],
        "Timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "Team Name": results['Team_name'],
    }
    try:
        # Update the local leaderboard file
        new_entry_df = pd.DataFrame([new_entry])
        file_exists = os.path.exists(LEADERBOARD_FILE)
        new_entry_df.to_csv(
            LEADERBOARD_FILE,
            mode='a',               # Append mode
            index=False,
            header=not file_exists  # Write header only if the file is new
        )
        print(f"Leaderboard updated successfully at {LEADERBOARD_FILE}")

        # Push the updated file to the Hugging Face repository using the HTTP API
        api = HfApi()
        token = HfFolder.get_token()
        api.upload_file(
            path_or_fileobj=LEADERBOARD_FILE,
            path_in_repo="leaderboard.csv",
            repo_id="SondosMB/Mobile-MMLU",  # Your Space repository
            repo_type="space",
            token=token
        )
        print("Leaderboard changes pushed to Hugging Face repository.")
    except Exception as e:
        print(f"Error updating leaderboard file: {e}")


def load_leaderboard():
    if not os.path.exists(LEADERBOARD_FILE) or os.stat(LEADERBOARD_FILE).st_size == 0:
        return pd.DataFrame({
            "Model Name": [],
            "Overall Accuracy": [],
            "Correct Predictions": [],
            "Total Questions": [],
            "Timestamp": [],
            "Team Name": [],
        })
    return pd.read_csv(LEADERBOARD_FILE)


def evaluate_predictions(prediction_file, model_name, Team_name, add_to_leaderboard):
    try:
        ground_truth_path = hf_hub_download(
            repo_id="SondosMB/ground-truth-dataset",
            filename="ground_truth.csv",
            repo_type="dataset",
            use_auth_token=True
        )
        ground_truth_df = pd.read_csv(ground_truth_path)
    except FileNotFoundError:
        return "Ground truth file not found in the dataset repository.", load_leaderboard()
    except Exception as e:
        return f"Error loading ground truth: {e}", load_leaderboard()

    if not prediction_file:
        return "Prediction file not uploaded.", load_leaderboard()

    try:
        # Load prediction file
        predictions_df = pd.read_csv(prediction_file.name)

        # Validate required columns in prediction file
        required_columns = ['question_id', 'predicted_answer']
        missing_columns = [col for col in required_columns if col not in predictions_df.columns]
        if missing_columns:
            return (f"Error: Missing required columns in prediction file: {', '.join(missing_columns)}.",
                    load_leaderboard())

        # Validate 'Answer' column in ground truth file
        if 'Answer' not in ground_truth_df.columns:
            return "Error: 'Answer' column is missing in the ground truth dataset.", load_leaderboard()

        merged_df = pd.merge(predictions_df, ground_truth_df, on='question_id', how='inner')
        merged_df['pred_answer'] = merged_df['predicted_answer'].apply(clean_answer)

        valid_predictions = merged_df.dropna(subset=['pred_answer'])
        correct_predictions = (valid_predictions['pred_answer'] == valid_predictions['Answer']).sum()
        total_predictions = len(merged_df)
        overall_accuracy = correct_predictions / total_predictions if total_predictions > 0 else 0

        results = {
            'model_name': model_name if model_name else "Unknown Model",
            'overall_accuracy': overall_accuracy,
            'correct_predictions': correct_predictions,
            'total_questions': total_predictions,
            'Team_name': Team_name if Team_name else "Unknown Team",
        }

        if add_to_leaderboard:
            update_leaderboard(results)
            return "Evaluation completed and added to leaderboard.", load_leaderboard()
        else:
            return "Evaluation completed but not added to leaderboard.", load_leaderboard()
    except Exception as e:
        return f"Error during evaluation: {str(e)}", load_leaderboard()


initialize_leaderboard_file()

# Custom CSS for a modern, professional layout that matches the website style
css_tech_theme = """
body { font-family: 'Roboto', sans-serif; background-color: #f4f6fa; color: #333333; margin: 0; padding: 0; }

/* Header Styling */
header { text-align: center; padding: 60px 20px; background: linear-gradient(135deg, #6a1b9a, #64b5f6); color: #ffffff; border-radius: 12px; margin-bottom: 30px; box-shadow: 0 6px 20px rgba(0, 0, 0, 0.2); }
header h1 { font-size: 3.5em; font-weight: bold; margin-bottom: 10px; }
header h2 { font-size: 2em; margin-bottom: 15px; }
header p { font-size: 1em; line-height: 1.8; }
.header-buttons { display: flex; justify-content: center; gap: 15px; margin-top: 20px; }
.header-buttons a { text-decoration: none; font-size: 1.5em; padding: 15px 30px; border-radius: 30px; font-weight: bold; background: #ffffff; color: #6a1b9a; transition: transform 0.3s, background 0.3s; box-shadow: 0 4px 10px rgba(0, 0, 0, 0.1); }
.header-buttons a:hover { background: #64b5f6; color: #ffffff; transform: scale(1.05); }

/* Pre-Tabs Section */
#pre-tabs { text-align: left !important; color: #6a1b9a; }
#pre-tabs h2 { font-size: 3em; color: #6a1b9a; margin-bottom: 15px; }
#pre-tabs p { color: #555555; line-height: 1.5; font-size: 1.5em; }

/* Tabs Section */
.tabs { margin: 0 auto; padding: 20px; background: #ffffff; border-radius: 12px; box-shadow: 0 4px 15px rgba(0, 0, 0, 0.1); /* max-width: 1300px; (change 1) */ }

/* Post-Tabs Section */
.post-tabs { text-align: center; padding: 40px 20px; background: linear-gradient(135deg, #64b5f6, #6a1b9a); color: #ffffff; border-radius: 12px; margin-top: 30px; }
.post-tabs h2 { color: blue; font-size: 3.4em; margin-bottom: 15px; }
.post-tabs p { font-size: 2em; line-height: 1.8; margin-bottom: 20px; }
.post-tabs a { text-decoration: none; font-size: 1.1em; padding: 15px 30px; border-radius: 30px; font-weight: bold; background: #ffffff; color: #6a1b9a; transition: transform 0.3s, background 0.3s; box-shadow: 0 4px 10px rgba(0, 0, 0, 0.1); }
.post-tabs a:hover { background: #6a1b9a; color: #ffffff; transform: scale(1.05); }

/* Footer */
#custom-footer { background: linear-gradient(135deg, #6a1b9a, #8e44ad); color: #ffffff; text-align: center; padding: 40px 20px; margin-top: 30px; border-radius: 12px; box-shadow: 0 4px 10px rgba(0, 0, 0, 0.2); }
#custom-footer h2 { font-size: 1.5em; margin-bottom: 15px; }
#custom-footer p { font-size: 0.8em; line-height: 1.6; margin-bottom: 20px; }

/* Link Styling */
.social-links { display: flex; justify-content: center; gap: 15px; /* Space between links */ }
.social-link { display: inline-block; text-decoration: none; color: #ffffff; background-color: #6a1b9a; /* Purple button background */ padding: 10px 20px; border-radius: 30px; font-size: 16px; font-weight: bold; transition: all 0.3s ease; box-shadow: 0 4px 10px rgba(0, 0, 0, 0.1); }
.social-link:hover { background-color: #8c52d3; /* Darker shade on hover */ box-shadow: 0 6px 15px rgba(0, 0, 0, 0.2); transform: translateY(-2px); }
.social-link:active { transform: translateY(1px); box-shadow: 0 3px 8px rgba(0, 0, 0, 0.1); }

/* Buttons Styling */
#submission-buttons { display: flex; justify-content: center; gap: 15px; margin-top: 20px; }
#submission-buttons button { padding: 12px 25px; font-size: 1.1em; color: #ffffff; background: #6a1b9a; border: none; border-radius: 30px; cursor: pointer; font-weight: bold; transition: all 0.3s ease; box-shadow: 0 4px 12px rgba(0, 0, 0, 0.1); }
#submission-buttons button:hover { background: #8c52d3; /* Slightly lighter purple */ transform: scale(1.05); box-shadow: 0 6px 15px rgba(0, 0, 0, 0.2); }
#submission-buttons button:active { background: #5e1287; /* Darker purple */ transform: scale(0.98); box-shadow: 0 3px 10px rgba(0, 0, 0, 0.1); }

.gradio-container { padding-bottom: 0 !important; margin-bottom: 0 !important; }

/* Overview */
#overview { border-radius: 12px; }
#overview h2 { font-size: 2.5em; color: #6a1b9a !important; text-align: left; margin-bottom: 10px; }
#overview h3 { font-size: 2.2em; color: #6a1b9a !important; text-align: left; margin-bottom: 20px; }
#overview p { font-size: 1.2em; color: #333333; line-height: 1.8; margin-bottom: 15px; }
#overview ul, #overview ol { font-size: 1.2em; color: #555555; margin: 20px 0; padding-left: 40px; }
#overview ul li, #overview ol li { margin-bottom: 10px; font-size: 1.2em; }
#overview ul li::marker, #overview ol li::marker { color: #6a1b9a; font-size: 1.2em; }
#overview a { color: #6a1b9a; text-decoration: underline; }
#overview a:hover { color: #8c52d3; }

footer { margin-top: 0; /* Reduce space above the footer */ padding: 10px; /* Optional: adjust padding inside the footer */ }
"""

# Create the Gradio Interface
with gr.Blocks(css=css_tech_theme) as demo:
    # Header Section
    gr.Markdown("""

πŸ† Mobile-MMLU Challenge

## 🚀 Pushing the Limits of Mobile LLMs

""") # # Pre-Tabs Section gr.Markdown("""

## 🌟 Why Participate? 🌟

The Mobile-MMLU Benchmark Competition provides an exceptional platform to showcase your skills in mobile AI. Compete with innovators worldwide, drive technological advancements, and contribute to shaping the future of mobile intelligence.

""", elem_id="pre-tabs") # gr.Markdown(""" #
#

🌟 Why Participate? 🌟

#

# The Mobile-MMLU Benchmark Competition provides an exceptional platform to showcase your # skills in mobile AI. Compete with innovators worldwide, drive technological advancements, and contribute # to shaping the future of mobile intelligence. #

#
""", elem_id="pre-tabs") # Tabs Section with gr.Tabs(elem_id="tabs"): # Overview Tab with gr.TabItem("πŸ“– Overview"): gr.Markdown( """

## About the Competition

The Mobile-MMLU Benchmark Competition is a premier challenge designed to evaluate and advance mobile-optimized Large Language Models (LLMs). This competition is an excellent opportunity to showcase your model's ability to handle real-world scenarios and excel in mobile intelligence.

With a dataset spanning 80 distinct fields and featuring 16,186 questions, the competition emphasizes practical applications, from education and healthcare to technology and daily life.

## Why Compete?

Participating in this competition allows you to compete with innovators worldwide, benchmark your model on the real-time leaderboard, and contribute to shaping the future of mobile intelligence.

## How It Works

1. 1️⃣ **Download the Dataset:** Access the dataset and detailed instructions on the GitHub page. Follow the steps to ensure your environment is set up correctly.
2. 2️⃣ **Generate Predictions:** Use the provided script in the GitHub repository to generate answers. Ensure the output file matches the format described in the GitHub repository (a minimal sketch is shown below).
3. 3️⃣ **Submit Predictions:** Upload your CSV file on the Submission page of this platform.
4. 4️⃣ **Evaluation:** Your submission is scored on accuracy; the results include the overall accuracy metric.
5. 5️⃣ **Leaderboard:** Optionally, add your results to the real-time leaderboard to compare your model's performance with others.
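
The evaluator only requires that your CSV contain `question_id` and `predicted_answer` columns and that each answer reduce to a single letter (A-D). Purely as an illustration (the file name and the question IDs below are placeholders, not real dataset IDs), a prediction file in the expected format could be written like this:

```python
import pandas as pd

# Hypothetical predictions; real question_id values come from the released dataset.
predictions = [
    {"question_id": "q_00001", "predicted_answer": "A"},
    {"question_id": "q_00002", "predicted_answer": "C"},
]

# "question_id" and "predicted_answer" are the two columns the submission checker validates.
pd.DataFrame(predictions).to_csv("predictions.csv", index=False)
```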

## Resources

""",elem_id="overview") with gr.TabItem("πŸ“€ Submission"): gr.Markdown("""

## Submit Your Predictions

Upload your prediction file and provide your model and team names to evaluate your results and, optionally, submit them to the leaderboard.

""") with gr.Row(elem_id="submission-fields"): file_input = gr.File(label="πŸ“‚ Upload Prediction CSV", file_types=[".csv"], interactive=True,scale=1, min_width=12000) model_name_input = gr.Textbox(label="🏷️ Model Name", placeholder="Enter your model name",scale=1, min_width=800) Team_name_input = gr.Textbox(label="🏷️ Team Name", placeholder="Enter your Team name",scale=1, min_width=800) with gr.Row(elem_id="submission-results"): overall_accuracy_display = gr.Number(label="πŸ“Š Overall Accuracy (%)", interactive=False,scale=1,min_width=1200) with gr.Row(elem_id="submission-buttons"): eval_button = gr.Button("πŸ“ˆ Evaluate",scale=1,min_width=1200) submit_button = gr.Button("πŸ“€ Prove and Submit to Leaderboard", elem_id="evaluation-status", visible=False,scale=1,min_width=1200) eval_status = gr.Textbox(label="πŸ› οΈ Evaluation Status", interactive=False,scale=1,min_width=1200) # Define the functions outside the `with` block # def handle_evaluation(file, model_name): # # Check if required inputs are provided # if not file: # return "Error: Please upload a prediction file.", 0, gr.update(visible=False) # if not model_name or model_name.strip() == "": # return "Error: Please enter a model name.", 0, gr.update(visible=False) # try: # # Load predictions file # predictions_df = pd.read_csv(file.name) # # Validate required columns in the prediction file # required_columns = ['question_id', 'predicted_answer'] # missing_columns = [col for col in required_columns if col not in predictions_df.columns] # if missing_columns: # return (f"Error: Missing required columns in prediction file: {', '.join(missing_columns)}.", # 0, gr.update(visible=False)) # # Perform evaluation # status, leaderboard = evaluate_predictions(file, model_name, add_to_leaderboard=False) # if leaderboard.empty: # overall_accuracy = 0 # else: # overall_accuracy = leaderboard.iloc[-1]["Overall Accuracy"] # # Show the submit button after successful evaluation # return status, overall_accuracy, gr.update(visible=True) # except Exception as e: # # Handle unexpected errors # return f"Error during evaluation: {str(e)}", 0, gr.update(visible=False) def handle_evaluation(file, model_name, Team_name): if not file: return "Error: Please upload a prediction file.", 0, gr.update(visible=False) if not model_name or model_name.strip() == "": return "Error: Please enter a model name.", 0, gr.update(visible=False) if not Team_name or Team_name.strip() == "": return "Error: Please enter a Team name.", 0, gr.update(visible=False) try: # Load predictions file predictions_df = pd.read_csv(file.name) # Validate required columns required_columns = ['question_id', 'predicted_answer'] missing_columns = [col for col in required_columns if col not in predictions_df.columns] if missing_columns: return (f"Error: Missing required columns in prediction file: {', '.join(missing_columns)}.", 0, gr.update(visible=False)) # Load ground truth try: ground_truth_path = hf_hub_download( repo_id="SondosMB/ground-truth-dataset", filename="ground_truth.csv", repo_type="dataset", use_auth_token=True ) ground_truth_df = pd.read_csv(ground_truth_path) except Exception as e: return f"Error loading ground truth: {e}", 0, gr.update(visible=False) # Perform evaluation calculations merged_df = pd.merge(predictions_df, ground_truth_df, on='question_id', how='inner') merged_df['pred_answer'] = merged_df['predicted_answer'].apply(clean_answer) valid_predictions = merged_df.dropna(subset=['pred_answer']) correct_predictions = (valid_predictions['pred_answer'] == 
valid_predictions['Answer']).sum() total_predictions = len(merged_df) overall_accuracy = (correct_predictions / total_predictions * 100) if total_predictions > 0 else 0 return "Evaluation completed successfully.", overall_accuracy, gr.update(visible=True) except Exception as e: return f"Error during evaluation: {str(e)}", 0, gr.update(visible=False) def handle_submission(file, model_name,Team_name): # Handle leaderboard submission status, _ = evaluate_predictions(file, model_name,Team_name, add_to_leaderboard=True) return f"Submission to leaderboard completed: {status}" # Connect button clicks to the functions eval_button.click( handle_evaluation, inputs=[file_input, model_name_input,Team_name_input], outputs=[eval_status, overall_accuracy_display, submit_button], ) submit_button.click( handle_submission, inputs=[file_input, model_name_input,Team_name_input], outputs=[eval_status], ) with gr.TabItem("πŸ… Leaderboard"): leaderboard_table = gr.Dataframe( value=load_leaderboard(), label="Leaderboard", interactive=False, wrap=True, ) refresh_button = gr.Button("Refresh Leaderboard") refresh_button.click( lambda: load_leaderboard(), inputs=[], outputs=[leaderboard_table], ) # Post-Tabs Section # gr.Markdown(""" #

    # Post-Tabs Section
    gr.Markdown("""

## 🌟 Ready to Compete? 🌟

Don't miss this opportunity to showcase your expertise in mobile AI! Participate in the competition, submit your predictions, and compare your results with the best in the field.

""") with gr.Row(): with gr.Accordion("πŸ“™ Citation", open=False): gr.Textbox( value=CITATION_TEXT, lines=18, label="",elem_id="citation-button", show_copy_button=True) # Footer Section gr.Markdown(""" """,elem_id="custom-footer") demo.launch()