SondosMB committed on
Commit 93bbadb · verified · 1 Parent(s): 46227fe

Update app.py

Files changed (1)
  1. app.py +38 -321
app.py CHANGED
@@ -20,170 +20,9 @@ HF_TOKEN = os.getenv("HF_TOKEN")
 if not HF_TOKEN:
     raise ValueError("HF_TOKEN environment variable is not set or invalid.")
 
-# def initialize_leaderboard_file():
-# """
-# Ensure the leaderboard file exists and has the correct headers.
-# """
-# if not os.path.exists(LEADERBOARD_FILE):
-# pd.DataFrame(columns=[
-# "Model Name", "Overall Accuracy", "Valid Accuracy",
-# "Correct Predictions", "Total Questions", "Timestamp"
-# ]).to_csv(LEADERBOARD_FILE, index=False)
-# elif os.stat(LEADERBOARD_FILE).st_size == 0:
-# pd.DataFrame(columns=[
-# "Model Name", "Overall Accuracy", "Valid Accuracy",
-# "Correct Predictions", "Total Questions", "Timestamp"
-# ]).to_csv(LEADERBOARD_FILE, index=False)
-
-# def clean_answer(answer):
-# if pd.isna(answer):
-# return None
-# answer = str(answer)
-# clean = re.sub(r'[^A-Da-d]', '', answer)
-# return clean[0].upper() if clean else None
-
-
-# def update_leaderboard(results):
-# """
-# Append new submission results to the leaderboard file and push updates to the Hugging Face repository.
-# """
-# new_entry = {
-# "Model Name": results['model_name'],
-# "Overall Accuracy": round(results['overall_accuracy'] * 100, 2),
-# "Valid Accuracy": round(results['valid_accuracy'] * 100, 2),
-# "Correct Predictions": results['correct_predictions'],
-# "Total Questions": results['total_questions'],
-# "Timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
-# }
-
-# try:
-# # Update the local leaderboard file
-# new_entry_df = pd.DataFrame([new_entry])
-# file_exists = os.path.exists(LEADERBOARD_FILE)
-
-# new_entry_df.to_csv(
-# LEADERBOARD_FILE,
-# mode='a', # Append mode
-# index=False,
-# header=not file_exists # Write header only if the file is new
-# )
-# print(f"Leaderboard updated successfully at {LEADERBOARD_FILE}")
-
-# # Push the updated file to the Hugging Face repository using HTTP API
-# api = HfApi()
-# token = HfFolder.get_token()
-
-# api.upload_file(
-# path_or_fileobj=LEADERBOARD_FILE,
-# path_in_repo="leaderboard.csv",
-# repo_id="SondosMB/ss", # Your Space repository
-# repo_type="space",
-# token=token
-# )
-# print("Leaderboard changes pushed to Hugging Face repository.")
-
-# except Exception as e:
-# print(f"Error updating leaderboard file: {e}")
-
-
 
-# def load_leaderboard():
-# if not os.path.exists(LEADERBOARD_FILE) or os.stat(LEADERBOARD_FILE).st_size == 0:
-# return pd.DataFrame({
-# "Model Name": [],
-# "Overall Accuracy": [],
-# "Valid Accuracy": [],
-# "Correct Predictions": [],
-# "Total Questions": [],
-# "Timestamp": [],
-# })
-# return pd.read_csv(LEADERBOARD_FILE)
+
 
-# def evaluate_predictions(prediction_file, model_name, add_to_leaderboard):
-# try:
-# ground_truth_path = hf_hub_download(
-# repo_id="SondosMB/ground-truth-dataset",
-# filename="ground_truth.csv",
-# repo_type="dataset",
-# use_auth_token=True
-# )
-# ground_truth_df = pd.read_csv(ground_truth_path)
-# except FileNotFoundError:
-# return "Ground truth file not found in the dataset repository.", load_leaderboard()
-# except Exception as e:
-# return f"Error loading ground truth: {e}", load_leaderboard()
-
-# if not prediction_file:
-# return "Prediction file not uploaded.", load_leaderboard()
-
-# try:
-# #load predition file
-# predictions_df = pd.read_csv(prediction_file.name)
-# # Validate required columns in prediction file
-# required_columns = ['question_id', 'predicted_answer']
-# missing_columns = [col for col in required_columns if col not in predictions_df.columns]
-# if missing_columns:
-# return (f"Error: Missing required columns in prediction file: {', '.join(missing_columns)}.",
-# load_leaderboard())
-
-# # Validate 'Answer' column in ground truth file
-# if 'Answer' not in ground_truth_df.columns:
-# return "Error: 'Answer' column is missing in the ground truth dataset.", load_leaderboard()
-# merged_df = pd.merge(predictions_df, ground_truth_df, on='question_id', how='inner')
-# merged_df['pred_answer'] = merged_df['predicted_answer'].apply(clean_answer)
-
-# valid_predictions = merged_df.dropna(subset=['pred_answer'])
-# correct_predictions = (valid_predictions['pred_answer'] == valid_predictions['Answer']).sum()
-# total_predictions = len(merged_df)
-# total_valid_predictions = len(valid_predictions)
-
-# overall_accuracy = correct_predictions / total_predictions if total_predictions > 0 else 0
-# valid_accuracy = correct_predictions / total_valid_predictions if total_valid_predictions > 0 else 0
-
-# results = {
-# 'model_name': model_name if model_name else "Unknown Model",
-# 'overall_accuracy': overall_accuracy,
-# }
-
-# if add_to_leaderboard:
-# update_leaderboard(results)
-# return "Evaluation completed and added to leaderboard.", load_leaderboard()
-# else:
-# return "Evaluation completed but not added to leaderboard.", load_leaderboard()
-
-# except Exception as e:
-# return f"Error during evaluation: {str(e)}", load_leaderboard()
-
-# initialize_leaderboard_file()
-# def initialize_leaderboard_file():
-# """
-# Ensure the leaderboard file exists and has the correct headers.
-# """
-# if not os.path.exists(LEADERBOARD_FILE):
-# pd.DataFrame(columns=[
-# "Model Name", "Overall Accuracy", "Valid Accuracy",
-# "Correct Predictions", "Total Questions", "Timestamp"
-# ]).to_csv(LEADERBOARD_FILE, index=False)
-# elif os.stat(LEADERBOARD_FILE).st_size == 0:
-# pd.DataFrame(columns=[
-# "Model Name", "Overall Accuracy", "Valid Accuracy",
-# "Correct Predictions", "Total Questions", "Timestamp"
-# ]).to_csv(LEADERBOARD_FILE, index=False)
-
-# def initialize_leaderboard_file():
-# """
-# Ensure the leaderboard file exists and has the correct headers.
-# """
-# if not os.path.exists(LEADERBOARD_FILE):
-# pd.DataFrame(columns=[
-# "Model Name", "Overall Accuracy", "Correct Predictions",
-# "Total Questions", "Timestamp", "Team Name"
-# ]).to_csv(LEADERBOARD_FILE, index=False)
-# elif os.stat(LEADERBOARD_FILE).st_size == 0:
-# pd.DataFrame(columns=[
-# "Model Name", "Overall Accuracy", "Correct Predictions",
-# "Total Questions", "Timestamp", "Team Name"
-# ]).to_csv(LEADERBOARD_FILE, index=False)
 def initialize_leaderboard_file():
     """
     Ensure the leaderboard file exists and has the correct headers.
@@ -222,47 +61,7 @@ def clean_answer(answer):
     return clean[0].upper() if clean else None
 
 
-# def update_leaderboard(results):
-# """
-# Append new submission results to the leaderboard file and push updates to the Hugging Face repository.
-# """
-# new_entry = {
-# "Model Name": results['model_name'],
-# "Overall Accuracy": round(results['overall_accuracy'] * 100, 2),
-# "Valid Accuracy": round(results['valid_accuracy'] * 100, 2),
-# "Correct Predictions": results['correct_predictions'],
-# "Total Questions": results['total_questions'],
-# "Timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
-# }
-
-# try:
-# # Update the local leaderboard file
-# new_entry_df = pd.DataFrame([new_entry])
-# file_exists = os.path.exists(LEADERBOARD_FILE)
-
-# new_entry_df.to_csv(
-# LEADERBOARD_FILE,
-# mode='a', # Append mode
-# index=False,
-# header=not file_exists # Write header only if the file is new
-# )
-# print(f"Leaderboard updated successfully at {LEADERBOARD_FILE}")
-
-# # Push the updated file to the Hugging Face repository using HTTP API
-# api = HfApi()
-# token = HfFolder.get_token()
-
-# api.upload_file(
-# path_or_fileobj=LEADERBOARD_FILE,
-# path_in_repo="leaderboard.csv",
-# repo_id="SondosMB/ss", # Your Space repository
-# repo_type="space",
-# token=token
-# )
-# print("Leaderboard changes pushed to Hugging Face repository.")
-
-# except Exception as e:
-# print(f"Error updating leaderboard file: {e}")
+
 
 def update_leaderboard(results):
     """
@@ -349,21 +148,23 @@ def update_leaderboard_pro(results):
         print(f"Error updating leaderboard file: {e}")
 
 
+
 # def load_leaderboard():
 # if not os.path.exists(LEADERBOARD_FILE) or os.stat(LEADERBOARD_FILE).st_size == 0:
 # return pd.DataFrame({
 # "Model Name": [],
 # "Overall Accuracy": [],
-# "Valid Accuracy": [],
 # "Correct Predictions": [],
 # "Total Questions": [],
 # "Timestamp": [],
+# "Team Name": [],
+
 # })
 # return pd.read_csv(LEADERBOARD_FILE)
 
-
 def load_leaderboard():
     if not os.path.exists(LEADERBOARD_FILE) or os.stat(LEADERBOARD_FILE).st_size == 0:
+        # Create an empty DataFrame with all expected columns
         return pd.DataFrame({
             "Model Name": [],
             "Overall Accuracy": [],
@@ -371,9 +172,38 @@ def load_leaderboard():
             "Total Questions": [],
             "Timestamp": [],
             "Team Name": [],
-
         })
-    return pd.read_csv(LEADERBOARD_FILE)
+
+    # Read the CSV file
+    df = pd.read_csv(LEADERBOARD_FILE)
+
+    # Ensure all columns exist
+    expected_columns = [
+        "Model Name",
+        "Overall Accuracy",
+        "Correct Predictions",
+        "Total Questions",
+        "Timestamp",
+        "Team Name"
+    ]
+
+    # Add missing columns with default values
+    for col in expected_columns:
+        if col not in df.columns:
+            if col == "Timestamp":
+                df[col] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+            elif col == "Team Name":
+                df[col] = "Unknown Team"
+            else:
+                df[col] = None
+
+    # Remove duplicate entries based on Model Name
+    df = df.drop_duplicates(subset="Model Name", keep='last')
+
+    # Reorder columns to match expected structure
+    df = df[expected_columns]
+
+    return df
 
 def load_leaderboard_pro():
     if not os.path.exists(LEADERBOARD_FILE_pro) or os.stat(LEADERBOARD_FILE_pro).st_size == 0:
@@ -388,120 +218,7 @@ def load_leaderboard_pro():
         })
     return pd.read_csv(LEADERBOARD_FILE_pro)
 
-# def evaluate_predictions(prediction_file, model_name, add_to_leaderboard):
-# try:
-# ground_truth_path = hf_hub_download(
-# repo_id="SondosMB/ground-truth-dataset",
-# filename="ground_truth.csv",
-# repo_type="dataset",
-# use_auth_token=True
-# )
-# ground_truth_df = pd.read_csv(ground_truth_path)
-# except FileNotFoundError:
-# return "Ground truth file not found in the dataset repository.", load_leaderboard()
-# except Exception as e:
-# return f"Error loading ground truth: {e}", load_leaderboard()
-
-# if not prediction_file:
-# return "Prediction file not uploaded.", load_leaderboard()
-
-# try:
-# #load predition file
-# predictions_df = pd.read_csv(prediction_file.name)
-# # Validate required columns in prediction file
-# required_columns = ['question_id', 'predicted_answer']
-# missing_columns = [col for col in required_columns if col not in predictions_df.columns]
-# if missing_columns:
-# return (f"Error: Missing required columns in prediction file: {', '.join(missing_columns)}.",
-# load_leaderboard())
-
-# # Validate 'Answer' column in ground truth file
-# if 'Answer' not in ground_truth_df.columns:
-# return "Error: 'Answer' column is missing in the ground truth dataset.", load_leaderboard()
-# merged_df = pd.merge(predictions_df, ground_truth_df, on='question_id', how='inner')
-# merged_df['pred_answer'] = merged_df['predicted_answer'].apply(clean_answer)
-
-# valid_predictions = merged_df.dropna(subset=['pred_answer'])
-# correct_predictions = (valid_predictions['pred_answer'] == valid_predictions['Answer']).sum()
-# total_predictions = len(merged_df)
-# total_valid_predictions = len(valid_predictions)
-
-# overall_accuracy = correct_predictions / total_predictions if total_predictions > 0 else 0
-# valid_accuracy = correct_predictions / total_valid_predictions if total_valid_predictions > 0 else 0
-
-# results = {
-# 'model_name': model_name if model_name else "Unknown Model",
-# 'overall_accuracy': overall_accuracy,
-# 'valid_accuracy': valid_accuracy,
-# 'correct_predictions': correct_predictions,
-# 'total_questions': total_predictions,
-# }
-
-# if add_to_leaderboard:
-# update_leaderboard(results)
-# return "Evaluation completed and added to leaderboard.", load_leaderboard()
-# else:
-# return "Evaluation completed but not added to leaderboard.", load_leaderboard()
-
-# except Exception as e:
-# return f"Error during evaluation: {str(e)}", load_leaderboard()
-
-# def evaluate_predictions(prediction_file, model_name,Team_name ,add_to_leaderboard):
-# try:
-# ground_truth_path = hf_hub_download(
-# repo_id="SondosMB/ground-truth-dataset",
-# filename="ground_truth.csv",
-# repo_type="dataset",
-# use_auth_token=True
-# )
-# ground_truth_df = pd.read_csv(ground_truth_path)
-# except FileNotFoundError:
-# return "Ground truth file not found in the dataset repository.", load_leaderboard()
-# except Exception as e:
-# return f"Error loading ground truth: {e}", load_leaderboard()
-
-# if not prediction_file:
-# return "Prediction file not uploaded.", load_leaderboard()
-
-# try:
-# #load prediction file
-# predictions_df = pd.read_csv(prediction_file.name)
-# # Validate required columns in prediction file
-# required_columns = ['question_id', 'predicted_answer']
-# missing_columns = [col for col in required_columns if col not in predictions_df.columns]
-# if missing_columns:
-# return (f"Error: Missing required columns in prediction file: {', '.join(missing_columns)}.",
-# load_leaderboard())
-
-# # Validate 'Answer' column in ground truth file
-# if 'Answer' not in ground_truth_df.columns:
-# return "Error: 'Answer' column is missing in the ground truth dataset.", load_leaderboard()
-# merged_df = pd.merge(predictions_df, ground_truth_df, on='question_id', how='inner')
-# merged_df['pred_answer'] = merged_df['predicted_answer'].apply(clean_answer)
-
-# valid_predictions = merged_df.dropna(subset=['pred_answer'])
-# correct_predictions = (valid_predictions['pred_answer'] == valid_predictions['Answer']).sum()
-# total_predictions = len(merged_df)
-
-# overall_accuracy = correct_predictions / total_predictions if total_predictions > 0 else 0
-
-# results = {
-# 'model_name': model_name if model_name else "Unknown Model",
-# 'overall_accuracy': overall_accuracy,
-# 'correct_predictions': correct_predictions,
-# 'total_questions': total_predictions,
-# 'Team_name': Team_name if Team_name else "Unknown Team",
-# }
-
-# if add_to_leaderboard:
-# update_leaderboard(results)
-# return "Evaluation completed and added to leaderboard.", load_leaderboard()
-# else:
-# return "Evaluation completed but not added to leaderboard.", load_leaderboard()
-
-# except Exception as e:
-# return f"Error during evaluation: {str(e)}", load_leaderboard()
-# initialize_leaderboard_file()
+
 
 def evaluate_predictions(prediction_file, model_name,Team_name ,add_to_leaderboard):
     try:
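Note: the load_leaderboard() added in this commit backfills columns that older leaderboard CSVs may lack and deduplicates rows by model. A minimal, self-contained sketch of those same normalization steps (not part of the commit; the sample rows and values are hypothetical, and only pandas plus the column set shown in the diff are assumed):

import pandas as pd
from datetime import datetime

# Hypothetical legacy leaderboard rows: written before "Team Name" and
# "Timestamp" existed, and containing a duplicate entry for the same model.
df = pd.DataFrame({
    "Model Name": ["model-a", "model-a", "model-b"],
    "Overall Accuracy": [61.2, 63.5, 58.9],
    "Correct Predictions": [612, 635, 589],
    "Total Questions": [1000, 1000, 1000],
})

expected_columns = ["Model Name", "Overall Accuracy", "Correct Predictions",
                    "Total Questions", "Timestamp", "Team Name"]

# Backfill any missing columns with the same defaults used in the diff.
for col in expected_columns:
    if col not in df.columns:
        if col == "Timestamp":
            df[col] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        elif col == "Team Name":
            df[col] = "Unknown Team"
        else:
            df[col] = None

# Keep only the most recent row per model and restore the column order.
df = df.drop_duplicates(subset="Model Name", keep="last")[expected_columns]
print(df)  # two rows: the later "model-a" entry and "model-b", both tagged "Unknown Team"

Because keep='last' wins, a re-submission under an existing model name replaces the earlier leaderboard row rather than appearing twice.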
 