SondosMB committed
Commit f9f34c4 · verified
1 Parent(s): 8bf3c31

Update app.py

Files changed (1)
  1. app.py +135 -15
app.py CHANGED
@@ -168,6 +168,20 @@ if not HF_TOKEN:
  # "Correct Predictions", "Total Questions", "Timestamp"
  # ]).to_csv(LEADERBOARD_FILE, index=False)

+ # def initialize_leaderboard_file():
+ # """
+ # Ensure the leaderboard file exists and has the correct headers.
+ # """
+ # if not os.path.exists(LEADERBOARD_FILE):
+ # pd.DataFrame(columns=[
+ # "Model Name", "Overall Accuracy", "Correct Predictions",
+ # "Total Questions", "Timestamp", "Team Name"
+ # ]).to_csv(LEADERBOARD_FILE, index=False)
+ # elif os.stat(LEADERBOARD_FILE).st_size == 0:
+ # pd.DataFrame(columns=[
+ # "Model Name", "Overall Accuracy", "Correct Predictions",
+ # "Total Questions", "Timestamp", "Team Name"
+ # ]).to_csv(LEADERBOARD_FILE, index=False)
  def initialize_leaderboard_file():
  """
  Ensure the leaderboard file exists and has the correct headers.
@@ -181,8 +195,8 @@ def initialize_leaderboard_file():
  pd.DataFrame(columns=[
  "Model Name", "Overall Accuracy", "Correct Predictions",
  "Total Questions", "Timestamp", "Team Name"
- ]).to_csv(LEADERBOARD_FILE, index=False)
-
+ ]).to_csv(LEADERBOARD_FILE, index=False)
+
  def initialize_leaderboard_pro_file():
  """
  Ensure the leaderboard file exists and has the correct headers.
@@ -430,6 +444,63 @@ def load_leaderboard_pro():
  # except Exception as e:
  # return f"Error during evaluation: {str(e)}", load_leaderboard()

+ # def evaluate_predictions(prediction_file, model_name,Team_name ,add_to_leaderboard):
+ # try:
+ # ground_truth_path = hf_hub_download(
+ # repo_id="SondosMB/ground-truth-dataset",
+ # filename="ground_truth.csv",
+ # repo_type="dataset",
+ # use_auth_token=True
+ # )
+ # ground_truth_df = pd.read_csv(ground_truth_path)
+ # except FileNotFoundError:
+ # return "Ground truth file not found in the dataset repository.", load_leaderboard()
+ # except Exception as e:
+ # return f"Error loading ground truth: {e}", load_leaderboard()
+
+ # if not prediction_file:
+ # return "Prediction file not uploaded.", load_leaderboard()
+
+ # try:
+ # #load prediction file
+ # predictions_df = pd.read_csv(prediction_file.name)
+ # # Validate required columns in prediction file
+ # required_columns = ['question_id', 'predicted_answer']
+ # missing_columns = [col for col in required_columns if col not in predictions_df.columns]
+ # if missing_columns:
+ # return (f"Error: Missing required columns in prediction file: {', '.join(missing_columns)}.",
+ # load_leaderboard())
+
+ # # Validate 'Answer' column in ground truth file
+ # if 'Answer' not in ground_truth_df.columns:
+ # return "Error: 'Answer' column is missing in the ground truth dataset.", load_leaderboard()
+ # merged_df = pd.merge(predictions_df, ground_truth_df, on='question_id', how='inner')
+ # merged_df['pred_answer'] = merged_df['predicted_answer'].apply(clean_answer)
+
+ # valid_predictions = merged_df.dropna(subset=['pred_answer'])
+ # correct_predictions = (valid_predictions['pred_answer'] == valid_predictions['Answer']).sum()
+ # total_predictions = len(merged_df)
+
+ # overall_accuracy = correct_predictions / total_predictions if total_predictions > 0 else 0
+
+ # results = {
+ # 'model_name': model_name if model_name else "Unknown Model",
+ # 'overall_accuracy': overall_accuracy,
+ # 'correct_predictions': correct_predictions,
+ # 'total_questions': total_predictions,
+ # 'Team_name': Team_name if Team_name else "Unknown Team",
+ # }
+
+ # if add_to_leaderboard:
+ # update_leaderboard(results)
+ # return "Evaluation completed and added to leaderboard.", load_leaderboard()
+ # else:
+ # return "Evaluation completed but not added to leaderboard.", load_leaderboard()
+
+ # except Exception as e:
+ # return f"Error during evaluation: {str(e)}", load_leaderboard()
+ # initialize_leaderboard_file()
+
  def evaluate_predictions(prediction_file, model_name,Team_name ,add_to_leaderboard):
  try:
  ground_truth_path = hf_hub_download(
@@ -455,7 +526,7 @@ def evaluate_predictions(prediction_file, model_name,Team_name ,add_to_leaderboa
  missing_columns = [col for col in required_columns if col not in predictions_df.columns]
  if missing_columns:
  return (f"Error: Missing required columns in prediction file: {', '.join(missing_columns)}.",
- load_leaderboard())
+ load_leaderboard_pro())

  # Validate 'Answer' column in ground truth file
  if 'Answer' not in ground_truth_df.columns:
@@ -484,9 +555,7 @@ def evaluate_predictions(prediction_file, model_name,Team_name ,add_to_leaderboa
  return "Evaluation completed but not added to leaderboard.", load_leaderboard()

  except Exception as e:
- return f"Error during evaluation: {str(e)}", load_leaderboard()
- initialize_leaderboard_file()
-
+ return f"Error during evaluation: {str(e)}", load_leaderboard(),initialize_leaderboard_file()


  def evaluate_predictions_pro(prediction_file, model_name,Team_name ,add_to_leaderboard):
@@ -936,16 +1005,62 @@ with gr.Blocks(css=css_tech_theme) as demo:



- def handle_evaluation(file, model_name, Team_name):
- print("🚀 Evaluation function started 1") # Debugging print
+ # def handle_evaluation(file, model_name, Team_name):
+ # print("🚀 Evaluation function started 1") # Debugging print
+ # if not file:
+ # print("🚀 Evaluation function started 2") # Debugging print
+ # return "Error: Please upload a prediction file.", 0, gr.update(visible=False)
+ # if not model_name or model_name.strip() == "":
+ # print("🚀 Evaluation function started 3") # Debugging print
+ # return "Error: Please enter a model name.", 0, gr.update(visible=False)
+ # if not Team_name or Team_name.strip() == "":
+ # print("🚀 Evaluation function started 4") # Debugging print
+ # return "Error: Please enter a Team name.", 0, gr.update(visible=False)
+
+ # try:
+ # # Load predictions file
+ # predictions_df = pd.read_csv(file.name)
+
+ # # Validate required columns
+ # required_columns = ['question_id', 'predicted_answer']
+ # missing_columns = [col for col in required_columns if col not in predictions_df.columns]
+ # if missing_columns:
+ # return (f"Error: Missing required columns in prediction file: {', '.join(missing_columns)}.",
+ # 0, gr.update(visible=False))
+
+ # # Load ground truth
+ # try:
+ # ground_truth_path = hf_hub_download(
+ # repo_id="SondosMB/ground-truth-dataset",
+ # filename="ground_truth.csv",
+ # repo_type="dataset",
+ # use_auth_token=True
+ # )
+ # ground_truth_df = pd.read_csv(ground_truth_path)
+ # except Exception as e:
+ # return f"Error loading ground truth: {e}", 0, gr.update(visible=False)
+
+ # # Perform evaluation calculations
+ # merged_df = pd.merge(predictions_df, ground_truth_df, on='question_id', how='inner')
+ # merged_df['pred_answer'] = merged_df['predicted_answer'].apply(clean_answer)
+
+ # valid_predictions = merged_df.dropna(subset=['pred_answer'])
+ # correct_predictions = (valid_predictions['pred_answer'] == valid_predictions['Answer']).sum()
+ # total_predictions = len(merged_df)
+
+ # overall_accuracy = (correct_predictions / total_predictions * 100) if total_predictions > 0 else 0
+
+ # return "Evaluation completed successfully.", overall_accuracy, gr.update(visible=True)
+
+ # except Exception as e:
+ # return f"Error during evaluation: {str(e)}", 0, gr.update(visible=False)
+
+ def handle_evaluation_pro(file, model_name, Team_name):
  if not file:
- print("🚀 Evaluation function started 2") # Debugging print
  return "Error: Please upload a prediction file.", 0, gr.update(visible=False)
  if not model_name or model_name.strip() == "":
- print("🚀 Evaluation function started 3") # Debugging print
  return "Error: Please enter a model name.", 0, gr.update(visible=False)
  if not Team_name or Team_name.strip() == "":
- print("🚀 Evaluation function started 4") # Debugging print
  return "Error: Please enter a Team name.", 0, gr.update(visible=False)

  try:
@@ -984,9 +1099,9 @@ with gr.Blocks(css=css_tech_theme) as demo:
  return "Evaluation completed successfully.", overall_accuracy, gr.update(visible=True)

  except Exception as e:
- return f"Error during evaluation: {str(e)}", 0, gr.update(visible=False)
+ return f"Error during evaluation: {str(e)}", 0, gr.update(visible=False)

- def handle_evaluation_pro(file, model_name, Team_name):
+ def handle_evaluation(file, model_name, Team_name):
  if not file:
  return "Error: Please upload a prediction file.", 0, gr.update(visible=False)
  if not model_name or model_name.strip() == "":
@@ -1030,8 +1145,7 @@ with gr.Blocks(css=css_tech_theme) as demo:
  return "Evaluation completed successfully.", overall_accuracy, gr.update(visible=True)

  except Exception as e:
- return f"Error during evaluation: {str(e)}", 0, gr.update(visible=False)
-
+ return f"Error during evaluation: {str(e)}", 0, gr.update(visible=False)



@@ -1060,6 +1174,12 @@ with gr.Blocks(css=css_tech_theme) as demo:
  outputs=[eval_status, overall_accuracy_display, submit_button_pro],
  )

+ eval_button.click(
+ handle_evaluation,
+ inputs=[file_input, model_name_input,Team_name_input],
+ outputs=[eval_status, overall_accuracy_display, submit_button_pro],
+ )
+
  submit_button_pro.click(
  handle_submission_pro,
  inputs=[file_input, model_name_input,Team_name_input],
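
For reference, the scoring step that evaluate_predictions and the handle_evaluation handlers perform boils down to: merge the uploaded predictions with the ground truth on question_id, normalise the predicted answers, and compute accuracy. A minimal, self-contained sketch is below; clean_answer here is a simplified stand-in for the helper defined elsewhere in app.py, which this diff does not show.

import pandas as pd

def clean_answer(answer):
    # Stand-in for app.py's clean_answer: normalise to a single upper-case letter, None if empty.
    if pd.isna(answer):
        return None
    answer = str(answer).strip().upper()
    return answer[0] if answer else None

def score(predictions_csv, ground_truth_csv):
    # Predictions need question_id/predicted_answer columns,
    # the ground truth needs question_id/Answer columns.
    predictions_df = pd.read_csv(predictions_csv)
    ground_truth_df = pd.read_csv(ground_truth_csv)

    # Join predictions to ground truth and normalise the predicted answers.
    merged_df = pd.merge(predictions_df, ground_truth_df, on="question_id", how="inner")
    merged_df["pred_answer"] = merged_df["predicted_answer"].apply(clean_answer)

    # Count exact matches; unanswerable rows still count toward the denominator.
    valid = merged_df.dropna(subset=["pred_answer"])
    correct = (valid["pred_answer"] == valid["Answer"]).sum()
    total = len(merged_df)
    return correct / total if total > 0 else 0.0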
 
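The most visible functional change in this commit is the eval_button.click wiring added at the end of the diff. A self-contained sketch of how that wiring behaves is shown below; only the .click() call itself comes from the diff, while the component definitions and the handler body are simplified stand-ins for what app.py actually declares.

import gradio as gr

def handle_evaluation(file, model_name, team_name):
    # Same return contract as in the diff: (status message, accuracy, submit-button visibility).
    if not file:
        return "Error: Please upload a prediction file.", 0, gr.update(visible=False)
    if not model_name or not model_name.strip():
        return "Error: Please enter a model name.", 0, gr.update(visible=False)
    if not team_name or not team_name.strip():
        return "Error: Please enter a Team name.", 0, gr.update(visible=False)
    # ... the real handler scores the uploaded file here ...
    return "Evaluation completed successfully.", 100.0, gr.update(visible=True)

with gr.Blocks() as demo:
    file_input = gr.File(label="Prediction file (.csv)")
    model_name_input = gr.Textbox(label="Model name")
    Team_name_input = gr.Textbox(label="Team name")
    eval_button = gr.Button("Evaluate")
    eval_status = gr.Textbox(label="Status", interactive=False)
    overall_accuracy_display = gr.Number(label="Overall accuracy")
    submit_button_pro = gr.Button("Submit to leaderboard", visible=False)

    # The wiring this commit adds: clicking Evaluate runs handle_evaluation and
    # reveals the submit button only when evaluation succeeds.
    eval_button.click(
        handle_evaluation,
        inputs=[file_input, model_name_input, Team_name_input],
        outputs=[eval_status, overall_accuracy_display, submit_button_pro],
    )

demo.launch()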