SondosMB committed
Commit 9f83fb9 · verified · 1 Parent(s): aaf50df

Create app.py

Files changed (1)
  1. app.py +99 -0
app.py ADDED
@@ -0,0 +1,99 @@
+import gradio as gr
+import pandas as pd
+import os
+import re
+from datetime import datetime
+
+# Leaderboard Data (example CSV file for leaderboard)
+LEADERBOARD_FILE = "leaderboard.csv"
+
+def clean_answer(answer):
+    if pd.isna(answer):
+        return None
+    answer = str(answer)
+    clean = re.sub(r'[^A-Da-d]', '', answer)
+    if clean:
+        first_letter = clean[0].upper()
+        if first_letter in ['A', 'B', 'C', 'D']:
+            return first_letter
+    return None
+
+def update_leaderboard(results):
+    # Append results to leaderboard file
+    new_entry = {
+        "Model Name": results['model_name'],
+        "Overall Accuracy": f"{results['overall_accuracy']:.2%}",
+        "Valid Accuracy": f"{results['valid_accuracy']:.2%}",
+        "Correct Predictions": results['correct_predictions'],
+        "Total Questions": results['total_questions'],
+        "Timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+    }
+    leaderboard_df = pd.DataFrame([new_entry])
+    if os.path.exists(LEADERBOARD_FILE):
+        existing_df = pd.read_csv(LEADERBOARD_FILE)
+        leaderboard_df = pd.concat([existing_df, leaderboard_df], ignore_index=True)
+    leaderboard_df.to_csv(LEADERBOARD_FILE, index=False)
+
+def evaluate_predictions(prediction_file):
+    ground_truth_file = "ground_truth.csv"  # Specify the path to the ground truth file
+    if not prediction_file:
+        return "Prediction file not uploaded", None
+
+    if not os.path.exists(ground_truth_file):
+        return "Ground truth file not found", None
+
+    try:
+        predictions_df = pd.read_csv(prediction_file.name)
+        ground_truth_df = pd.read_csv(ground_truth_file)
+        filename = os.path.basename(prediction_file.name)
+        model_name = filename.split('_')[1].split('.')[0] if "_" in filename else "unknown_model"
+
+        merged_df = pd.merge(predictions_df, ground_truth_df, on='question_id', how='inner')
+        merged_df['pred_answer'] = merged_df['predicted_answer'].apply(clean_answer)
+        correct_predictions = (merged_df['pred_answer'] == merged_df['Answer']).sum()
+        total_predictions = len(merged_df)
+        overall_accuracy = correct_predictions / total_predictions if total_predictions else 0.0
+        # Accuracy over predictions that cleaned to a valid A-D letter (used by the leaderboard's "Valid Accuracy" column)
+        valid_predictions = merged_df['pred_answer'].notna().sum()
+        valid_accuracy = correct_predictions / valid_predictions if valid_predictions else 0.0
+
+        results = {
+            'model_name': model_name,
+            'overall_accuracy': overall_accuracy,
+            'valid_accuracy': valid_accuracy,
+            'correct_predictions': correct_predictions,
+            'total_questions': total_predictions,
+        }
+
+        update_leaderboard(results)
+
+        return "Evaluation completed successfully! Leaderboard updated.", LEADERBOARD_FILE
+    except Exception as e:
+        return f"Error: {str(e)}", None
+
+# Gradio Interface with Leaderboard
+def display_leaderboard():
+    if not os.path.exists(LEADERBOARD_FILE):
+        return "Leaderboard is empty."
+    leaderboard_df = pd.read_csv(LEADERBOARD_FILE)
+    return leaderboard_df.to_markdown(index=False)
+
+demo = gr.Blocks()
+
+with demo:
+    gr.Markdown("# Prediction Evaluation Tool with Leaderboard")
+    with gr.Tab("Evaluate"):
+        file_input = gr.File(label="Upload Prediction CSV")
+        eval_status = gr.Textbox(label="Evaluation Status")
+        eval_results_file = gr.File(label="Download Evaluation Results")
+        eval_button = gr.Button("Evaluate")
+        eval_button.click(
+            evaluate_predictions, inputs=file_input, outputs=[eval_status, eval_results_file]
+        )
+    with gr.Tab("Leaderboard"):
+        leaderboard_text = gr.Textbox(label="Leaderboard", interactive=False)
+        refresh_button = gr.Button("Refresh Leaderboard")
+        refresh_button.click(display_leaderboard, outputs=leaderboard_text)
+
+if __name__ == "__main__":
+    demo.launch()
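
Reviewers who want to exercise the new file locally could use the sketch below; it is not part of the commit. It assumes the file is saved as app.py, that the prediction and ground-truth CSVs use the question_id, predicted_answer, and Answer columns the merge above expects, and it uses SimpleNamespace as a stand-in for the uploaded-file object with a .name attribute that Gradio passes to evaluate_predictions. The filename predictions_mymodel.csv is hypothetical, chosen so the name parser reports the model as "mymodel".

# smoke_test.py -- minimal, hypothetical check of the evaluation path (assumes the commit is saved as app.py)
import pandas as pd
from types import SimpleNamespace
from app import evaluate_predictions

# Ground-truth file the app expects next to app.py.
pd.DataFrame({"question_id": [1, 2, 3],
              "Answer": ["A", "C", "D"]}).to_csv("ground_truth.csv", index=False)

# Prediction file; clean_answer() reduces answers such as "b" to a single uppercase letter.
pd.DataFrame({"question_id": [1, 2, 3],
              "predicted_answer": ["A", "b", "d"]}).to_csv("predictions_mymodel.csv", index=False)

# Gradio hands evaluate_predictions an object with a .name attribute; SimpleNamespace stands in here.
status, leaderboard_path = evaluate_predictions(SimpleNamespace(name="predictions_mymodel.csv"))
print(status)
if leaderboard_path:
    print(pd.read_csv(leaderboard_path))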