SondosMB's picture
Create app,py
9f83fb9 verified
raw
history blame
3.64 kB
import gradio as gr
import pandas as pd
import os
import re
from datetime import datetime
# Path of the CSV file that stores the leaderboard. update_leaderboard()
# appends one row to it per call and display_leaderboard() reads it back.
LEADERBOARD_FILE = "leaderboard.csv"
def clean_answer(answer):
    """Normalize a raw predicted answer to one of 'A', 'B', 'C' or 'D'.

    Args:
        answer: The raw cell value from the prediction file (any scalar).

    Returns:
        The upper-cased choice letter, or ``None`` when the value is
        missing (NaN/None) or contains no letter in the range A-D.
    """
    if pd.isna(answer):
        return None
    text = str(answer)

    # Prefer a letter that stands on its own ("B", "(b)", "Answer: B").
    # Simply taking the first A-D character would pick up letters that are
    # part of ordinary words, e.g. the "A" of "Answer: B".
    standalone = re.search(r'(?<![A-Za-z0-9])[A-Da-d](?![A-Za-z0-9])', text)
    if standalone:
        return standalone.group(0).upper()

    # Fallback: no isolated choice letter, so use the first A-D character
    # found anywhere in the text.
    letters = re.sub(r'[^A-Da-d]', '', text)
    if letters:
        return letters[0].upper()
    return None
def update_leaderboard(results, leaderboard_file=None):
    """Append one evaluation result as a new row of the leaderboard CSV.

    Args:
        results: Mapping with the keys ``model_name``, ``overall_accuracy``
            (a fraction, written as a percentage), ``correct_predictions``
            and ``total_questions``. ``valid_accuracy`` is optional; when it
            is absent the corresponding cell is left blank instead of
            raising ``KeyError``.
        leaderboard_file: CSV path to update. Defaults to the module-level
            ``LEADERBOARD_FILE`` when omitted.
    """
    if leaderboard_file is None:
        leaderboard_file = LEADERBOARD_FILE

    valid_accuracy = results.get('valid_accuracy')
    new_entry = {
        "Model Name": results['model_name'],
        "Overall Accuracy": f"{results['overall_accuracy']:.2%}",
        "Valid Accuracy": None if valid_accuracy is None else f"{valid_accuracy:.2%}",
        "Correct Predictions": results['correct_predictions'],
        "Total Questions": results['total_questions'],
        "Timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    }
    leaderboard_df = pd.DataFrame([new_entry])

    # Keep earlier rows: read the existing file and put the new row last.
    if os.path.exists(leaderboard_file):
        existing_df = pd.read_csv(leaderboard_file)
        leaderboard_df = pd.concat([existing_df, leaderboard_df], ignore_index=True)

    leaderboard_df.to_csv(leaderboard_file, index=False)
def evaluate_predictions(prediction_file):
    """Score an uploaded prediction CSV against the ground truth and record it.

    The prediction file must have the columns ``question_id`` and
    ``predicted_answer``; the ground truth file must have ``question_id``
    and ``Answer``. Rows are matched on ``question_id`` (inner join).

    Args:
        prediction_file: The upload handed over by the Gradio ``File``
            component, either an object exposing the path as ``.name`` or a
            plain path string. A falsy value means nothing was uploaded.

    Returns:
        A ``(status_message, leaderboard_path)`` tuple. On success the second
        item is ``LEADERBOARD_FILE``; on any failure it is ``None`` and the
        message describes the problem.
    """
    ground_truth_file = "ground_truth.csv"  # Specify the path to the ground truth file

    if not prediction_file:
        return "Prediction file not uploaded", None
    if not os.path.exists(ground_truth_file):
        return "Ground truth file not found", None

    try:
        # Depending on the Gradio version/configuration the component passes
        # either a file wrapper with a .name attribute or the path itself.
        prediction_path = getattr(prediction_file, "name", prediction_file)

        predictions_df = pd.read_csv(prediction_path)
        ground_truth_df = pd.read_csv(ground_truth_file)

        # Fail with a readable message instead of a bare KeyError text.
        missing = [c for c in ("question_id", "predicted_answer") if c not in predictions_df.columns]
        if missing:
            return f"Prediction file is missing required column(s): {', '.join(missing)}", None
        missing = [c for c in ("question_id", "Answer") if c not in ground_truth_df.columns]
        if missing:
            return f"Ground truth file is missing required column(s): {', '.join(missing)}", None

        # The model name is the second underscore-separated token of the
        # file name, cut at its first dot.
        filename = os.path.basename(prediction_path)
        model_name = filename.split('_')[1].split('.')[0] if "_" in filename else "unknown_model"

        merged_df = pd.merge(predictions_df, ground_truth_df, on='question_id', how='inner')
        total_predictions = len(merged_df)
        if total_predictions == 0:
            # Nothing to score; dividing by zero would record "nan%".
            return "No matching question_id values found between predictions and ground truth", None

        merged_df['pred_answer'] = merged_df['predicted_answer'].apply(clean_answer)
        correct_predictions = int((merged_df['pred_answer'] == merged_df['Answer']).sum())
        # "Valid" predictions are those from which a choice letter could be parsed.
        valid_predictions = int(merged_df['pred_answer'].notna().sum())

        overall_accuracy = correct_predictions / total_predictions
        valid_accuracy = correct_predictions / valid_predictions if valid_predictions else 0.0

        results = {
            'model_name': model_name,
            'overall_accuracy': overall_accuracy,
            'valid_accuracy': valid_accuracy,
            'correct_predictions': correct_predictions,
            'total_questions': total_predictions,
        }
        update_leaderboard(results)
        return "Evaluation completed successfully! Leaderboard updated.", LEADERBOARD_FILE
    except Exception as e:
        # UI boundary: report the failure in the status box rather than crash.
        return f"Error: {str(e)}", None
# Gradio Interface with Leaderboard
def display_leaderboard(leaderboard_file=None):
    """Render the leaderboard CSV as text for display.

    Args:
        leaderboard_file: CSV path to read. Defaults to the module-level
            ``LEADERBOARD_FILE`` when omitted.

    Returns:
        The table as a Markdown string (plain aligned text if the optional
        ``tabulate`` package needed by ``DataFrame.to_markdown`` is not
        installed), or ``"Leaderboard is empty."`` when the file is missing
        or holds no rows.
    """
    if leaderboard_file is None:
        leaderboard_file = LEADERBOARD_FILE
    if not os.path.exists(leaderboard_file):
        return "Leaderboard is empty."

    try:
        leaderboard_df = pd.read_csv(leaderboard_file)
    except pd.errors.EmptyDataError:
        # A zero-byte file has no header to parse.
        return "Leaderboard is empty."
    if leaderboard_df.empty:
        return "Leaderboard is empty."

    try:
        return leaderboard_df.to_markdown(index=False)
    except ImportError:
        # to_markdown depends on the optional "tabulate" package.
        return leaderboard_df.to_string(index=False)
# Two-tab Gradio UI: "Evaluate" scores an uploaded prediction file,
# "Leaderboard" shows the accumulated results on demand.
with gr.Blocks() as demo:
    gr.Markdown("# Prediction Evaluation Tool with Leaderboard")

    with gr.Tab("Evaluate"):
        file_input = gr.File(label="Upload Prediction CSV")
        eval_status = gr.Textbox(label="Evaluation Status")
        eval_results_file = gr.File(label="Download Evaluation Results")
        eval_button = gr.Button("Evaluate")
        # evaluate_predictions returns (status text, leaderboard file path).
        eval_button.click(
            fn=evaluate_predictions,
            inputs=file_input,
            outputs=[eval_status, eval_results_file],
        )

    with gr.Tab("Leaderboard"):
        leaderboard_text = gr.Textbox(label="Leaderboard", interactive=False)
        refresh_button = gr.Button("Refresh Leaderboard")
        refresh_button.click(fn=display_leaderboard, outputs=leaderboard_text)

# Start the web server only when run as a script, not on import.
if __name__ == "__main__":
    demo.launch()