Spaces:
Sleeping
Sleeping
File size: 2,020 Bytes
3fa594d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 |
import gradio as gr
# Function to execute evaluate_model.py
def evaluate_model_script():
    """Evaluate the trained spaCy model against the golden evaluation set.

    Loads the pipeline stored in ``./my_trained_model``, reads every record
    of ``data/goldenEval.jsonl``, predicts the highest-scoring category for
    each record's ``"text"`` and compares it with the first entry of the
    record's ``"accept"`` list. Records whose ``"accept"`` list is empty or
    missing are counted under the label ``"reject"``.

    Returns:
        A dict with the keys ``"accuracy"``, ``"precision"``, ``"recall"``
        and ``"f1_score"`` (precision, recall and F1 are weighted averages)
        plus ``"detailed_classification_report"``, the text produced by
        sklearn's ``classification_report``.

    Raises:
        ValueError: If the evaluation file contains no records, or if the
            model yields no category scores for a text.
    """
    # Function-scope imports, as in the original layout of this script.
    import spacy
    import jsonlines
    from sklearn.metrics import (
        accuracy_score,
        classification_report,
        f1_score,
        precision_score,
        recall_score,
    )

    # Load the trained spaCy model
    nlp = spacy.load("./my_trained_model")

    # Load the golden evaluation data
    with jsonlines.open("data/goldenEval.jsonl") as reader:
        golden_eval_data = list(reader)
    if not golden_eval_data:
        raise ValueError("data/goldenEval.jsonl contains no evaluation records")

    # Ground truth: first accepted label, or "reject" when nothing was accepted.
    true_labels_flat = []
    for record in golden_eval_data:
        accepted = record.get("accept")
        true_labels_flat.append(accepted[0] if accepted else "reject")

    # Predictions: nlp.pipe processes the texts as a stream and yields the
    # docs in input order, so they stay aligned with true_labels_flat.
    texts = (record["text"] for record in golden_eval_data)
    predicted_labels_flat = []
    for doc in nlp.pipe(texts):
        scores = doc.cats
        if not scores:
            # max() on an empty mapping would fail with an opaque message.
            raise ValueError(
                "The model produced no category scores; "
                "check that the pipeline contains a text classifier"
            )
        predicted_labels_flat.append(max(scores, key=scores.get))

    # Calculate evaluation metrics
    accuracy = accuracy_score(true_labels_flat, predicted_labels_flat)
    precision = precision_score(
        true_labels_flat, predicted_labels_flat, average='weighted'
    )
    recall = recall_score(
        true_labels_flat, predicted_labels_flat, average='weighted'
    )
    f1 = f1_score(true_labels_flat, predicted_labels_flat, average='weighted')

    # Additional classification report
    report = classification_report(true_labels_flat, predicted_labels_flat)

    # Build the result dictionary
    return {
        "accuracy": accuracy,
        "precision": precision,
        "recall": recall,
        "f1_score": f1,
        "detailed_classification_report": report,
    }
# Gradio Interface
# evaluate_model_script returns a dict that mixes numeric metrics with a
# multi-line text report, so it is shown with a JSON output component; a Label
# output is meant for label -> confidence mappings and has no "json" type.
output = gr.outputs.JSON(label="Evaluation Metrics")
# The callback takes no arguments, so the interface has no input components.
iface = gr.Interface(
    fn=evaluate_model_script,
    inputs=None,
    outputs=output,
    title="Evaluate Model Script",
)
iface.launch()
|