Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,6 +1,8 @@
|
|
1 |
import gradio as gr
|
2 |
import re
|
|
|
3 |
from docx import Document
|
|
|
4 |
from ragas import evaluate, EvaluationDataset
|
5 |
from ragas.metrics import Faithfulness, FactualCorrectness, SemanticSimilarity, ResponseGroundedness, AnswerAccuracy
|
6 |
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
|
@@ -37,35 +39,44 @@ def format_ragas_results(ragas_results):
|
|
37 |
for sample_scores in ragas_results.scores
|
38 |
]
|
39 |
|
40 |
-
def
|
41 |
-
|
42 |
-
if isinstance(ragas_results.scores, list):
|
43 |
-
scores_dict = ragas_results.scores[0] # first sample
|
44 |
-
else:
|
45 |
-
scores_dict = ragas_results.scores
|
46 |
-
|
47 |
labels = list(scores_dict.keys())
|
48 |
values = list(scores_dict.values())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
49 |
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
|
55 |
-
|
|
|
|
|
56 |
|
57 |
-
|
58 |
-
|
59 |
|
60 |
-
|
61 |
-
|
|
|
62 |
|
63 |
-
|
64 |
-
|
65 |
|
66 |
-
|
67 |
-
|
68 |
-
return
|
69 |
|
70 |
def evaluate_scdd(ai_scdd_file, human_scdd_file, user_input):
|
71 |
|
@@ -105,9 +116,11 @@ def evaluate_scdd(ai_scdd_file, human_scdd_file, user_input):
|
|
105 |
# RAGAS metrics outputs
|
106 |
|
107 |
formatted_scores = format_ragas_results(ragas_result)
|
108 |
-
|
109 |
-
|
110 |
-
|
|
|
|
|
111 |
|
112 |
|
113 |
# ----- Gradio Interface -----
|
@@ -117,11 +130,12 @@ interface = gr.Interface(
|
|
117 |
inputs=[
|
118 |
gr.File(label="Upload AI-Generated SCDD (Word .docx)", type='filepath'),
|
119 |
gr.File(label="Upload Human-Generated SCDD (Word .docx)", type='filepath'),
|
120 |
-
gr.Textbox(label="Science Goal
|
121 |
],
|
122 |
outputs=[
|
123 |
gr.JSON(label="RAGAS Scores"),
|
124 |
-
gr.
|
|
|
125 |
],
|
126 |
title="RAGAS Evaluation: AI vs Human SCDD",
|
127 |
description="Compare AI-generated and human-generated science case documents using RAGAS LLM-powered metrics"
|
|
|
1 |
import gradio as gr
|
2 |
import re
|
3 |
+
import os
|
4 |
from docx import Document
|
5 |
+
from docx.shared import Inches
|
6 |
from ragas import evaluate, EvaluationDataset
|
7 |
from ragas.metrics import Faithfulness, FactualCorrectness, SemanticSimilarity, ResponseGroundedness, AnswerAccuracy
|
8 |
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
|
|
|
39 |
for sample_scores in ragas_results.scores
|
40 |
]
|
41 |
|
42 |
+
def plot_radar_chart(ragas_results):
    """Draw the first sample's RAGAS scores as a radar chart and save it as a PNG.

    Args:
        ragas_results: Result object whose ``scores`` attribute is indexable;
            its first element is read as a mapping of metric name to score.

    Returns:
        The path of the saved image ("radar_chart.png", relative to the
        current working directory).

    Raises:
        ValueError: If the first sample contains no metric scores.
    """
    import math

    scores_dict = ragas_results.scores[0]
    labels = list(scores_dict.keys())
    values = list(scores_dict.values())
    if not labels:
        raise ValueError("No RAGAS scores available to plot")

    # One spoke per metric. The angles must be computed from the number of
    # metrics *before* the closing point is added; otherwise the circle is
    # divided into N+1 slices, the outline never returns to its start, and
    # the first metric shows up twice as a separate axis.
    step = 2 * math.pi / len(labels)
    angles = [index * step for index in range(len(labels))]

    # Repeat the first point at the end so the outline is a closed polygon.
    closed_angles = angles + angles[:1]
    closed_values = values + values[:1]

    chart_path = "radar_chart.png"
    fig, ax = plt.subplots(figsize=(6, 6), subplot_kw=dict(polar=True))
    try:
        ax.plot(closed_angles, closed_values, 'b-', linewidth=2)
        ax.fill(closed_angles, closed_values, 'skyblue', alpha=0.4)
        ax.set_yticklabels([])
        # Tick positions use the open angle list so each metric is labelled once.
        ax.set_xticks(angles)
        ax.set_xticklabels(labels, fontsize=9)
        ax.set_title("RAGAS Evaluation Radar Chart", size=14)
        fig.savefig(chart_path)
    finally:
        # Release the figure even when drawing or saving fails.
        plt.close(fig)
    return chart_path
|
62 |
|
63 |
+
def generate_word_report(science_goal, ragas_results, radar_chart_path):
    """Build a Word report with the science goal, the scores and the radar chart.

    Args:
        science_goal: Text written under the "Science Goal" heading.
        ragas_results: Result object; the first element of its ``scores``
            attribute is read as a mapping of metric name to numeric score.
        radar_chart_path: Path of the chart image embedded in the report.

    Returns:
        The path of the saved document ("SCDD_Evaluation_Report.docx",
        relative to the current working directory).
    """
    report = Document()
    report.add_heading("SCDD Evaluation Report", 0)

    # Section 1: the science goal as supplied by the caller.
    report.add_heading("Science Goal", level=1)
    report.add_paragraph(science_goal)

    # Section 2: one paragraph per metric, with the score shown as a percentage.
    report.add_heading("RAGAS Evaluation Scores", level=1)
    first_sample_scores = ragas_results.scores[0]
    for metric_name in first_sample_scores:
        metric_score = first_sample_scores[metric_name]
        report.add_paragraph(f"{metric_name}: {metric_score*100:.2f}%")

    # Section 3: the previously rendered radar chart, five inches wide.
    report.add_heading("Radar Chart", level=1)
    report.add_picture(radar_chart_path, width=Inches(5))

    destination = "SCDD_Evaluation_Report.docx"
    report.save(destination)
    return destination
|
80 |
|
81 |
def evaluate_scdd(ai_scdd_file, human_scdd_file, user_input):
|
82 |
|
|
|
116 |
# RAGAS metrics outputs
|
117 |
|
118 |
formatted_scores = format_ragas_results(ragas_result)
|
119 |
+
radar_chart_path = plot_radar_chart(ragas_result)
|
120 |
+
word_report_path = generate_word_report(user_input, ragas_result, radar_chart_path)
|
121 |
+
|
122 |
+
score_text = "\n".join([f"{k}: {v}" for k, v in formatted_scores.items()])
|
123 |
+
return score_text, radar_chart_path, word_report_path
|
124 |
|
125 |
|
126 |
# ----- Gradio Interface -----
|
|
|
130 |
inputs=[
|
131 |
gr.File(label="Upload AI-Generated SCDD (Word .docx)", type='filepath'),
|
132 |
gr.File(label="Upload Human-Generated SCDD (Word .docx)", type='filepath'),
|
133 |
+
gr.Textbox(label="Science Goal", placeholder="Enter science goal here..."),
|
134 |
],
|
135 |
outputs=[
|
136 |
gr.JSON(label="RAGAS Scores"),
|
137 |
+
gr.Image(label="Radar Chart"),
|
138 |
+
gr.File(label="Download Word Report")
|
139 |
],
|
140 |
title="RAGAS Evaluation: AI vs Human SCDD",
|
141 |
description="Compare AI-generated and human-generated science case documents using RAGAS LLM-powered metrics"
|