Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -73,7 +73,27 @@ def plot_radar_chart(ragas_results):
|
|
73 |
plt.close(fig)
|
74 |
return chart_path
|
75 |
|
76 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
77 |
doc = Document()
|
78 |
doc.add_heading("SCDD Evaluation Report", 0)
|
79 |
|
@@ -87,6 +107,9 @@ def generate_word_report(science_goal, ragas_results, radar_chart_path):
|
|
87 |
doc.add_heading("RAGAS Metrics Chart", level=1)
|
88 |
doc.add_picture(radar_chart_path, width=Inches(5))
|
89 |
|
|
|
|
|
|
|
90 |
output_path = "SCDD_Evaluation_Report.docx"
|
91 |
doc.save(output_path)
|
92 |
return output_path
|
@@ -130,9 +153,10 @@ def evaluate_scdd(ai_scdd_file, human_scdd_file, user_input):
|
|
130 |
|
131 |
formatted_scores = format_ragas_results(ragas_result)
|
132 |
radar_chart_path = plot_radar_chart(ragas_result)
|
133 |
-
|
|
|
134 |
|
135 |
-
return formatted_scores, radar_chart_path, word_report_path
|
136 |
|
137 |
|
138 |
# ----- Gradio Interface -----
|
@@ -147,6 +171,7 @@ interface = gr.Interface(
|
|
147 |
outputs=[
|
148 |
gr.JSON(label="RAGAS Scores"),
|
149 |
gr.Image(label="RAGAS Metrics Radar Chart"),
|
|
|
150 |
gr.File(label="Download Word Report")
|
151 |
],
|
152 |
title="RAGAS Evaluation: AI vs Human SCDD",
|
|
|
73 |
plt.close(fig)
|
74 |
return chart_path
|
75 |
|
76 |
+
def interpret_ragas_results_with_gpt(formatted_scores: list, llm) -> str:
    """Ask an LLM for a paragraph-style interpretation of RAGAS scores.

    Args:
        formatted_scores: Sequence whose first element is a mapping of
            metric name to score. Only that first element is used.
        llm: Object exposing ``invoke(prompt)``. The reply may be either a
            message-like object with a ``content`` attribute or a plain
            string.

    Returns:
        The interpretation text with surrounding whitespace removed, or
        ``"Invalid RAGAS scores provided."`` when ``formatted_scores`` is
        empty, is not a list/tuple, or its first element is not a
        non-empty dict. In that case the LLM is not called.
    """
    # Validate the container before indexing so that a dict or other
    # non-sequence input yields the error message instead of raising.
    if not isinstance(formatted_scores, (list, tuple)) or not formatted_scores:
        return "Invalid RAGAS scores provided."

    scores = formatted_scores[0]
    # An empty mapping would produce a prompt with no metrics to interpret,
    # so it is rejected without spending an LLM call.
    if not isinstance(scores, dict) or not scores:
        return "Invalid RAGAS scores provided."

    score_text = "\n".join(f"{k}: {v}" for k, v in scores.items())

    prompt = f"""
You are an expert in RAGAS evaluation metrics to evaluate AI-generated content. Based on the following RAGAS evaluation scores, provide a concise interpretation of each of the metric for the evaluation of AI-generated text. Write in a professional, clear, and objective tone.

RAGAS Scores:
{score_text}

Provide a paragraph-style interpretation.
"""

    response = llm.invoke(prompt)

    # Chat-style models return a message object carrying ``content``;
    # completion-style models return the text directly. Accept both.
    content = getattr(response, "content", response)
    if not isinstance(content, str):
        content = str(content)
    return content.strip()
|
94 |
+
|
95 |
+
|
96 |
+
def generate_word_report(science_goal, ragas_results, radar_chart_path, interpretation):
|
97 |
doc = Document()
|
98 |
doc.add_heading("SCDD Evaluation Report", 0)
|
99 |
|
|
|
107 |
doc.add_heading("RAGAS Metrics Chart", level=1)
|
108 |
doc.add_picture(radar_chart_path, width=Inches(5))
|
109 |
|
110 |
+
doc.add_heading("GPT Interpretation", level=1)
|
111 |
+
doc.add_paragraph(interpretation)
|
112 |
+
|
113 |
output_path = "SCDD_Evaluation_Report.docx"
|
114 |
doc.save(output_path)
|
115 |
return output_path
|
|
|
153 |
|
154 |
formatted_scores = format_ragas_results(ragas_result)
|
155 |
radar_chart_path = plot_radar_chart(ragas_result)
|
156 |
+
interpretation = interpret_ragas_results_with_gpt(formatted_scores, llm)
|
157 |
+
word_report_path = generate_word_report(user_input, ragas_result, radar_chart_path, interpretation)
|
158 |
|
159 |
+
return formatted_scores, radar_chart_path, interpretation, word_report_path
|
160 |
|
161 |
|
162 |
# ----- Gradio Interface -----
|
|
|
171 |
outputs=[
|
172 |
gr.JSON(label="RAGAS Scores"),
|
173 |
gr.Image(label="RAGAS Metrics Radar Chart"),
|
174 |
+
gr.Textbox(label="GPT Interpretation of RAGAS Results"),
|
175 |
gr.File(label="Download Word Report")
|
176 |
],
|
177 |
title="RAGAS Evaluation: AI vs Human SCDD",
|