Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -5,6 +5,8 @@ from ragas import evaluate, EvaluationDataset
|
|
5 |
from ragas.metrics import Faithfulness, FactualCorrectness, SemanticSimilarity, ResponseGroundedness, AnswerAccuracy
|
6 |
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
|
7 |
from ragas.llms import LangchainLLMWrapper
|
|
|
|
|
8 |
|
9 |
# Initialize OpenAI API
|
10 |
llm = ChatOpenAI(model="gpt-4.1")
|
@@ -29,6 +31,25 @@ def clean_retrieved_context(raw_context):
|
|
29 |
# Return explicitly cleaned context
|
30 |
return cleaned.strip()
|
31 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
|
33 |
def evaluate_scdd(ai_scdd_file, human_scdd_file, user_input):
|
34 |
|
@@ -65,7 +86,12 @@ def evaluate_scdd(ai_scdd_file, human_scdd_file, user_input):
|
|
65 |
embeddings=embeddings
|
66 |
)
|
67 |
|
68 |
-
|
|
|
|
|
|
|
|
|
|
|
69 |
|
70 |
|
71 |
# ----- Gradio Interface -----
|
@@ -77,7 +103,10 @@ interface = gr.Interface(
|
|
77 |
gr.File(label="Upload Human-Generated SCDD (Word .docx)", type='filepath'),
|
78 |
gr.Textbox(label="Science Goal (Optional)", placeholder="Enter science goal here..."),
|
79 |
],
|
80 |
-
outputs=
|
|
|
|
|
|
|
81 |
title="RAGAS Evaluation: AI vs Human SCDD",
|
82 |
description="Compare AI-generated and human-generated science case documents using RAGAS LLM-powered metrics"
|
83 |
)
|
|
|
5 |
from ragas.metrics import Faithfulness, FactualCorrectness, SemanticSimilarity, ResponseGroundedness, AnswerAccuracy
|
6 |
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
|
7 |
from ragas.llms import LangchainLLMWrapper
|
8 |
+
import matplotlib.pyplot as plt
|
9 |
+
import numpy as np
|
10 |
|
11 |
# Initialize OpenAI API
|
12 |
llm = ChatOpenAI(model="gpt-4.1")
|
|
|
31 |
# Return explicitly cleaned context
|
32 |
return cleaned.strip()
|
33 |
|
34 |
+
def format_ragas_results(ragas_results):
    """Render each metric score as a percentage string with two decimals.

    Args:
        ragas_results: Mapping of metric name to a numeric score expressed
            as a fraction (``0.5`` becomes ``"50.00%"``).
            NOTE(review): this relies on ``ragas_results.items()``; confirm
            that the object handed in by the caller really is dict-like.

    Returns:
        A new dict with the same keys, in the same order, whose values are
        the formatted percentage strings.
    """
    # The ``%`` presentation type multiplies by 100 and appends the sign,
    # so no manual scaling is needed.
    return {metric: f"{score:.2%}" for metric, score in ragas_results.items()}
|
36 |
+
|
37 |
+
def plot_radar_chart(ragas_results):
    """Draw the metric scores as a closed radar (polar) chart.

    Args:
        ragas_results: Mapping of metric name to numeric score. Only
            ``keys()`` and ``values()`` are read, in iteration order.
            NOTE(review): scores are presumed to be fractions in [0, 1]
            (the sibling formatter renders them as percentages) -- confirm.

    Returns:
        A ``matplotlib.figure.Figure`` holding a single polar axes. With no
        metrics the axes is left empty instead of raising.
    """
    # Build the figure without pyplot: pyplot keeps every figure it creates
    # in a global registry until it is closed, which leaks one figure per
    # request in a long-running app and makes "current figure" calls such as
    # plt.tight_layout() act on whichever figure was created last.
    from matplotlib.figure import Figure

    labels = list(ragas_results.keys())
    scores = list(ragas_results.values())

    fig = Figure(figsize=(6, 6))
    ax = fig.add_subplot(111, polar=True)

    if labels:
        # One extra angle/score so the outline ends where it started.
        angles = np.linspace(0, 2 * np.pi, len(labels) + 1, endpoint=True)
        closed_scores = scores + scores[:1]

        ax.plot(angles, closed_scores, 'o-', linewidth=2)
        ax.fill(angles, closed_scores, alpha=0.25)
        ax.set_xticks(angles[:-1])
        ax.set_xticklabels(labels)

    # Radial tick labels are hidden below, so pin the radial range; otherwise
    # autoscaling makes the drawn radius meaningless between runs. The upper
    # bound grows only if a finite score exceeds 1.
    finite_scores = [s for s in scores if np.isfinite(s)]
    ax.set_ylim(0, max([1.0, *finite_scores]))
    ax.set_yticklabels([])
    ax.set_title('RAGAS Metrics Radar Chart')
    fig.tight_layout()
    return fig
|
53 |
|
54 |
def evaluate_scdd(ai_scdd_file, human_scdd_file, user_input):
|
55 |
|
|
|
86 |
embeddings=embeddings
|
87 |
)
|
88 |
|
89 |
+
# RAGAS metrics outputs
|
90 |
+
|
91 |
+
formatted_scores = format_ragas_results(ragas_result)
|
92 |
+
radar_chart = plot_radar_chart(ragas_result)
|
93 |
+
|
94 |
+
return formatted_scores, radar_chart
|
95 |
|
96 |
|
97 |
# ----- Gradio Interface -----
|
|
|
103 |
gr.File(label="Upload Human-Generated SCDD (Word .docx)", type='filepath'),
|
104 |
gr.Textbox(label="Science Goal (Optional)", placeholder="Enter science goal here..."),
|
105 |
],
|
106 |
+
outputs=[
|
107 |
+
gr.JSON(label="RAGAS Scores"),
|
108 |
+
gr.Plot(label="Metrics Radar Chart")
|
109 |
+
],
|
110 |
title="RAGAS Evaluation: AI vs Human SCDD",
|
111 |
description="Compare AI-generated and human-generated science case documents using RAGAS LLM-powered metrics"
|
112 |
)
|