aquibmoin committed on
Commit
c8c03be
·
verified ·
1 Parent(s): e0628d8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -2
app.py CHANGED
@@ -5,6 +5,8 @@ from ragas import evaluate, EvaluationDataset
5
  from ragas.metrics import Faithfulness, FactualCorrectness, SemanticSimilarity, ResponseGroundedness, AnswerAccuracy
6
  from langchain_openai import ChatOpenAI, OpenAIEmbeddings
7
  from ragas.llms import LangchainLLMWrapper
 
 
8
 
9
  # Initialize OpenAI API
10
  llm = ChatOpenAI(model="gpt-4.1")
@@ -29,6 +31,25 @@ def clean_retrieved_context(raw_context):
29
  # Return explicitly cleaned context
30
  return cleaned.strip()
31
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
  def evaluate_scdd(ai_scdd_file, human_scdd_file, user_input):
34
 
@@ -65,7 +86,12 @@ def evaluate_scdd(ai_scdd_file, human_scdd_file, user_input):
65
  embeddings=embeddings
66
  )
67
 
68
- return ragas_result
 
 
 
 
 
69
 
70
 
71
  # ----- Gradio Interface -----
@@ -77,7 +103,10 @@ interface = gr.Interface(
77
  gr.File(label="Upload Human-Generated SCDD (Word .docx)", type='filepath'),
78
  gr.Textbox(label="Science Goal (Optional)", placeholder="Enter science goal here..."),
79
  ],
80
- outputs=gr.Textbox(label="RAGAS Evaluation Scores..."),
 
 
 
81
  title="RAGAS Evaluation: AI vs Human SCDD",
82
  description="Compare AI-generated and human-generated science case documents using RAGAS LLM-powered metrics"
83
  )
 
5
  from ragas.metrics import Faithfulness, FactualCorrectness, SemanticSimilarity, ResponseGroundedness, AnswerAccuracy
6
  from langchain_openai import ChatOpenAI, OpenAIEmbeddings
7
  from ragas.llms import LangchainLLMWrapper
8
+ import matplotlib.pyplot as plt
9
+ import numpy as np
10
 
11
  # Initialize OpenAI API
12
  llm = ChatOpenAI(model="gpt-4.1")
 
31
  # Return explicitly cleaned context
32
  return cleaned.strip()
33
 
34
def format_ragas_results(ragas_results):
    """Format RAGAS metric scores as human-readable percentage strings.

    Args:
        ragas_results: A mapping of metric name -> score, where each score
            is a float on the 0-1 scale (presumably a ragas EvaluationResult
            or plain dict — any object with ``.items()`` works).

    Returns:
        dict: metric name -> score rendered as a percentage with two
        decimal places, e.g. ``0.8534`` -> ``"85.34%"``.
    """
    formatted = {}
    for metric_name, raw_score in ragas_results.items():
        formatted[metric_name] = f"{raw_score * 100:.2f}%"
    return formatted
36
+
37
def plot_radar_chart(ragas_results):
    """Render the RAGAS metric scores as a closed radar (spider) chart.

    Args:
        ragas_results: A mapping of metric name -> numeric score
            (any object exposing ``.keys()`` and ``.values()``).

    Returns:
        matplotlib.figure.Figure: a polar-axes figure with one vertex per
        metric, suitable for display via ``gr.Plot``.
    """
    metric_names = list(ragas_results.keys())
    values = list(ragas_results.values())

    # Repeat the first value so the plotted polygon closes on itself.
    values.append(values[0])
    # One angle per metric, plus a duplicate endpoint at 2*pi (== 0)
    # matching the repeated first value above.
    angles = np.linspace(0, 2 * np.pi, len(metric_names) + 1, endpoint=True)

    fig, ax = plt.subplots(figsize=(6, 6), subplot_kw=dict(polar=True))
    ax.plot(angles, values, 'o-', linewidth=2)
    ax.fill(angles, values, alpha=0.25)
    # Tick every metric angle, dropping the duplicated closing endpoint.
    ax.set_xticks(angles[:-1])
    ax.set_xticklabels(metric_names)
    # Hide radial labels; relative shape is what matters here.
    ax.set_yticklabels([])
    ax.set_title('RAGAS Metrics Radar Chart')
    plt.tight_layout()
    return fig
53
 
54
  def evaluate_scdd(ai_scdd_file, human_scdd_file, user_input):
55
 
 
86
  embeddings=embeddings
87
  )
88
 
89
+ # RAGAS metrics outputs
90
+
91
+ formatted_scores = format_ragas_results(ragas_result)
92
+ radar_chart = plot_radar_chart(ragas_result)
93
+
94
+ return formatted_scores, radar_chart
95
 
96
 
97
  # ----- Gradio Interface -----
 
103
  gr.File(label="Upload Human-Generated SCDD (Word .docx)", type='filepath'),
104
  gr.Textbox(label="Science Goal (Optional)", placeholder="Enter science goal here..."),
105
  ],
106
+ outputs=[
107
+ gr.JSON(label="RAGAS Scores"),
108
+ gr.Plot(label="Metrics Radar Chart")
109
+ ],
110
  title="RAGAS Evaluation: AI vs Human SCDD",
111
  description="Compare AI-generated and human-generated science case documents using RAGAS LLM-powered metrics"
112
  )