Spaces:

aquibmoin
/

AI-SciDoc-Evaluator

Sleeping

App Files Community

aquibmoin committed on Jul 22

Commit

c8c03be

verified ·

1 Parent(s): e0628d8

Update app.py

Browse files

Files changed (1) hide show

app.py +31 -2

app.py CHANGED Viewed

@@ -5,6 +5,8 @@ from ragas import evaluate, EvaluationDataset
 from ragas.metrics import Faithfulness, FactualCorrectness, SemanticSimilarity, ResponseGroundedness, AnswerAccuracy
 from langchain_openai import ChatOpenAI, OpenAIEmbeddings
 from ragas.llms import LangchainLLMWrapper
 # Initialize OpenAI API
 llm = ChatOpenAI(model="gpt-4.1")
@@ -29,6 +31,25 @@ def clean_retrieved_context(raw_context):
     # Return explicitly cleaned context
     return cleaned.strip()
 def evaluate_scdd(ai_scdd_file, human_scdd_file, user_input):
@@ -65,7 +86,12 @@ def evaluate_scdd(ai_scdd_file, human_scdd_file, user_input):
         embeddings=embeddings
     )
-    return ragas_result
 # ----- Gradio Interface -----
@@ -77,7 +103,10 @@ interface = gr.Interface(
         gr.File(label="Upload Human-Generated SCDD (Word .docx)", type='filepath'),
         gr.Textbox(label="Science Goal (Optional)", placeholder="Enter science goal here..."),
     ],
-    outputs=gr.Textbox(label="RAGAS Evaluation Scores..."),
     title="RAGAS Evaluation: AI vs Human SCDD",
     description="Compare AI-generated and human-generated science case documents using RAGAS LLM-powered metrics"
 )

 from ragas.metrics import Faithfulness, FactualCorrectness, SemanticSimilarity, ResponseGroundedness, AnswerAccuracy
 from langchain_openai import ChatOpenAI, OpenAIEmbeddings
 from ragas.llms import LangchainLLMWrapper
+import matplotlib.pyplot as plt
+import numpy as np
 # Initialize OpenAI API
 llm = ChatOpenAI(model="gpt-4.1")
     # Return explicitly cleaned context
     return cleaned.strip()
+def format_ragas_results(ragas_results):
+    return {metric: f"{score*100:.2f}%" for metric, score in ragas_results.items()}
+def plot_radar_chart(ragas_results):
+    labels = list(ragas_results.keys())
+    scores = list(ragas_results.values())
+    scores += scores[:1]
+    angles = np.linspace(0, 2 * np.pi, len(labels) + 1, endpoint=True)
+    fig, ax = plt.subplots(figsize=(6,6), subplot_kw=dict(polar=True))
+    ax.plot(angles, scores, 'o-', linewidth=2)
+    ax.fill(angles, scores, alpha=0.25)
+    ax.set_xticks(angles[:-1])
+    ax.set_xticklabels(labels)
+    ax.set_yticklabels([])
+    ax.set_title('RAGAS Metrics Radar Chart')
+    plt.tight_layout()
+    return fig
 def evaluate_scdd(ai_scdd_file, human_scdd_file, user_input):
         embeddings=embeddings
     )
+    # RAGAS metrics outputs
+    formatted_scores = format_ragas_results(ragas_result)
+    radar_chart = plot_radar_chart(ragas_result)
+    return formatted_scores, radar_chart
 # ----- Gradio Interface -----
         gr.File(label="Upload Human-Generated SCDD (Word .docx)", type='filepath'),
         gr.Textbox(label="Science Goal (Optional)", placeholder="Enter science goal here..."),
     ],
+    outputs=[
+        gr.JSON(label="RAGAS Scores"),
+        gr.Plot(label="Metrics Radar Chart")
+    ],
     title="RAGAS Evaluation: AI vs Human SCDD",
     description="Compare AI-generated and human-generated science case documents using RAGAS LLM-powered metrics"
 )