Spaces:
Sleeping
Sleeping
New changes
Browse files
app.py
CHANGED
@@ -588,9 +588,9 @@ def evaluate_rag_pipeline(domain, q_indices):
|
|
588 |
return round(np.sqrt(np.mean((np.array(gt) - np.array(pred)) ** 2)), 4)
|
589 |
|
590 |
result = {
|
591 |
-
"Context Relevance": compute_rmse(gt_relevance, pred_relevance),
|
592 |
-
"Context Utilization": compute_rmse(gt_utilization, pred_utilization),
|
593 |
-
"Completeness": compute_rmse(gt_completeness, pred_completeness),
|
594 |
}
|
595 |
|
596 |
if len(set(gt_adherence)) == 2:
|
@@ -636,7 +636,7 @@ iface = gr.Interface(
|
|
636 |
gr.JSON(label="Evaluation Metrics (RMSE & AUC-ROC)"),
|
637 |
gr.Textbox(label="Execution Log", lines=10, interactive=True),
|
638 |
],
|
639 |
-
title="RAG Evaluation Dashboard",
|
640 |
description="Evaluate your RAG pipeline across selected queries using LLM-based generation and judgment."
|
641 |
)
|
642 |
|
|
|
588 |
return round(np.sqrt(np.mean((np.array(gt) - np.array(pred)) ** 2)), 4)
|
589 |
|
590 |
result = {
|
591 |
+
"RMSE Context Relevance": compute_rmse(gt_relevance, pred_relevance),
|
592 |
+
"RMSE Context Utilization": compute_rmse(gt_utilization, pred_utilization),
|
593 |
+
"RMSE Completeness": compute_rmse(gt_completeness, pred_completeness),
|
594 |
}
|
595 |
|
596 |
if len(set(gt_adherence)) == 2:
|
|
|
636 |
gr.JSON(label="Evaluation Metrics (RMSE & AUC-ROC)"),
|
637 |
gr.Textbox(label="Execution Log", lines=10, interactive=True),
|
638 |
],
|
639 |
+
title=" RAG Evaluation Dashboard",
|
640 |
description="Evaluate your RAG pipeline across selected queries using LLM-based generation and judgment."
|
641 |
)
|
642 |
|