Spaces:

kluster-ai
/

LLM-Hallucination-Detection-Leaderboard

Running

App Files Files Community

rymc committed on Jul 7

Commit

a9d1818

verified ·

1 Parent(s): 676fa45

add captions to figures

Browse files

Files changed (1) hide show

app.py +22 -18

app.py CHANGED Viewed

@@ -188,24 +188,28 @@ with demo:
         with gr.TabItem("🏅 Hallucination Leaderboard", elem_id="llm-benchmark-tab-table", id=0):
             # ----------  Chart  ----------
             with gr.Row():
-                gr.Plot(
-                    make_leaderboard_plot(
-                        LEADERBOARD_DF,
-                        "RAG Hallucination Rate (%)",
-                        "RAG Hallucination Rate (lower is better)",
-                        bar_color="#4CAF50",
-                    ),
-                    show_label=False,
-                )
-                gr.Plot(
-                    make_leaderboard_plot(
-                        LEADERBOARD_DF,
-                        "Non-RAG Hallucination Rate (%)",
-                        "Non-RAG Hallucination Rate (lower is better)",
-                        bar_color="#FF7043",
-                    ),
-                    show_label=False,
-                )
             # ----------  Leaderboard  ----------
             leaderboard = init_leaderboard(LEADERBOARD_DF)

         with gr.TabItem("🏅 Hallucination Leaderboard", elem_id="llm-benchmark-tab-table", id=0):
             # ----------  Chart  ----------
             with gr.Row():
+                with gr.Column():
+                    gr.Plot(
+                        make_leaderboard_plot(
+                            LEADERBOARD_DF,
+                            "RAG Hallucination Rate (%)",
+                            "RAG Hallucination Rate (lower is better)",
+                            bar_color="#4CAF50",
+                        ),
+                        show_label=False,
+                    )
+                    gr.Markdown("*HaluEval-QA benchmark (RAG): The model receives a question plus supporting context. We report the % of answers that introduce facts not found in that context — lower is better. See the **Details** tab for more information.*", elem_classes="plot-caption")
+                with gr.Column():
+                    gr.Plot(
+                        make_leaderboard_plot(
+                            LEADERBOARD_DF,
+                            "Non-RAG Hallucination Rate (%)",
+                            "Non-RAG Hallucination Rate (lower is better)",
+                            bar_color="#FF7043",
+                        ),
+                        show_label=False,
+                    )
+                    gr.Markdown("*UltraChat benchmark (~11 k prompts, non-RAG): Evaluates open-domain answers when only the question is given. Score is the % of hallucinated responses — lower is better. See the **Details** tab for more information.*", elem_classes="plot-caption")
             # ----------  Leaderboard  ----------
             leaderboard = init_leaderboard(LEADERBOARD_DF)