import logging
import os

import gradio as gr
from lettucedetect.models.inference import HallucinationDetector

logger = logging.getLogger(__name__)

title = """# 🙋🏻‍♂️Welcome to 🌟Tonic's 🥬 LettuceDetect - 🤯🧠 Hallucination Tester 🟢🔴"""

description = """
Powered by `lettucedect-large-modernbert-en-v1` from KRLabsOrg. Detect hallucinations in answers based on context and questions using ModernBERT with 8192-token context support!

### How to Use:
1. Enter a **Context** (source document or info).
2. Enter a **Question** related to the context.
3. Enter an **Answer** to evaluate.
4. Press **Submit** to see if the answer hallucinates!
- 🟢 = No hallucinations
- 🔴 = Hallucinations detected
- Highlighted text shows hallucinated spans in **red** with confidence scores.
"""

join_us = """
## Join us:
🌟TeamTonic🌟 is always making cool demos! Join our active builder's 🛠️community 👻 [![Join us on Discord](https://img.shields.io/discord/1109943800132010065?label=Discord&logo=discord&style=flat-square)](https://discord.gg/n8ytYeh25n) On 🤗Huggingface: [MultiTransformer](https://huggingface.co/MultiTransformer) On 🌐Github: [Tonic-AI](https://github.com/tonic-ai) & contribute to🌟 [Data Tonic](https://github.com/multiTonic/thinking-dataset/) 🤗Big thanks to Yuvi Sharma and all the folks at huggingface for the community grant 🤗
"""

# Single category label shared by the span builder and the HighlightedText
# color map. Gradio matches color_map keys against labels exactly, so the two
# must be the same string for the red highlight to apply.
HALLUCINATION_LABEL = "hallucination"

# Initialize the LettuceDetect model
detector = HallucinationDetector(
    method="transformer",
    model_path="KRLabsOrg/lettucedect-large-modernbert-en-v1",
)


def _build_highlight_segments(answer, spans):
    """Split *answer* into ``(text, label)`` tuples for ``gr.HighlightedText``.

    Args:
        answer: The full answer string the span offsets index into.
        spans: Span dicts with integer ``start``/``end`` offsets, already
            sorted by ``start``.

    Returns:
        A list of ``(text, label)`` tuples that concatenate back to *answer*.
        ``label`` is ``HALLUCINATION_LABEL`` for flagged text, else ``None``.
    """
    segments = []
    cursor = 0
    for span in spans:
        # Clip to the unconsumed part of the answer so overlapping or
        # out-of-range spans can neither duplicate nor drop text.
        start = max(span["start"], cursor)
        end = min(span["end"], len(answer))
        if start >= end:
            continue
        if cursor < start:
            segments.append((answer[cursor:start], None))
        segments.append((answer[start:end], HALLUCINATION_LABEL))
        cursor = end
    # Trailing text after the last flagged span.
    if cursor < len(answer):
        segments.append((answer[cursor:], None))
    return segments


# Function to evaluate hallucination with LettuceDetect
def evaluate_hallucination(context, question, answer):
    """Run LettuceDetect on an answer and format the result for the UI.

    Args:
        context: Source text the answer is checked against.
        question: Question the answer responds to.
        answer: Answer text to evaluate.

    Returns:
        A 5-tuple ``(status, explanation, highlighted_segments, spans_text,
        average_text)``:

        * ``status`` - "🟢" (no spans), "🔴" (spans found) or "⚪" (invalid
          input or error).
        * ``explanation`` - short human-readable verdict or error message.
        * ``highlighted_segments`` - ``(text, label)`` tuples for
          ``gr.HighlightedText``.
        * ``spans_text`` - one line per flagged span with its confidence.
        * ``average_text`` - mean confidence over the flagged spans.

    Exceptions raised by the detector or by malformed predictions are logged
    and reported through the "⚪" result rather than propagated.
    """
    answer = answer or ""

    # An empty context or answer cannot be judged; without this guard the
    # model result for blank input would be shown as a green "no
    # hallucinations" verdict.
    if not (context or "").strip() or not answer.strip():
        return (
            "⚪",
            "Error: Please provide both a context and an answer.",
            [(answer, None)],
            "N/A",
            "N/A",
        )

    try:
        # Get span-level predictions from LettuceDetect
        predictions = detector.predict(
            context=[context],
            question=question,
            answer=answer,
            output_format="spans",
        )

        if not predictions:
            return "🟢", "No hallucinations detected", [(answer, None)], "Confidence: N/A", "N/A"

        # Process spans left to right regardless of the order returned.
        spans = sorted(predictions, key=lambda pred: pred["start"])

        highlighted_segments = _build_highlight_segments(answer, spans)
        confidence_scores = [
            f"'{pred['text']}' - Confidence: {pred['confidence']:.4f}"
            for pred in spans
        ]
        avg_confidence = sum(pred["confidence"] for pred in spans) / len(spans)

        return (
            "🔴",
            "Hallucinations detected",
            highlighted_segments,
            "\n".join(confidence_scores),
            f"Average Confidence: {avg_confidence:.4f}",
        )
    except Exception as exc:
        # UI boundary: keep the app responsive, but record the traceback so
        # the failure is not visible only as a one-line message in the browser.
        logger.exception("Hallucination evaluation failed")
        return "⚪", f"Error: {exc}", [(answer, None)], "N/A", "N/A"


# Gradio Blocks interface
with gr.Blocks(
    title="🥬 LettuceDetect Hallucination Tester 🟢🔴"
) as demo:
    gr.Markdown(title)
    with gr.Row():
        with gr.Group():
            gr.Markdown(description)
        with gr.Column():
            gr.Markdown(join_us)
    with gr.Row():
        with gr.Column(scale=2):
            # Inputs
            context_input = gr.Textbox(
                label="Context",
                lines=5,
                placeholder="Enter the context (e.g., a document or source text)...",
            )
            question_input = gr.Textbox(
                label="Question",
                placeholder="Enter the question...",
            )
            answer_input = gr.Textbox(
                label="Answer",
                lines=3,
                placeholder="Enter the answer to evaluate...",
            )
            submit_btn = gr.Button("Submit")
        with gr.Column(scale=3):
            with gr.Row():
                with gr.Column():
                    status_output = gr.Label(label="Status")
                with gr.Column():
                    explanation_output = gr.Textbox(label="Explanation", interactive=False)
            highlighted_answer_output = gr.HighlightedText(
                label="Answer with Hallucinations Highlighted",
                show_legend=False,
                # Key must equal the label emitted by evaluate_hallucination.
                color_map={HALLUCINATION_LABEL: "red"},
                combine_adjacent=True,
            )
            spans_output = gr.Textbox(label="Hallucinated Spans & Confidence", lines=5, interactive=False)
            avg_confidence_output = gr.Textbox(label="Average Confidence", interactive=False)

    # Connect inputs to outputs via the evaluation function
    submit_btn.click(
        fn=evaluate_hallucination,
        inputs=[context_input, question_input, answer_input],
        outputs=[
            status_output,
            explanation_output,
            highlighted_answer_output,
            spans_output,
            avg_confidence_output,
        ],
    )

    # Example
    gr.Markdown("### Example")
    with gr.Row():
        gr.Examples(
            examples=[
                [
                    "France is a country in Europe. The capital of France is Paris. The population of France is 67 million.",
                    "What is the capital of France? What is the population of France?",
                    "The capital of France is Paris. The population of France is 69 million.",
                ]
            ],
            inputs=[context_input, question_input, answer_input],
        )

# Launch the demo
demo.launch()