import streamlit as st import pandas as pd from PIL import Image import base64 from io import BytesIO import gradio as gr # Set up page config st.set_page_config( page_title="FactBench Leaderboard", layout="wide" ) # load header with open("_header.md", "r") as f: HEADER_MD = f.read() # Load the image image = Image.open("factEvalSteps.png") logo_image = Image.open("Factbench_logo.png") # Custom CSS for the page st.markdown( """ """, unsafe_allow_html=True ) # Display title and description st.markdown('
🔎 FactBench: A Dynamic Benchmark for In-the-Wild Language Model Factuality Evaluation
📑 Paper | 💻 GitHub | 🐦 X | 💬 Discussion | ⚙️ Version: V1 | # Models: 7 | Updated: 10/26/2024
Tier | Rank | Model | Factuality Score | Hallucination Score | # Tokens | # Factual | # Undecidable | # Unsupported |
---|
Rank | Model | Factuality Score | Hallucination Score | # Tokens | # Factual | # Undecidable | # Unsupported | |
---|---|---|---|---|---|---|---|---|
{current_tier} | ' # Fill in model and scores html += f'''{row['rank']} | {row['model']} | {row['factuality_score']} | {row['hallucination_score']} | {row['avg_tokens']} | {row['avg_factual_units']} | {row['avg_undecidable_units']:.2f} | {row['avg_unsupported_units']:.2f} |