# Spaces: shezamunir/test-leaderboard (Sleeping)
# File size: 4,468 Bytes
# aecfcd3

import streamlit as st
import pandas as pd

# Browser-tab title and centered layout for the leaderboard page
st.set_page_config(page_title="FactBench Leaderboard", layout="centered")

# Page-wide stylesheet: Courier Prime font, title/description classes, and the
# look of the leaderboard table (borders, header shading, column widths, hover).
_CUSTOM_CSS = """
    <style>
    @import url('https://fonts.googleapis.com/css2?family=Courier+Prime:wght@400&display=swap');

    html, body, [class*="css"] {
        font-family: 'Courier Prime', monospace;  /* Command-line font */
    }

    .title {
        font-size: 42px;
        font-weight: bold;
        text-align: center;
        color: #333;
        margin-bottom: 5px;
    }

    .description {
        font-size: 22px;
        text-align: center;
        margin-bottom: 30px;
        color: #555;
    }

    .table-container {
        margin-top: 20px;
    }

    table {
        width: 100%;  /* Set table to fill width */
        border-collapse: collapse;  /* Merge cells neatly */
        border-radius: 10px;  /* Rounded edges */
        overflow: hidden;  /* Ensure rounded edges are visible */
    }

    th, td {
        padding: 8px;  /* Reduced padding for smaller font */
        text-align: center;  /* Center-align text */
        border: 1px solid #ddd;  /* Add borders */
        font-size: 14px;  /* Smaller font size */
    }

    th {
        background-color: #f2f2f2;  /* Light gray background for header */
        font-weight: bold;  /* Bold font for headers */
    }

    /* Specific column widths */
    td:nth-child(2), th:nth-child(2) {  /* Wider Model column */
        width: 30%;  /* Increased width for model column */
    }

    td:nth-child(3), th:nth-child(3),
    td:nth-child(4), th:nth-child(4),
    td:nth-child(5), th:nth-child(5),
    td:nth-child(6), th:nth-child(6) {
        width: 17.5%;  /* Equal width for the rest */
    }

    /* Hover effect for table rows */
    tr:hover {
        background-color: #eaeaea;  /* Light grey on hover */
    }

    </style>
    """

# Inject the stylesheet into the page as raw HTML
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)

# Render the page heading: one styled <div> per (CSS class, text) pair
for css_class, text in (
    ("title", "FactBench Leaderboard"),
    ("description", "Benchmark for LM Factuality Evaluation"),
):
    st.markdown(f'<div class="{css_class}">{text}</div>',
                unsafe_allow_html=True)

# Leaderboard scores for every tier. Each score list holds one value per
# (tier, model) pair, ordered tier by tier with the models in _MODELS order.
_TIERS = ('Easy', 'Moderate', 'Hard')
_MODELS = ('GPT4-o', 'Gemini1.5-Pro', 'Llama3.1-70B-Instruct', 'Llama3.1-405B-Instruct')

data = {
    # Each tier name repeated once per model
    'Tier': [tier for tier in _TIERS for _ in _MODELS],
    # The full model list repeated once per tier
    'Model': list(_MODELS) * len(_TIERS),
    'FactScore': [
        53.19, 51.79, 52.49, 53.22,
        54.76, 52.62, 52.53, 53.48,
        69.44, 66.05, 69.85, 70.04,
    ],
    'SAFE': [
        63.31, 61.24, 61.29, 61.63,
        65.01, 62.68, 62.64, 63.29,
        76.17, 75.69, 77.55, 77.01,
    ],
    'Factcheck-GPT': [
        86.4, 83.45, 83.48, 83.57,
        89.39, 87.44, 85.16, 86.37,
        94.25, 91.09, 92.89, 93.64,
    ],
    'VERIFY': [
        71.58, 69.38, 67.27, 64.94,
        76.02, 74.24, 72.01, 70.25,
        90.58, 87.82, 86.63, 85.79,
    ],
}

# Tabular view of the scores, one row per (tier, model) pair
df = pd.DataFrame(data)

# Tier selector; the first option means "do not filter"
tiers = ['All Tiers', 'Easy', 'Moderate', 'Hard']
selected_tier = st.selectbox('Select Tier:', tiers)

# Keep every row for 'All Tiers', otherwise only the rows of the chosen tier
show_everything = selected_tier == 'All Tiers'
filtered_df = df if show_everything else df[df['Tier'] == selected_tier]

# Score columns in display order; the names double as DataFrame column keys
_SCORE_COLUMNS = ('FactScore', 'SAFE', 'Factcheck-GPT', 'VERIFY')


def _build_table_html(frame):
    """Render the rows of *frame* as a single HTML ``<table>`` string.

    Rows are grouped by their 'Tier' value, in order of first appearance.
    The first row of each group carries the tier cell, whose ``rowspan`` is
    the number of rows in that group, so the label always spans exactly the
    rows that belong to it. Every ``<tr>`` is opened and closed exactly once,
    and ``<tbody>`` is closed before ``</table>``.

    Args:
        frame: DataFrame with a 'Tier' column, a 'Model' column and the
            numeric columns named in ``_SCORE_COLUMNS``.

    Returns:
        The table markup as one newline-joined string without blank lines.
    """
    header_cells = ''.join(
        f'<th>{name}</th>' for name in ('Tier', 'Model', *_SCORE_COLUMNS)
    )
    lines = ['<table>', '<thead>', f'<tr>{header_cells}</tr>', '</thead>', '<tbody>']

    # sort=False keeps the tiers in the order they appear in the frame
    for tier, group in frame.groupby('Tier', sort=False):
        records = group.to_dict('records')
        for position, record in enumerate(records):
            cells = [f"<td>{record['Model']}</td>"]
            # Scores are shown with two decimals
            cells += [f'<td>{record[column]:.2f}</td>' for column in _SCORE_COLUMNS]
            if position == 0:
                # Only the first row of a tier holds the tier label; it spans
                # as many rows as the tier actually has
                cells.insert(
                    0,
                    f'<td rowspan="{len(records)}" style="vertical-align: middle;">{tier}</td>',
                )
            lines.append('<tr>' + ''.join(cells) + '</tr>')

    lines += ['</tbody>', '</table>']
    return '\n'.join(lines)


# Create HTML for the table
html = _build_table_html(filtered_df)

# Display
st.markdown(html, unsafe_allow_html=True)