Spaces:

holistic-ai
/

explainbility_benchmark

Sleeping

File size: 4,594 Bytes

b0eb8db
0c2bd43
 
 
 
8766924
b0eb8db
0c2bd43
5485067
 
 
 
 
 
 
 
 
 
 
 
0c2bd43
b0eb8db
0da3235
0c2bd43
8766924
 
0c2bd43
 
8766924
 
 
 
 
 
d3bca1f
0da3235
 
 
 
 
 
0c2bd43
8766924
 
 
0c2bd43
 
8766924
b0eb8db
8c43009
0c2bd43
cc350c1
1e2d3d0
394d4b8
cc350c1
 
 
 
1e2d3d0
cc350c1
 
 
 
 
 
 
 
 
b0eb8db
5485067
0da3235
 
 
b0eb8db
 
cc350c1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b0eb8db
0da3235
0c2bd43
0da3235
 
 
0c2bd43
 
 
0da3235
 
0c2bd43
0da3235
0c2bd43
0da3235
0c2bd43

import os
import pandas as pd
import streamlit as st
from util.evaluator import evaluator, write_evaluation_commentary


# Function to check password
def check_password():
    """Render the password prompt and record a successful login.

    Draws a password text input and a "Submit" button. When the button is
    clicked, the entered value is compared with the ``PASSWORD`` environment
    variable; on a match ``st.session_state['password_correct']`` is set to
    ``True``. On a mismatch (or when ``PASSWORD`` is not set) an error
    message is shown instead.

    Returns:
        None. The outcome is communicated through
        ``st.session_state['password_correct']``.
    """
    # Local import so this block stays self-contained.
    import hmac

    def password_entered():
        # Read the widget value from session state rather than from a
        # closed-over local: the callback runs before the script reruns, so
        # session state is where the submitted value is available.
        entered = st.session_state.get('password_input', '')
        expected = os.getenv('PASSWORD')
        # An unset PASSWORD never matches. compare_digest keeps the
        # comparison constant-time; bytes are used because compare_digest
        # rejects non-ASCII str arguments.
        if expected is not None and hmac.compare_digest(
            entered.encode('utf-8'), expected.encode('utf-8')
        ):
            st.session_state['password_correct'] = True
        else:
            st.error("Incorrect Password, please try again.")

    st.text_input("Enter Password:", type="password", key='password_input')
    submit_button = st.button("Submit", on_click=password_entered)

    # Shown on the run triggered by a failed submit.
    if submit_button and not st.session_state.get('password_correct', False):
        st.error("Please enter a valid password to access the demo.")


# Function to batch evaluate explanations
def batch_evaluate(uploaded_file, model_name='gpt4-1106'):
    """Evaluate every question/explanation pair in an uploaded CSV.

    Args:
        uploaded_file: Path or file-like object accepted by ``pd.read_csv``.
            The CSV must contain ``question`` and ``explanation`` columns.
        model_name: Model identifier passed to ``evaluator``. Defaults to
            ``'gpt4-1106'``, the value that was previously hard-coded.

    Returns:
        A ``pandas.DataFrame`` with one row per input row, holding
        ``Question``, ``Explanation`` and one column per ``'Principle'``
        entry returned by ``write_evaluation_commentary`` (its value is the
        matching ``'Score'``).

    Raises:
        ValueError: If a required column is missing from the CSV.
    """
    df = pd.read_csv(uploaded_file)

    # Fail early with a clear message instead of an AttributeError on the
    # first row when a required column is absent.
    missing = [col for col in ('question', 'explanation') if col not in df.columns]
    if missing:
        raise ValueError(
            "Uploaded CSV is missing required column(s): " + ", ".join(missing)
        )

    eval_instance = evaluator(model_name)
    total_rows = len(df)
    results = []

    # Add a progress bar
    progress_bar = st.progress(0)

    for index, row in enumerate(df.itertuples(), start=1):
        question = row.question
        explanation = row.explanation
        scores = eval_instance.evaluate_single(question, explanation)  # Evaluate using the evaluator
        commentary_details = write_evaluation_commentary(scores)  # Generate commentary based on scores
        results.append({
            'Question': question,
            'Explanation': explanation,
            **{detail['Principle']: detail['Score'] for detail in commentary_details}
        })

        # Update progress bar; index starts at 1 so the final row reaches 1.0.
        progress_bar.progress(index / total_rows)

    return pd.DataFrame(results)


# Page heading shown at the top of the main area
st.title('Batch Evaluation of Explanations')

# Sidebar copy, rendered in order: a welcome blurb followed by the list of
# explanation principles.
# NOTE(review): the welcome text mentions predefined examples and manual input,
# while the visible page only offers CSV upload — confirm the wording.
_SIDEBAR_SECTIONS = (
    """
### Welcome to the Batch Evaluation of Explanations Demo
This application allows you to evaluate the quality of explanations generated for various questions using different language models. You can either use predefined examples or input your own questions and explanations.
""",
    """
### Explanation Principles
When evaluating explanations, consider the following principles mapped to user empowerment and regulatory compliance outcomes:

1. **Factually Correct**: The information should be accurate and relevant to empower users and meet external audit requirements.
2. **Useful**: Explanations should be clear and meaningful, helping users make informed decisions.
3. **Context Specific**: Explanations should be tailored to the context of use, enhancing their relevance and utility.
4. **User Specific**: Explanations should address the needs and preferences of the user, enabling better decision-making.
5. **Provide Pluralism**: Explanations should present diverse perspectives, allowing users to understand different viewpoints and make well-rounded decisions.
""",
)
for _section in _SIDEBAR_SECTIONS:
    st.sidebar.write(_section)
# Check if password has been validated
if not st.session_state.get('password_correct', False):
    check_password()
else:
    st.sidebar.success("Password Verified. Proceed with the demo.")

    st.write("""
        ### Instructions for Uploading CSV
        Please upload a CSV file with the following columns:
        - `question`: The question you want evaluated.
        - `explanation`: The explanation corresponding to the question.

        **Example CSV Format:**
        """)

    # Display an example DataFrame
    example_data = {
        "question": [
            "What causes rainbows to appear in the sky?",
            "Why is the sky blue?"
        ],
        "explanation": [
            "Rainbows appear when sunlight is refracted, dispersed, and reflected inside water droplets in the atmosphere, resulting in a spectrum of light appearing in the sky.",
            "The sky is blue because molecules in the air scatter blue light from the sun more than they scatter red light."
        ]
    }
    example_df = pd.DataFrame(example_data)
    st.dataframe(example_df)

    uploaded_file = st.file_uploader("Upload CSV file with 'question' and 'explanation' columns", type=['csv'])

    if uploaded_file is None:
        # No file selected: discard results that belonged to a previous upload.
        st.session_state.pop('batch_results', None)
    else:
        # Identify the upload so stored results are only shown for the file
        # they were computed from.
        upload_signature = (uploaded_file.name, uploaded_file.size)

        if st.button('Evaluate Explanations'):
            # Rewind in case the same upload object was already read once.
            uploaded_file.seek(0)
            # st.button is True only for the run triggered by the click, so
            # the results are kept in session state. Otherwise the rerun
            # caused by the download button would discard them.
            st.session_state['batch_results'] = {
                'signature': upload_signature,
                'frame': batch_evaluate(uploaded_file),
            }

        stored = st.session_state.get('batch_results')
        if stored is not None and stored['signature'] == upload_signature:
            result_df = stored['frame']
            st.write('### Evaluated Results')
            st.dataframe(result_df)

            # Create a CSV download link
            csv = result_df.to_csv(index=False)
            st.download_button(
                label="Download evaluation results as CSV",
                data=csv,
                file_name='evaluated_results.csv',
                mime='text/csv',
            )