Spaces:

holistic-ai
/

explainbility_benchmark

Sleeping

explainbility_benchmark / pages /1_Single_Evaluation.py

Zekun Wu

update

90100ff 6 months ago

4.52 kB

	import pandas as pd
	import streamlit as st
	from util.evaluator import evaluator,write_evaluation_commentary
	import os

	# Predefined examples: one entry labelled 'good' and one labelled 'bad',
	# both answering the same question so the two explanations can be compared.
	_rainbow_question = "What causes rainbows to appear in the sky?"
	examples = dict(
	    good=dict(
	        question=_rainbow_question,
	        explanation="Rainbows appear when sunlight is refracted, dispersed, and reflected inside water droplets in the atmosphere, resulting in a spectrum of light appearing in the sky.",
	    ),
	    bad=dict(
	        question=_rainbow_question,
	        explanation="Rainbows happen because light in the sky gets mixed up and sometimes shows colors when it's raining or when there is water around.",
	    ),
	)



	# Function to check password
	def check_password():
	    """Render the password prompt and record a successful login.

	    Draws a masked text input and a "Submit" button. When the button is
	    clicked, the entered text is compared with the ``PASSWORD`` environment
	    variable; on a match ``st.session_state['password_correct']`` is set to
	    ``True``. On a mismatch (or when ``PASSWORD`` is not set) an error
	    message is shown instead.

	    Returns:
	        None. The outcome is communicated only through
	        ``st.session_state['password_correct']``.
	    """
	    # Function-scope import keeps this block self-contained.
	    import hmac

	    def password_entered():
	        """Button callback: validate the text currently held by the widget."""
	        # Read the value through the widget key rather than a closure
	        # variable: callbacks run before the script body is re-executed, so
	        # a captured local may still hold the value from the previous run.
	        entered = st.session_state.get('password_input', '') or ''
	        expected = os.getenv('PASSWORD')
	        # compare_digest gives a constant-time comparison; encoding to bytes
	        # avoids the TypeError it raises for non-ASCII str arguments.
	        if expected is not None and hmac.compare_digest(
	            entered.encode('utf-8'), expected.encode('utf-8')
	        ):
	            st.session_state['password_correct'] = True
	        else:
	            st.error("Incorrect Password, please try again.")

	    st.text_input("Enter Password:", type="password", key='password_input')
	    submit_button = st.button("Submit", on_click=password_entered)

	    if submit_button and not st.session_state.get('password_correct', False):
	        st.error("Please enter a valid password to access the demo.")


	# Page heading shown in the main area
	st.title('Single Evaluation of Explanations')

	# Sidebar copy: first the description of the application, then the
	# explanation of principles. Kept as named strings and written in order.
	_welcome_text = """
	### Welcome to the Single Evaluation of Explanations Demo
	This application allows you to evaluate the quality of explanations generated for various questions using different language models. You can either use predefined examples or input your own questions and explanations.
	"""
	_principles_text = """
	### Explanation Principles
	When evaluating explanations, consider the following principles mapped to user empowerment and regulatory compliance outcomes:

	1. Factually Correct: The information should be accurate and relevant to empower users and meet external audit requirements.
	2. Useful: Explanations should be clear and meaningful, helping users make informed decisions.
	3. Context Specific: Explanations should be tailored to the context of use, enhancing their relevance and utility.
	4. User Specific: Explanations should address the needs and preferences of the user, enabling better decision-making.
	5. Provide Pluralism: Explanations should present diverse perspectives, allowing users to understand different viewpoints and make well-rounded decisions.
	"""
	for _sidebar_section in (_welcome_text, _principles_text):
	    st.sidebar.write(_sidebar_section)
	# Check if password has been validated. Until 'password_correct' is set in
	# the session state only the password prompt is rendered; the evaluation UI
	# below is shown afterwards.
	if not st.session_state.get('password_correct', False):
	    check_password()
	else:
	    st.sidebar.success("Password Verified. Proceed with the demo.")
	    model_name = st.selectbox('Select a model:', ['gpt4-1106', 'gpt35-1106'])

	    # User choice between predefined examples or their own input
	    input_type = st.radio("Choose input type:", ('Use predefined example', 'Enter your own'))

	    if input_type == 'Use predefined example':
	        example_type = st.radio("Select an example type:", ('good', 'bad'))
	        question = examples[example_type]['question']
	        explanation = examples[example_type]['explanation']
	    else:
	        # Strip surrounding whitespace so that blank-looking input is treated
	        # as missing instead of being sent to the evaluator.
	        question = st.text_input('Enter your question:', '').strip()
	        explanation = st.text_input('Enter your explanation:', '').strip()

	    # Display the selected or entered question and explanation
	    st.write('### Question')
	    st.write(question if question else 'No question entered yet.')

	    st.write('### Explanation')
	    st.write(explanation if explanation else 'No explanation entered yet.')

	    if st.button('Evaluate Explanation'):
	        if question and explanation:
	            # Named so that the builtin eval() is not shadowed.
	            explanation_evaluator = evaluator(model_name)
	            scores = explanation_evaluator.evaluate_single(question, explanation)
	            st.write('### Scores')
	            details = write_evaluation_commentary(scores)
	            details_table = pd.DataFrame(details)
	            st.write(details_table)

	            # Flatten the result into a single row for export: the inputs
	            # followed by one column per entry of `details`, keyed by its
	            # 'Principle' and holding its 'Score'.
	            export_row = {
	                'Question': question,
	                'Explanation': explanation,
	                **{detail['Principle']: detail['Score'] for detail in details},
	            }
	            export_table = pd.DataFrame([export_row])

	            # Convert DataFrame to CSV for download
	            csv = export_table.to_csv(index=False)
	            st.download_button(
	                label="Download evaluation as CSV",
	                data=csv,
	                file_name='evaluation.csv',
	                mime='text/csv',
	            )
	        else:
	            st.error('Please enter both a question and an explanation to evaluate.')