Zekun Wu committed on
Commit
651ff6d
·
1 Parent(s): 8bed6d7
Files changed (1) hide show
  1. pages/2_batch_evaluation.py +66 -0
pages/2_batch_evaluation.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import streamlit as st
3
+ from util.evaluator import evaluator, write_evaluation_commentary
4
+ import os
5
+
6
+ # Predefined examples
7
# Two sample (question, explanation) pairs for the same question: a precise
# explanation under 'good' and a vague one under 'bad'.
examples = dict(
    good=dict(
        question="What causes rainbows to appear in the sky?",
        explanation="Rainbows appear when sunlight is refracted, dispersed, and reflected inside water droplets in the atmosphere, resulting in a spectrum of light appearing in the sky.",
    ),
    bad=dict(
        question="What causes rainbows to appear in the sky?",
        explanation="Rainbows happen because light in the sky gets mixed up and sometimes shows colors when it's raining or when there is water around.",
    ),
)
17
+
18
+ # Function to check password
19
def check_password():
    """Render the sidebar password prompt and unlock the page on success.

    Draws a password field and a "Submit" button in the sidebar. When the
    button is pressed, the entered text is compared with the ``PASSWORD``
    environment variable. On a match, ``st.session_state['password_verified']``
    is set to ``True`` and the script is rerun; otherwise an error message is
    shown in the sidebar.

    Returns:
        None. The outcome is communicated through ``st.session_state`` and
        sidebar widgets.
    """
    # Function-scope import so this block does not depend on a file-level
    # import being added elsewhere.
    import hmac

    password_input = st.sidebar.text_input("Enter Password:", type="password")
    if not st.sidebar.button("Submit"):
        return

    expected = os.getenv('PASSWORD')
    # An unset or empty PASSWORD must never authenticate anyone; without this
    # guard an empty submission would match an empty configured value.
    # compare_digest avoids leaking how many leading characters matched.
    is_match = bool(expected) and hmac.compare_digest(
        password_input.encode('utf-8'), expected.encode('utf-8')
    )
    if not is_match:
        st.sidebar.error("Incorrect Password, please try again.")
        return

    st.session_state['password_verified'] = True
    # st.experimental_rerun is deprecated in favour of st.rerun; fall back to
    # the old name only on Streamlit versions that predate st.rerun.
    rerun = getattr(st, 'rerun', None) or st.experimental_rerun
    rerun()
28
+
29
def batch_evaluate(uploaded_file):
    """Evaluate every (question, explanation) row of an uploaded CSV.

    Args:
        uploaded_file: A path or file-like object accepted by
            ``pandas.read_csv``. The CSV must contain the columns
            ``question`` and ``explanation``.

    Returns:
        pandas.DataFrame: One row per input row, with the columns
        ``Question`` and ``Explanation`` followed by whatever keys
        ``write_evaluation_commentary`` returns for that row's scores.

    Raises:
        KeyError: If the CSV lacks the ``question`` or ``explanation`` column.
    """
    df = pd.read_csv(uploaded_file)

    # Validate before constructing the evaluator so a malformed upload fails
    # fast with a message that names the missing column(s).
    missing = [col for col in ('question', 'explanation') if col not in df.columns]
    if missing:
        raise KeyError(
            f"Uploaded CSV is missing required column(s): {', '.join(missing)}"
        )

    # Model name is fixed for simplicity. The local is deliberately not called
    # `eval`, which would shadow the builtin.
    score_explanation = evaluator(model_name='gpt4-1106')

    results = []
    for _, row in df.iterrows():
        question = row['question']
        explanation = row['explanation']
        scores = score_explanation(question, explanation)
        # The commentary is unpacked with ** below, so it must be a mapping.
        commentary = write_evaluation_commentary(scores)
        results.append(
            {'Question': question, 'Explanation': explanation, **commentary}
        )

    return pd.DataFrame(results)
43
+
44
+ # Title of the application
45
st.title('Natural Language Explanation Demo')

# Gate the whole page behind the sidebar password prompt.
if not st.session_state.get('password_verified'):
    check_password()
else:
    st.sidebar.success("Password Verified. Proceed with the demo.")
    st.header("Batch Evaluation of Questions and Explanations")
    uploaded_file = st.file_uploader("Upload CSV file with columns 'question' and 'explanation'", type='csv')

    if uploaded_file is None:
        # No file selected: discard results left over from an earlier upload.
        st.session_state.pop('batch_results', None)
    else:
        # Identify the upload so cached results are never shown for a
        # different file.
        file_key = (uploaded_file.name, uploaded_file.size)

        if st.button('Evaluate Explanations'):
            try:
                evaluated = batch_evaluate(uploaded_file)
            except KeyError as exc:
                # Typically a CSV without the expected columns; report it in
                # the page instead of surfacing a traceback.
                st.session_state.pop('batch_results', None)
                st.error(f"The uploaded CSV could not be evaluated (missing column or key: {exc}).")
            else:
                st.session_state['batch_results'] = (file_key, evaluated)

        # Render from session state rather than inside the button branch:
        # st.button is True only for the run triggered by its click, so the
        # rerun caused by pressing the download button would otherwise make
        # the table and the download button disappear.
        cached = st.session_state.get('batch_results')
        if cached is not None and cached[0] == file_key:
            result_df = cached[1]
            st.write('### Evaluated Results')
            st.dataframe(result_df)

            csv = result_df.to_csv(index=False)
            st.download_button(
                label="Download evaluation results as CSV",
                data=csv,
                file_name='evaluated_results.csv',
                mime='text/csv'
            )