Zekun Wu committed on
Commit
0da3235
1 Parent(s): 0c2bd43
Files changed (1) hide show
  1. pages/2_batch_evaluation.py +26 -77
pages/2_batch_evaluation.py CHANGED
@@ -3,19 +3,6 @@ import streamlit as st
3
  from util.evaluator import evaluator, write_evaluation_commentary
4
  import os
5
 
6
- # Predefined examples
7
- examples = {
8
- 'good': {
9
- 'question': "What causes rainbows to appear in the sky?",
10
- 'explanation': "Rainbows appear when sunlight is refracted, dispersed, and reflected inside water droplets in the atmosphere, resulting in a spectrum of light appearing in the sky."
11
- },
12
- 'bad': {
13
- 'question': "What causes rainbows to appear in the sky?",
14
- 'explanation': "Rainbows happen because light in the sky gets mixed up and sometimes shows colors when it's raining or when there is water around."
15
- }
16
- }
17
-
18
- # Function to check password
19
  def check_password():
20
  with st.sidebar:
21
  password_input = st.text_input("Enter Password:", type="password")
@@ -27,83 +14,45 @@ def check_password():
27
  else:
28
  st.error("Incorrect Password, please try again.")
29
 
30
- # Function to evaluate batch data
31
- def evaluate_batch(uploaded_file):
32
  df = pd.read_csv(uploaded_file)
33
- eval_instance = evaluator(model_name=st.session_state['model_name'])
34
  results = []
35
 
 
36
  for _, row in df.iterrows():
37
- scores = eval_instance(row['question'], row['explanation'])
38
- commentary = write_evaluation_commentary(scores)
39
- result = {
40
- 'Question': row['question'],
41
- 'Explanation': row['explanation'],
42
- **{c['Principle']: c['Score'] for c in commentary}
43
- }
44
- results.append(result)
 
45
 
46
  return pd.DataFrame(results)
47
 
48
- # Main app logic
49
- def main():
50
- st.title('Natural Language Explanation Demo')
51
-
52
- model_name = st.selectbox('Select a model:', ['gpt4-1106', 'gpt35-1106'])
53
- st.session_state['model_name'] = model_name # Save model name to session state for use in batch processing
54
 
55
- input_type = st.radio("Choose input type:", ('Use predefined example', 'Enter your own', 'Upload CSV for batch evaluation'))
 
 
 
 
56
 
57
- if input_type == 'Use predefined example':
58
- example_type = st.radio("Select an example type:", ('good', 'bad'))
59
- question = examples[example_type]['question']
60
- explanation = examples[example_type]['explanation']
61
- elif input_type == 'Enter your own':
62
- question = st.text_input('Enter your question:', '')
63
- explanation = st.text_input('Enter your explanation:', '')
64
- else:
65
- uploaded_file = st.file_uploader("Upload a CSV file", type='csv')
66
- if uploaded_file and st.button('Evaluate Batch'):
67
- result_df = evaluate_batch(uploaded_file)
68
  st.write('### Evaluated Results')
69
  st.dataframe(result_df)
70
- csv = result_df.to_csv(index=False)
71
- st.download_button(
72
- label="Download evaluated results as CSV",
73
- data=csv,
74
- file_name='batch_evaluation_results.csv',
75
- mime='text/csv'
76
- )
77
- return
78
-
79
- if st.button('Evaluate Explanation'):
80
- if question and explanation:
81
- eval_instance = evaluator(model_name)
82
- scores = eval_instance(question, explanation)
83
- st.write('### Scores')
84
- details = write_evaluation_commentary(scores)
85
- df = pd.DataFrame(details)
86
- st.write(df)
87
- data = {
88
- 'Question': question,
89
- 'Explanation': explanation,
90
- **{detail['Principle']: detail['Score'] for detail in details}
91
- }
92
- df = pd.DataFrame([data])
93
 
94
- # Convert DataFrame to CSV for download
95
- csv = df.to_csv(index=False)
96
  st.download_button(
97
- label="Download evaluation as CSV",
98
  data=csv,
99
- file_name='evaluation.csv',
100
  mime='text/csv',
101
  )
102
- else:
103
- st.error('Please enter both a question and an explanation to evaluate.')
104
-
105
- if __name__ == '__main__':
106
- if 'password_verified' not in st.session_state or not st.session_state['password_verified']:
107
- check_password()
108
- else:
109
- main()
 
3
  from util.evaluator import evaluator, write_evaluation_commentary
4
  import os
5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  def check_password():
7
  with st.sidebar:
8
  password_input = st.text_input("Enter Password:", type="password")
 
14
  else:
15
  st.error("Incorrect Password, please try again.")
16
 
17
+ def batch_evaluate(uploaded_file):
18
+ # Read the uploaded CSV file into DataFrame
19
  df = pd.read_csv(uploaded_file)
20
+ eval_instance = evaluator('gpt4-1106') # Using fixed model name for simplicity
21
  results = []
22
 
23
+ # Process each row in the DataFrame
24
  for _, row in df.iterrows():
25
+ question = row['question']
26
+ explanation = row['explanation']
27
+ scores = eval_instance(question, explanation) # Evaluate using the evaluator
28
+ commentary_details = write_evaluation_commentary(scores) # Generate commentary based on scores
29
+ results.append({
30
+ 'Question': question,
31
+ 'Explanation': explanation,
32
+ **{detail['Principle']: detail['Score'] for detail in commentary_details}
33
+ })
34
 
35
  return pd.DataFrame(results)
36
 
37
+ st.title('Natural Language Explanation Demo')
 
 
 
 
 
38
 
39
+ if 'password_verified' not in st.session_state or not st.session_state['password_verified']:
40
+ check_password()
41
+ else:
42
+ st.sidebar.success("Password Verified. Proceed with the demo.")
43
+ uploaded_file = st.file_uploader("Upload CSV file with 'question' and 'explanation' columns", type=['csv'])
44
 
45
+ if uploaded_file is not None:
46
+ if st.button('Evaluate Explanations'):
47
+ result_df = batch_evaluate(uploaded_file)
 
 
 
 
 
 
 
 
48
  st.write('### Evaluated Results')
49
  st.dataframe(result_df)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
 
51
+ # Create a CSV download link
52
+ csv = result_df.to_csv(index=False)
53
  st.download_button(
54
+ label="Download evaluation results as CSV",
55
  data=csv,
56
+ file_name='evaluated_results.csv',
57
  mime='text/csv',
58
  )