Zekun Wu committed on
Commit
ea070cc
1 Parent(s): 0eb1a66
Files changed (2) hide show
  1. app.py +13 -40
  2. evaluator.py +40 -0
app.py CHANGED
@@ -1,5 +1,6 @@
 
1
  import streamlit as st
2
- from evaluator import evaluator
3
  import os
4
 
5
  # Predefined examples
@@ -14,45 +15,7 @@ examples = {
14
  }
15
  }
16
 
17
# Score bands shared by every principle: >= 0.8 is "high", >= 0.5 is "medium",
# anything lower is "low".
_HIGH_SCORE = 0.8
_MEDIUM_SCORE = 0.5

# Commentary per principle, ordered as (high, medium, low).
_SCORE_COMMENTS = {
    "Factually Correct": (
        "Excellent accuracy! The information is precise and directly relevant to the question.",
        "Moderately accurate, but some details may not be completely correct or are somewhat irrelevant.",
        "The explanation contains significant inaccuracies or irrelevant information.",
    ),
    "Useful": (
        "Highly useful! The explanation clearly enhances understanding and aids in further reasoning or decision-making.",
        "Somewhat useful, though it could be more insightful or practical in aiding understanding.",
        "The explanation does little to help understand or apply the information provided.",
    ),
    "Context Specific": (
        "Perfectly tailored to the context of the question, addressing the specific scenario effectively.",
        "Generally addresses the context, but may miss specific details or nuances relevant to the question.",
        "Fails to address the context of the question, lacking relevance or specificity.",
    ),
    "User Specific": (
        "The explanation is well-adapted to the user's knowledge level and interests, demonstrating thoughtfulness.",
        "Moderately considerate of the user's knowledge level, but could be more tailored.",
        "Does not consider the user's background or interests, potentially leading to confusion or disinterest.",
    ),
    "Provides Pluralism": (
        "Provides an excellent range of perspectives or interpretations, fostering a comprehensive understanding.",
        "Offers some alternative perspectives, but more could be provided to enrich understanding.",
        "Lacks diversity in viewpoints, limiting the depth of exploration into the topic.",
    ),
}

# Shown for a principle that has no entry in _SCORE_COMMENTS.
_NO_COMMENT_AVAILABLE = "No commentary is available for this principle."


def _pick_comment(principle, score):
    """Return the commentary string for one principle/score pair."""
    bands = _SCORE_COMMENTS.get(principle)
    if bands is None:
        # The original if/elif chain left `comment` unassigned here, which
        # either raised UnboundLocalError or reused the previous principle's
        # text. Use an explicit placeholder instead.
        return _NO_COMMENT_AVAILABLE
    high, medium, low = bands
    if score >= _HIGH_SCORE:
        return high
    if score >= _MEDIUM_SCORE:
        return medium
    return low


def write_evaluation_commentary(scores):
    """Write one commentary line per evaluated principle to the Streamlit page.

    Args:
        scores: Mapping of principle name to a numeric score. Scores are
            compared against 0.8 and 0.5 to choose the commentary band.

    Each line is emitted through ``st.write`` in the form
    ``"<principle> (<score>): <comment>"``. Principles without predefined
    commentary get a generic placeholder comment rather than a crash or a
    comment belonging to a different principle.
    """
    for principle, score in scores.items():
        comment = _pick_comment(principle, score)
        st.write(f"{principle} ({score}): {comment}")
56
 
57
  # Function to check password
58
  def check_password():
@@ -101,6 +64,16 @@ else:
101
  eval = evaluator(model_name)
102
  scores = eval(question, explanation)
103
  st.write('### Scores')
104
- write_evaluation_commentary(scores)
 
 
 
 
 
 
 
 
 
 
105
  else:
106
  st.error('Please enter both a question and an explanation to evaluate.')
 
1
+ import pandas as pd
2
  import streamlit as st
3
+ from evaluator import evaluator,write_evaluation_commentary
4
  import os
5
 
6
  # Predefined examples
 
15
  }
16
  }
17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
 
19
 
20
  # Function to check password
21
  def check_password():
 
64
  eval = evaluator(model_name)
65
  scores = eval(question, explanation)
66
  st.write('### Scores')
67
+ details = write_evaluation_commentary(scores)
68
+ df = pd.DataFrame(details)
69
+ st.write(df)
70
+
71
+ csv = df.to_csv(index=False)
72
+ st.download_button(
73
+ label="Download evaluation as CSV",
74
+ data=csv,
75
+ file_name='evaluation.csv',
76
+ mime='text/csv',
77
+ )
78
  else:
79
  st.error('Please enter both a question and an explanation to evaluate.')
evaluator.py CHANGED
@@ -75,7 +75,47 @@ class evaluator:
75
 
76
  return self.validate_scores(scores)
77
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
 
 
 
79
 
80
  if __name__ == '__main__':
81
  eval = evaluator()
 
75
 
76
  return self.validate_scores(scores)
77
 
78
# Score bands shared by every principle: >= 0.8 is "high", >= 0.5 is "medium",
# anything lower is "low".
_HIGH_THRESHOLD = 0.8
_MEDIUM_THRESHOLD = 0.5

# Commentary per principle, ordered as (high, medium, low).
_PRINCIPLE_COMMENTARY = {
    "Factually Correct": (
        "Excellent accuracy! The information is precise and directly relevant to the question.",
        "Moderately accurate, but some details may not be completely correct or are somewhat irrelevant.",
        "The explanation contains significant inaccuracies or irrelevant information.",
    ),
    "Useful": (
        "Highly useful! The explanation clearly enhances understanding and aids in further reasoning or decision-making.",
        "Somewhat useful, though it could be more insightful or practical in aiding understanding.",
        "The explanation does little to help understand or apply the information provided.",
    ),
    "Context Specific": (
        "Perfectly tailored to the context of the question, addressing the specific scenario effectively.",
        "Generally addresses the context, but may miss specific details or nuances relevant to the question.",
        "Fails to address the context of the question, lacking relevance or specificity.",
    ),
    "User Specific": (
        "The explanation is well-adapted to the user's knowledge level and interests, demonstrating thoughtfulness.",
        "Moderately considerate of the user's knowledge level, but could be more tailored.",
        "Does not consider the user's background or interests, potentially leading to confusion or disinterest.",
    ),
    "Provides Pluralism": (
        "Provides an excellent range of perspectives or interpretations, fostering a comprehensive understanding.",
        "Offers some alternative perspectives, but more could be provided to enrich understanding.",
        "Lacks diversity in viewpoints, limiting the depth of exploration into the topic.",
    ),
}

# Used for a principle that has no entry in _PRINCIPLE_COMMENTARY.
_UNKNOWN_PRINCIPLE_COMMENT = "No commentary is available for this principle."


def write_evaluation_commentary(scores):
    """Build a commentary row for every evaluated principle.

    Args:
        scores: Mapping of principle name to a numeric score. Scores are
            compared against 0.8 and 0.5 to choose the commentary band.

    Returns:
        A list of dicts, one per entry of ``scores`` in iteration order, each
        with the keys ``'Principle'``, ``'Score'`` and ``'Commentary'``.
        An empty mapping yields an empty list.
    """
    evaluation_details = []
    for principle, score in scores.items():
        bands = _PRINCIPLE_COMMENTARY.get(principle)
        if bands is None:
            # The original if/elif chain left `comment` unassigned for an
            # unrecognised principle, which either raised UnboundLocalError
            # or silently reused the previous principle's text.
            comment = _UNKNOWN_PRINCIPLE_COMMENT
        elif score >= _HIGH_THRESHOLD:
            comment = bands[0]
        elif score >= _MEDIUM_THRESHOLD:
            comment = bands[1]
        else:
            comment = bands[2]
        evaluation_details.append(
            {'Principle': principle, 'Score': score, 'Commentary': comment}
        )
    return evaluation_details
119
 
120
  if __name__ == '__main__':
121
  eval = evaluator()