Presidentlin committed on
Commit 9aa9fbb · 1 Parent(s): 243ccd3
Files changed (1): main.py (+20 −37)
main.py CHANGED
@@ -7,7 +7,6 @@ import threading
 import streamlit as st  # Import Streamlit
 import queue
 
-
 def generate_answer(question, previous_answers, model_name, open_router_key, openai_api_key):
     """Generates an answer to a question using the specified language model."""
     gen_prompt = create_gen_prompt(question, previous_answers)
@@ -16,8 +15,7 @@ def generate_answer(question, previous_answers, model_name, open_router_key, ope
                                  openai_api_key=openai_api_key)
         return new_answer
     except Exception as e:
-        st.write(f"<span style='color:red'>Error generating answer: {str(e)}</span>",
-                 unsafe_allow_html=True)
+        st.error(f"Error generating answer: {str(e)}")  # Use st.error
         return None
 
 
@@ -31,8 +29,7 @@ def evaluate_answer(question, new_answer, open_router_key, openai_api_key, judge
         coherence_score = int(judge_response.split("<coherence_score>")[1].split("</coherence_score>")[0])
         return coherence_score
     except Exception as e:
-        st.write(f"<span style='color:red'>Error getting judge response: {str(e)}</span>",
-                 unsafe_allow_html=True)
+        st.error(f"Error getting judge response: {str(e)}")  # Use st.error
         return None
 
 
@@ -153,21 +150,17 @@ def benchmark_model_multithreaded(model_name, questions, open_router_key, openai
         if result["type"] == "answer":
             st.write(f"**Question:** {result['question']}")
             st.write(f"**New Answer:**\n{result['answer']}")
-            st.write(f"<span style='color:green'>Coherence Score: {result['coherence_score']}</span>",
-                     unsafe_allow_html=True)
+            st.success(f"Coherence Score: {result['coherence_score']}")  # st.success for coherence
             st.write(f"**Novelty Score:** {result['novelty_score']}")
-            results.extend(result["results"])  # Add results here
-            novelty_score += result["novelty_score"]  # Update novelty score
-            st.warning(f"Total novelty score across all questions (so far): {novelty_score}")
+            results.extend(result["results"])
+            novelty_score += result["novelty_score"]
+            st.info(f"Total novelty score across all questions (so far): {novelty_score}")  # st.info for running total
 
         elif result["type"] == "summary":
-            st.write(f"<span style='color:blue'>Total novelty score for question '{result['question']}': {result['total_novelty']}</span>",
-                     unsafe_allow_html=True)
-            st.write(f"<span style='color:blue'>Time taken: {result['time_taken']} seconds</span>",
-                     unsafe_allow_html=True)
+            st.info(f"Total novelty score for question '{result['question']}': {result['total_novelty']}")  # st.info for summary
+            st.info(f"Time taken: {result['time_taken']} seconds")  # st.info for summary
         elif result["type"] == "error":
-            st.write(f"<span style='color:red'>Error in thread: {result['message']}</span>",
-                     unsafe_allow_html=True)
+            st.error(f"Error in thread: {result['message']}")  # st.error for errors
 
         # Process remaining results in the queue (if any)
         while not result_queue.empty():
@@ -175,24 +168,20 @@ def benchmark_model_multithreaded(model_name, questions, open_router_key, openai
             if result["type"] == "answer":
                 st.write(f"**Question:** {result['question']}")
                 st.write(f"**New Answer:**\n{result['answer']}")
-                st.write(f"<span style='color:green'>Coherence Score: {result['coherence_score']}</span>",
-                         unsafe_allow_html=True)
+                st.success(f"Coherence Score: {result['coherence_score']}")  # st.success for coherence
                 st.write(f"**Novelty Score:** {result['novelty_score']}")
                 results.extend(result["results"])  # Add results here
                 novelty_score += result["novelty_score"]  # Update novelty score
                 st.warning(f"Total novelty score across all questions (so far): {novelty_score}")
 
             elif result["type"] == "summary":
-                st.write(f"<span style='color:blue'>Total novelty score for question '{result['question']}': {result['total_novelty']}</span>",
-                         unsafe_allow_html=True)
-                st.write(f"<span style='color:blue'>Time taken: {result['time_taken']} seconds</span>",
-                         unsafe_allow_html=True)
+                st.info(f"Total novelty score for question '{result['question']}': {result['total_novelty']}")  # st.info for summary
+                st.info(f"Time taken: {result['time_taken']} seconds")  # st.info for summary
             elif result["type"] == "error":
-                st.write(f"<span style='color:red'>Error in thread: {result['message']}</span>",
-                         unsafe_allow_html=True)
+                st.error(f"Error in thread: {result['message']}")  # st.error for errors
 
 
-    st.warning(f"Final total novelty score across all questions: {novelty_score}")
+    st.info(f"Final total novelty score across all questions: {novelty_score}")
     return results
 
 
@@ -205,24 +194,18 @@ def benchmark_model_sequential(model_name, questions, open_router_key, openai_ap
         if result["type"] == "answer":
             st.write(f"**Question:** {result['question']}")
             st.write(f"**New Answer:**\n{result['answer']}")
-            st.write(f"<span style='color:green'>Coherence Score: {result['coherence_score']}</span>",
-                     unsafe_allow_html=True)
+            st.success(f"Coherence Score: {result['coherence_score']}")  # st.success for coherence
            st.write(f"**Novelty Score:** {result['novelty_score']}")
             results.extend(result["results"])
             novelty_score += result["novelty_score"]  # Add to novelty score
-            st.warning(
-                f"Total novelty score across processed questions: {novelty_score}")
+            st.info(f"Total novelty score across processed questions: {novelty_score}")  # st.info for running total
 
         elif result["type"] == "summary":
-            st.write(f"<span style='color:blue'>Total novelty score for question '{result['question']}': {result['total_novelty']}</span>",
-                     unsafe_allow_html=True)
-            st.write(f"<span style='color:blue'>Time taken: {result['time_taken']} seconds</span>",
-                     unsafe_allow_html=True)
-
+            st.info(f"Total novelty score for question '{result['question']}': {result['total_novelty']}")  # st.info for summary
+            st.info(f"Time taken: {result['time_taken']} seconds")  # st.info for summary
         elif result["type"] == "error":
-            st.write(f"<span style='color:red'>Error in thread: {result['message']}</span>",
-                     unsafe_allow_html=True)
+            st.error(f"Error in thread: {result['message']}")  # st.error for errors
 
-    st.warning(f"Final total novelty score across all questions: {novelty_score}")
+    st.info(f"Final total novelty score across all questions: {novelty_score}")
 
     return results
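
Note on the pattern: the commit swaps raw HTML spans rendered with unsafe_allow_html for Streamlit's built-in status elements, which produce the same colored callouts without injecting markup. A minimal standalone sketch of the before/after (illustrative only; the score and timing values are made up, and st.markdown is used for the "before" case since it is the call that documents the unsafe_allow_html keyword):

import streamlit as st

# Before: colored text via raw HTML. This requires unsafe_allow_html=True
# and couples the app to inline CSS.
st.markdown("<span style='color:green'>Coherence Score: 85</span>",
            unsafe_allow_html=True)

# After: built-in status elements carry the same color semantics safely.
st.success("Coherence Score: 85")               # green box for good results
st.info("Time taken: 12.3 seconds")             # blue box for neutral summaries
st.warning("Total novelty score (so far): 42")  # yellow box for running totals
st.error("Error in thread: worker timed out")   # red box for failures

Each of st.success, st.info, st.warning, and st.error takes a single body string (Markdown is supported), so the f-strings from the diff drop in unchanged.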