Commit 9aa9fbb · Parent(s): 243ccd3

main.py CHANGED
@@ -7,7 +7,6 @@ import threading
 import streamlit as st  # Import Streamlit
 import queue

-
 def generate_answer(question, previous_answers, model_name, open_router_key, openai_api_key):
     """Generates an answer to a question using the specified language model."""
     gen_prompt = create_gen_prompt(question, previous_answers)
@@ -16,8 +15,7 @@ def generate_answer(question, previous_answers, model_name, open_router_key, ope
                                 openai_api_key=openai_api_key)
         return new_answer
     except Exception as e:
-        st.
-                 unsafe_allow_html=True)
+        st.error(f"Error generating answer: {str(e)}")  # Use st.error
         return None


@@ -31,8 +29,7 @@ def evaluate_answer(question, new_answer, open_router_key, openai_api_key, judge
         coherence_score = int(judge_response.split("<coherence_score>")[1].split("</coherence_score>")[0])
         return coherence_score
     except Exception as e:
-        st.
-                 unsafe_allow_html=True)
+        st.error(f"Error getting judge response: {str(e)}")  # Use st.error
         return None


@@ -153,21 +150,17 @@ def benchmark_model_multithreaded(model_name, questions, open_router_key, openai
         if result["type"] == "answer":
             st.write(f"**Question:** {result['question']}")
             st.write(f"**New Answer:**\n{result['answer']}")
-            st.write(f"
-                     unsafe_allow_html=True)
+            st.write(f"Coherence Score: {result['coherence_score']}")  # st.success for coherence
             st.write(f"**Novelty Score:** {result['novelty_score']}")
-            results.extend(result["results"])
-            novelty_score += result["novelty_score"]
-            st.
+            results.extend(result["results"])
+            novelty_score += result["novelty_score"]
+            st.info(f"Total novelty score across all questions (so far): {novelty_score}")  # st.info for running total

         elif result["type"] == "summary":
-            st.
-
-            st.write(f"<span style='color:blue'>Time taken: {result['time_taken']} seconds</span>",
-                     unsafe_allow_html=True)
+            st.info(f"Total novelty score for question '{result['question']}': {result['total_novelty']}")  # st.info for summary
+            st.info(f"Time taken: {result['time_taken']} seconds")  # st.info for summary
         elif result["type"] == "error":
-            st.
-                     unsafe_allow_html=True)
+            st.error(f"Error in thread: {result['message']}")  # st.error for errors

     # Process remaining results in the queue (if any)
     while not result_queue.empty():
@@ -175,24 +168,20 @@ def benchmark_model_multithreaded(model_name, questions, open_router_key, openai
         if result["type"] == "answer":
             st.write(f"**Question:** {result['question']}")
             st.write(f"**New Answer:**\n{result['answer']}")
-            st.
-                     unsafe_allow_html=True)
+            st.success(f"Coherence Score: {result['coherence_score']}")  # st.success for coherence
             st.write(f"**Novelty Score:** {result['novelty_score']}")
             results.extend(result["results"])  # Add results here
             novelty_score += result["novelty_score"]  # Update novelty score
             st.warning(f"Total novelty score across all questions (so far): {novelty_score}")

         elif result["type"] == "summary":
-            st.
-
-            st.write(f"<span style='color:blue'>Time taken: {result['time_taken']} seconds</span>",
-                     unsafe_allow_html=True)
+            st.info(f"Total novelty score for question '{result['question']}': {result['total_novelty']}")  # st.info for summary
+            st.info(f"Time taken: {result['time_taken']} seconds")  # st.info for summary
         elif result["type"] == "error":
-            st.
-                     unsafe_allow_html=True)
+            st.error(f"Error in thread: {result['message']}")  # st.error for errors


-    st.
+    st.info(f"Final total novelty score across all questions: {novelty_score}")
     return results


@@ -205,24 +194,18 @@ def benchmark_model_sequential(model_name, questions, open_router_key, openai_ap
         if result["type"] == "answer":
             st.write(f"**Question:** {result['question']}")
             st.write(f"**New Answer:**\n{result['answer']}")
-            st.
-                     unsafe_allow_html=True)
+            st.success(f"Coherence Score: {result['coherence_score']}")  # st.success for coherence
             st.write(f"**Novelty Score:** {result['novelty_score']}")
             results.extend(result["results"])
             novelty_score += result["novelty_score"]  # Add to novelty score
-            st.
-                     f"Total novelty score across processed questions: {novelty_score}")
+            st.success(f"Coherence Score: {result['coherence_score']}")  # st.success for coherence

         elif result["type"] == "summary":
-            st.
-
-            st.write(f"<span style='color:blue'>Time taken: {result['time_taken']} seconds</span>",
-                     unsafe_allow_html=True)
-
+            st.info(f"Total novelty score for question '{result['question']}': {result['total_novelty']}")  # st.info for summary
+            st.info(f"Time taken: {result['time_taken']} seconds")  # st.info for summary
         elif result["type"] == "error":
-            st.
-                     unsafe_allow_html=True)
+            st.error(f"Error in thread: {result['message']}")  # st.error for errors

-    st.
+    st.info(f"Final total novelty score across all questions: {novelty_score}")

     return results
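Read as a whole, the commit swaps the HTML-styled st.write(..., unsafe_allow_html=True) messages for Streamlit's built-in status elements: st.error for exceptions, st.success for coherence scores, and st.info for running totals and summaries, alongside the existing st.warning. The sketch below condenses that pattern into a single hypothetical report_result helper so it can be read outside the diff; the helper name and function boundary are illustrative, while the result-dict keys and the Streamlit calls are taken from the hunks above.

import streamlit as st

def report_result(result: dict) -> None:
    # Hypothetical helper mirroring the per-result branches in the diff above.
    if result["type"] == "answer":
        st.write(f"**Question:** {result['question']}")
        st.write(f"**New Answer:**\n{result['answer']}")
        st.success(f"Coherence Score: {result['coherence_score']}")  # green status box
        st.write(f"**Novelty Score:** {result['novelty_score']}")
    elif result["type"] == "summary":
        st.info(f"Total novelty score for question '{result['question']}': {result['total_novelty']}")
        st.info(f"Time taken: {result['time_taken']} seconds")  # blue status box
    elif result["type"] == "error":
        st.error(f"Error in thread: {result['message']}")  # red status box

Because the status elements carry their own styling, the inline <span style='color:...'> markup and the unsafe_allow_html=True arguments are no longer needed.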