Commit fb39607
Parent(s): c77c9f7
- __pycache__/main.cpython-310.pyc +0 -0
- app.py +12 -0
- main.py +2 -6
__pycache__/main.cpython-310.pyc
CHANGED
Binary files a/__pycache__/main.cpython-310.pyc and b/__pycache__/main.cpython-310.pyc differ
app.py
CHANGED
@@ -2,6 +2,7 @@ import streamlit as st
 from main import benchmark_model_multithreaded, benchmark_model_sequential
 from prompts import questions as predefined_questions
 import requests
+import pandas as pd
 
 # Set the title in the browser tab
 st.set_page_config(page_title="Aidan Bench - Generator")
@@ -130,11 +131,22 @@ if st.session_state.open_router_key and st.session_state.openai_api_key:
         results_table.append({
             "Question": result["question"],
             "Answer": answer,
+            "Contestant Model": model_name,
+            "Judge Model": 'openai/gpt-4o-mini',
             "Coherence Score": result["coherence_score"],
             "Novelty Score": result["novelty_score"]
         })
     st.table(results_table)
 
+    df = pd.DataFrame(results_table)  # Create a Pandas DataFrame from the results
+    csv = df.to_csv(index=False).encode('utf-8')  # Convert DataFrame to CSV
+    st.download_button(
+        label="Export Results as CSV",
+        data=csv,
+        file_name="benchmark_results.csv",
+        mime='text/csv'
+    )
+
     if stop_button:
         st.warning("Partial results displayed due to interruption.")
     else:
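Taken together, the app.py hunks wire a CSV export into the results view: each result row gains two model columns, the rows are rendered with st.table, and the same rows are offered as a download via pandas and st.download_button. Below is a minimal standalone sketch of that pattern; the sample row is a hypothetical placeholder, not data from the commit.

# Sketch of the CSV-export pattern added in app.py (sample row is hypothetical).
import streamlit as st
import pandas as pd

results_table = [{
    "Question": "example question",
    "Answer": "example answer",
    "Contestant Model": "example/model",
    "Judge Model": "openai/gpt-4o-mini",
    "Coherence Score": 8,
    "Novelty Score": 0.7,
}]

st.table(results_table)  # render the rows as a static table

df = pd.DataFrame(results_table)              # rows -> DataFrame
csv = df.to_csv(index=False).encode("utf-8")  # DataFrame -> CSV bytes

st.download_button(
    label="Export Results as CSV",
    data=csv,
    file_name="benchmark_results.csv",
    mime="text/csv",
)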
main.py
CHANGED
@@ -51,16 +51,12 @@ def process_question(question, model_name, open_router_key, openai_api_key, resu
         if coherence_score is None:
             break
 
-        if coherence_score <=
-            # st.write("<span style='color:yellow'>Output is incoherent. Moving to next question.</span>",
-            #          unsafe_allow_html=True)
+        if coherence_score <= 6:
             break
 
         novelty_score = get_novelty_score(new_answer, previous_answers, openai_api_key)
 
-        if novelty_score < 0.
-            # st.write("<span style='color:yellow'>Output is redundant. Moving to next question.</span>",
-            #          unsafe_allow_html=True)
+        if novelty_score < 0.3:
             break
 
         # Append results to the queue instead of using st.write
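For context, the main.py hunk tightens the early-exit cutoffs (coherence must now exceed 6, novelty must reach 0.3) and deletes the commented-out st.write warnings. The sketch below shows only that gating loop; the scoring stubs and the answer-generation line are hypothetical placeholders, not the app's real helpers, and only the 6 and 0.3 thresholds come from the diff.

# Sketch of the early-exit gating in process_question after this commit.
# judge_coherence/get_novelty_score below are hypothetical stubs; only the
# 6 and 0.3 thresholds are taken from the diff.
import random

def judge_coherence(answer):
    return random.randint(0, 10)      # stand-in for the judge-model call

def get_novelty_score(answer, previous_answers):
    return random.random()            # stand-in for the similarity check

def collect_answers(question, max_rounds=20):
    previous_answers = []
    for i in range(max_rounds):
        new_answer = f"answer {i} to {question!r}"  # stand-in for generation
        coherence_score = judge_coherence(new_answer)
        if coherence_score is None:
            break
        if coherence_score <= 6:      # incoherent: stop this question
            break
        novelty_score = get_novelty_score(new_answer, previous_answers)
        if novelty_score < 0.3:       # redundant: stop this question
            break
        previous_answers.append(new_answer)
    return previous_answers

print(collect_answers("What is entropy?"))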