Presidentlin committed on
Commit
fb39607
·
1 Parent(s): c77c9f7
Files changed (3) hide show
  1. __pycache__/main.cpython-310.pyc +0 -0
  2. app.py +12 -0
  3. main.py +2 -6
__pycache__/main.cpython-310.pyc CHANGED
Binary files a/__pycache__/main.cpython-310.pyc and b/__pycache__/main.cpython-310.pyc differ
 
app.py CHANGED
@@ -2,6 +2,7 @@ import streamlit as st
2
  from main import benchmark_model_multithreaded, benchmark_model_sequential
3
  from prompts import questions as predefined_questions
4
  import requests
 
5
 
6
  # Set the title in the browser tab
7
  st.set_page_config(page_title="Aidan Bench - Generator")
@@ -130,11 +131,22 @@ if st.session_state.open_router_key and st.session_state.openai_api_key:
130
  results_table.append({
131
  "Question": result["question"],
132
  "Answer": answer,
 
 
133
  "Coherence Score": result["coherence_score"],
134
  "Novelty Score": result["novelty_score"]
135
  })
136
  st.table(results_table)
137
 
 
 
 
 
 
 
 
 
 
138
  if stop_button:
139
  st.warning("Partial results displayed due to interruption.")
140
  else:
 
2
  from main import benchmark_model_multithreaded, benchmark_model_sequential
3
  from prompts import questions as predefined_questions
4
  import requests
5
+ import pandas as pd
6
 
7
  # Set the title in the browser tab
8
  st.set_page_config(page_title="Aidan Bench - Generator")
 
131
  results_table.append({
132
  "Question": result["question"],
133
  "Answer": answer,
134
+ "Contestant Model": model_name,
135
+ "Judge Model": 'openai/gpt-4o-mini',
136
  "Coherence Score": result["coherence_score"],
137
  "Novelty Score": result["novelty_score"]
138
  })
139
  st.table(results_table)
140
 
141
+ df = pd.DataFrame(results_table) # Create a Pandas DataFrame from the results
142
+ csv = df.to_csv(index=False).encode('utf-8') # Convert DataFrame to CSV
143
+ st.download_button(
144
+ label="Export Results as CSV",
145
+ data=csv,
146
+ file_name="benchmark_results.csv",
147
+ mime='text/csv'
148
+ )
149
+
150
  if stop_button:
151
  st.warning("Partial results displayed due to interruption.")
152
  else:
main.py CHANGED
@@ -51,16 +51,12 @@ def process_question(question, model_name, open_router_key, openai_api_key, resu
51
  if coherence_score is None:
52
  break
53
 
54
- if coherence_score <= 3:
55
- # st.write("<span style='color:yellow'>Output is incoherent. Moving to next question.</span>",
56
- # unsafe_allow_html=True)
57
  break
58
 
59
  novelty_score = get_novelty_score(new_answer, previous_answers, openai_api_key)
60
 
61
- if novelty_score < 0.1:
62
- # st.write("<span style='color:yellow'>Output is redundant. Moving to next question.</span>",
63
- # unsafe_allow_html=True)
64
  break
65
 
66
  # Append results to the queue instead of using st.write
 
51
  if coherence_score is None:
52
  break
53
 
54
+ if coherence_score <= 6:
 
 
55
  break
56
 
57
  novelty_score = get_novelty_score(new_answer, previous_answers, openai_api_key)
58
 
59
+ if novelty_score < 0.3:
 
 
60
  break
61
 
62
  # Append results to the queue instead of using st.write