Spaces:

Presidentlin
/

Aidan-Bench

Runtime error

App Files Files Community

Presidentlin committed on Aug 12, 2024

Commit

9d92eeb

1 Parent(s): 9a229f8

x

Browse files

Files changed (3) hide show

__pycache__/main.cpython-310.pyc +0 -0
app.py +8 -2
main.py +40 -15

__pycache__/main.cpython-310.pyc CHANGED Viewed

Binary files a/__pycache__/main.cpython-310.pyc and b/__pycache__/main.cpython-310.pyc differ

app.py CHANGED Viewed

@@ -95,6 +95,12 @@ if st.session_state.open_router_key and st.session_state.openai_api_key:
     # Choose execution mode
     execution_mode = st.radio("Execution Mode:", ["Sequential", "Multithreaded"])
     # Benchmark Execution
     if st.button("Start Benchmark"):
         if not selected_questions:
@@ -115,9 +121,9 @@ if st.session_state.open_router_key and st.session_state.openai_api_key:
                 # ... (benchmarking logic using the chosen execution mode)
                 if execution_mode == "Sequential":
-                    question_results = benchmark_model_sequential(model_name, [question], st.session_state.open_router_key, st.session_state.openai_api_key)
                 else:  # Multithreaded
-                    question_results = benchmark_model_multithreaded(model_name, [question], st.session_state.open_router_key, st.session_state.openai_api_key)
                 results.extend(question_results)

     # Choose execution mode
     execution_mode = st.radio("Execution Mode:", ["Sequential", "Multithreaded"])
+    # If multithreaded, allow user to configure thread pool size
+    if execution_mode == "Multithreaded":
+        max_threads = st.slider("Maximum Number of Threads:", 1, 10, 4)  # Default to 4 threads
+    else:
+        max_threads = None  # For sequential mode
     # Benchmark Execution
     if st.button("Start Benchmark"):
         if not selected_questions:
                 # ... (benchmarking logic using the chosen execution mode)
                 if execution_mode == "Sequential":
+                    question_results = benchmark_model_sequential(model_name, selected_questions, st.session_state.open_router_key, st.session_state.openai_api_key)
                 else:  # Multithreaded
+                    question_results = benchmark_model_multithreaded(model_name, selected_questions, st.session_state.open_router_key, st.session_state.openai_api_key, max_threads)
                 results.extend(question_results)

main.py CHANGED Viewed

@@ -6,7 +6,8 @@ from concurrent.futures import ThreadPoolExecutor, as_completed
 import threading
 import streamlit as st  # Import Streamlit
-def process_question(question, model_name, open_router_key, openai_api_key):
     start_time = time.time()
     st.write(f"<span style='color:red'>{question}</span>", unsafe_allow_html=True)  # Display question in red
     previous_answers = []
@@ -16,44 +17,60 @@ def process_question(question, model_name, open_router_key, openai_api_key):
         while True:
             gen_prompt = create_gen_prompt(question, previous_answers)
             try:
-                new_answer = chat_with_model(prompt=gen_prompt, model=model_name, open_router_key=open_router_key, openai_api_key=openai_api_key)
             except Exception as e:
-                st.write(f"<span style='color:red'>Error generating answer: {str(e)}</span>", unsafe_allow_html=True)  # Display error in red
                 break
             judge_prompt = create_judge_prompt(question, new_answer)
             judge = "openai/gpt-4o-mini"
             try:
-                judge_response = chat_with_model(prompt=judge_prompt, model=judge, open_router_key=open_router_key, openai_api_key=openai_api_key)
             except Exception as e:
-                st.write(f"<span style='color:red'>Error getting judge response: {str(e)}</span>", unsafe_allow_html=True)  # Display error in red
                 break
             coherence_score = int(judge_response.split("<coherence_score>")[1].split("</coherence_score>")[0])
             if coherence_score <= 3:
-                st.write("<span style='color:yellow'>Output is incoherent. Moving to next question.</span>", unsafe_allow_html=True)  # Display warning in yellow
                 break
             novelty_score = get_novelty_score(new_answer, previous_answers, openai_api_key)
             if novelty_score < 0.1:
-                st.write("<span style='color:yellow'>Output is redundant. Moving to next question.</span>", unsafe_allow_html=True)  # Display warning in yellow
                 break
             st.write(f"**New Answer:**\n{new_answer}")
-            st.write(f"<span style='color:green'>Coherence Score: {coherence_score}</span>", unsafe_allow_html=True)  # Display coherence score in green
             st.write(f"**Novelty Score:** {novelty_score}")
             previous_answers.append(new_answer)
             question_novelty += novelty_score
     except Exception as e:
-        st.write(f"<span style='color:red'>Unexpected error processing question: {str(e)}</span>", unsafe_allow_html=True)  # Display error in red
     time_taken = time.time() - start_time
-    st.write(f"<span style='color:blue'>Total novelty score for this question: {question_novelty}</span>", unsafe_allow_html=True)  # Display novelty score in blue
-    st.write(f"<span style='color:blue'>Time taken: {time_taken} seconds</span>", unsafe_allow_html=True)  # Display time taken in blue
     return question_novelty, [
         {
@@ -86,14 +103,22 @@ def get_novelty_score(new_answer: str, previous_answers: list, openai_api_key):
     return novelty
-def benchmark_model_multithreaded(model_name, questions, open_router_key, openai_api_key):
     novelty_score = 0
     print_lock = threading.Lock()  # Lock for thread-safe printing
     results = []
-    with ThreadPoolExecutor(max_workers=len(questions)) as executor:
         future_to_question = {executor.submit(
-            process_question, question, model_name, open_router_key, openai_api_key): question for question in questions}
         for future in as_completed(future_to_question):
             question = future_to_question[future]
@@ -117,7 +142,7 @@ def benchmark_model_sequential(model_name, questions, open_router_key, openai_ap
     results = []
     for i, question in enumerate(questions):
-        question_novelty, question_results = process_question(question, model_name, open_router_key, openai_api_key)
         novelty_score += question_novelty
         results.extend(question_results)
         st.write(f"<span style='color:yellow'>Total novelty score across processed questions: {novelty_score}</span>", unsafe_allow_html=True)  # Display progress after each question

 import threading
 import streamlit as st  # Import Streamlit
+def process_question(question, model_name, open_router_key, openai_api_key, progress_lock, completed_questions, total_questions):
     start_time = time.time()
     st.write(f"<span style='color:red'>{question}</span>", unsafe_allow_html=True)  # Display question in red
     previous_answers = []
         while True:
             gen_prompt = create_gen_prompt(question, previous_answers)
             try:
+                new_answer = chat_with_model(prompt=gen_prompt, model=model_name, open_router_key=open_router_key,
+                                             openai_api_key=openai_api_key)
             except Exception as e:
+                st.write(f"<span style='color:red'>Error generating answer: {str(e)}</span>",
+                         unsafe_allow_html=True)  # Display error in red
                 break
             judge_prompt = create_judge_prompt(question, new_answer)
             judge = "openai/gpt-4o-mini"
             try:
+                judge_response = chat_with_model(prompt=judge_prompt, model=judge, open_router_key=open_router_key,
+                                                 openai_api_key=openai_api_key)
             except Exception as e:
+                st.write(f"<span style='color:red'>Error getting judge response: {str(e)}</span>",
+                         unsafe_allow_html=True)  # Display error in red
                 break
             coherence_score = int(judge_response.split("<coherence_score>")[1].split("</coherence_score>")[0])
             if coherence_score <= 3:
+                st.write("<span style='color:yellow'>Output is incoherent. Moving to next question.</span>",
+                         unsafe_allow_html=True)  # Display warning in yellow
                 break
             novelty_score = get_novelty_score(new_answer, previous_answers, openai_api_key)
             if novelty_score < 0.1:
+                st.write("<span style='color:yellow'>Output is redundant. Moving to next question.</span>",
+                         unsafe_allow_html=True)  # Display warning in yellow
                 break
             st.write(f"**New Answer:**\n{new_answer}")
+            st.write(f"<span style='color:green'>Coherence Score: {coherence_score}</span>",
+                     unsafe_allow_html=True)  # Display coherence score in green
             st.write(f"**Novelty Score:** {novelty_score}")
             previous_answers.append(new_answer)
             question_novelty += novelty_score
     except Exception as e:
+        st.write(f"<span style='color:red'>Unexpected error processing question: {str(e)}</span>",
+                 unsafe_allow_html=True)  # Display error in red
     time_taken = time.time() - start_time
+    st.write(f"<span style='color:blue'>Total novelty score for this question: {question_novelty}</span>",
+             unsafe_allow_html=True)  # Display novelty score in blue
+    st.write(f"<span style='color:blue'>Time taken: {time_taken} seconds</span>",
+             unsafe_allow_html=True)  # Display time taken in blue
+    # Update progress
+    with progress_lock:
+        completed_questions += 1
+        progress = completed_questions / total_questions
+        st.progress(progress)  # Update the progress bar
     return question_novelty, [
         {
     return novelty
+def benchmark_model_multithreaded(model_name, questions, open_router_key, openai_api_key, max_threads=None):
     novelty_score = 0
     print_lock = threading.Lock()  # Lock for thread-safe printing
     results = []
+    completed_questions = 0  # Shared variable to track progress
+    progress_lock = threading.Lock()  # Lock for protecting completed_questions
+    # Use max_threads if provided, otherwise default to the number of questions
+    if max_threads is None:
+        max_workers = len(questions)
+    else:
+        max_workers = max_threads
+    with ThreadPoolExecutor(max_workers=max_workers) as executor:
         future_to_question = {executor.submit(
+            process_question, question, model_name, open_router_key, openai_api_key, progress_lock, completed_questions, len(questions)): question for question in questions}
         for future in as_completed(future_to_question):
             question = future_to_question[future]
     results = []
     for i, question in enumerate(questions):
+        question_novelty, question_results = process_question(question, model_name, open_router_key, openai_api_key, threading.Lock(), i, len(questions))
         novelty_score += question_novelty
         results.extend(question_results)
         st.write(f"<span style='color:yellow'>Total novelty score across processed questions: {novelty_score}</span>", unsafe_allow_html=True)  # Display progress after each question