Presidentlin committed
Commit 9d92eeb · 1 Parent(s): 9a229f8
Files changed (3)
  1. __pycache__/main.cpython-310.pyc +0 -0
  2. app.py +8 -2
  3. main.py +40 -15
__pycache__/main.cpython-310.pyc CHANGED
Binary files a/__pycache__/main.cpython-310.pyc and b/__pycache__/main.cpython-310.pyc differ
 
app.py CHANGED
@@ -95,6 +95,12 @@ if st.session_state.open_router_key and st.session_state.openai_api_key:
     # Choose execution mode
     execution_mode = st.radio("Execution Mode:", ["Sequential", "Multithreaded"])
 
+    # If multithreaded, allow user to configure thread pool size
+    if execution_mode == "Multithreaded":
+        max_threads = st.slider("Maximum Number of Threads:", 1, 10, 4) # Default to 4 threads
+    else:
+        max_threads = None # For sequential mode
+
     # Benchmark Execution
     if st.button("Start Benchmark"):
         if not selected_questions:
@@ -115,9 +121,9 @@ if st.session_state.open_router_key and st.session_state.openai_api_key:
 
             # ... (benchmarking logic using the chosen execution mode)
             if execution_mode == "Sequential":
-                question_results = benchmark_model_sequential(model_name, [question], st.session_state.open_router_key, st.session_state.openai_api_key)
+                question_results = benchmark_model_sequential(model_name, selected_questions, st.session_state.open_router_key, st.session_state.openai_api_key)
            else: # Multithreaded
-                question_results = benchmark_model_multithreaded(model_name, [question], st.session_state.open_router_key, st.session_state.openai_api_key)
+                question_results = benchmark_model_multithreaded(model_name, selected_questions, st.session_state.open_router_key, st.session_state.openai_api_key, max_threads)
 
             results.extend(question_results)
 
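st.slider takes the minimum, maximum, and default value positionally, so the widget above yields an int between 1 and 10 that defaults to 4; in Sequential mode max_threads stays None and main.py falls back to one worker per question. A minimal sketch of how the slider value is meant to reach the thread pool (run_benchmark and the placeholder question list are illustrative stand-ins, not code from this commit):

    # Sketch only: wiring the UI value through to ThreadPoolExecutor.
    import streamlit as st
    from concurrent.futures import ThreadPoolExecutor

    execution_mode = st.radio("Execution Mode:", ["Sequential", "Multithreaded"])
    max_threads = st.slider("Maximum Number of Threads:", 1, 10, 4) if execution_mode == "Multithreaded" else None

    def run_benchmark(questions, max_threads=None):
        workers = max_threads or len(questions)  # None -> one worker per question, mirroring main.py
        with ThreadPoolExecutor(max_workers=workers) as executor:
            return list(executor.map(len, questions))  # placeholder work

    if st.button("Start Benchmark"):
        st.write(run_benchmark(["q1", "q2", "q3"], max_threads))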
 
main.py CHANGED
@@ -6,7 +6,8 @@ from concurrent.futures import ThreadPoolExecutor, as_completed
 import threading
 import streamlit as st # Import Streamlit
 
-def process_question(question, model_name, open_router_key, openai_api_key):
+
+def process_question(question, model_name, open_router_key, openai_api_key, progress_lock, completed_questions, total_questions):
     start_time = time.time()
     st.write(f"<span style='color:red'>{question}</span>", unsafe_allow_html=True) # Display question in red
     previous_answers = []
@@ -16,44 +17,60 @@ def process_question(question, model_name, open_router_key, openai_api_key):
         while True:
             gen_prompt = create_gen_prompt(question, previous_answers)
             try:
-                new_answer = chat_with_model(prompt=gen_prompt, model=model_name, open_router_key=open_router_key, openai_api_key=openai_api_key)
+                new_answer = chat_with_model(prompt=gen_prompt, model=model_name, open_router_key=open_router_key,
+                                             openai_api_key=openai_api_key)
             except Exception as e:
-                st.write(f"<span style='color:red'>Error generating answer: {str(e)}</span>", unsafe_allow_html=True) # Display error in red
+                st.write(f"<span style='color:red'>Error generating answer: {str(e)}</span>",
+                         unsafe_allow_html=True) # Display error in red
                 break
 
             judge_prompt = create_judge_prompt(question, new_answer)
             judge = "openai/gpt-4o-mini"
             try:
-                judge_response = chat_with_model(prompt=judge_prompt, model=judge, open_router_key=open_router_key, openai_api_key=openai_api_key)
+                judge_response = chat_with_model(prompt=judge_prompt, model=judge, open_router_key=open_router_key,
+                                                 openai_api_key=openai_api_key)
             except Exception as e:
-                st.write(f"<span style='color:red'>Error getting judge response: {str(e)}</span>", unsafe_allow_html=True) # Display error in red
+                st.write(f"<span style='color:red'>Error getting judge response: {str(e)}</span>",
+                         unsafe_allow_html=True) # Display error in red
                 break
 
             coherence_score = int(judge_response.split("<coherence_score>")[1].split("</coherence_score>")[0])
 
             if coherence_score <= 3:
-                st.write("<span style='color:yellow'>Output is incoherent. Moving to next question.</span>", unsafe_allow_html=True) # Display warning in yellow
+                st.write("<span style='color:yellow'>Output is incoherent. Moving to next question.</span>",
+                         unsafe_allow_html=True) # Display warning in yellow
                 break
 
             novelty_score = get_novelty_score(new_answer, previous_answers, openai_api_key)
 
             if novelty_score < 0.1:
-                st.write("<span style='color:yellow'>Output is redundant. Moving to next question.</span>", unsafe_allow_html=True) # Display warning in yellow
+                st.write("<span style='color:yellow'>Output is redundant. Moving to next question.</span>",
+                         unsafe_allow_html=True) # Display warning in yellow
                 break
 
             st.write(f"**New Answer:**\n{new_answer}")
-            st.write(f"<span style='color:green'>Coherence Score: {coherence_score}</span>", unsafe_allow_html=True) # Display coherence score in green
+            st.write(f"<span style='color:green'>Coherence Score: {coherence_score}</span>",
+                     unsafe_allow_html=True) # Display coherence score in green
            st.write(f"**Novelty Score:** {novelty_score}")
 
             previous_answers.append(new_answer)
             question_novelty += novelty_score
 
     except Exception as e:
-        st.write(f"<span style='color:red'>Unexpected error processing question: {str(e)}</span>", unsafe_allow_html=True) # Display error in red
+        st.write(f"<span style='color:red'>Unexpected error processing question: {str(e)}</span>",
+                 unsafe_allow_html=True) # Display error in red
 
     time_taken = time.time() - start_time
-    st.write(f"<span style='color:blue'>Total novelty score for this question: {question_novelty}</span>", unsafe_allow_html=True) # Display novelty score in blue
-    st.write(f"<span style='color:blue'>Time taken: {time_taken} seconds</span>", unsafe_allow_html=True) # Display time taken in blue
+    st.write(f"<span style='color:blue'>Total novelty score for this question: {question_novelty}</span>",
+             unsafe_allow_html=True) # Display novelty score in blue
+    st.write(f"<span style='color:blue'>Time taken: {time_taken} seconds</span>",
+             unsafe_allow_html=True) # Display time taken in blue
+
+    # Update progress
+    with progress_lock:
+        completed_questions += 1
+        progress = completed_questions / total_questions
+        st.progress(progress) # Update the progress bar
 
     return question_novelty, [
         {
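A note on the progress bookkeeping added above: completed_questions is an int, and += on an int rebinds a name local to each worker rather than mutating a shared object, so every thread starts from the same value and the shared count never accumulates; the lock guards the arithmetic but not a shared total. Each st.progress(...) call also creates a new progress-bar element instead of updating one. A sketch of one way to share a mutable, lock-guarded counter and a single bar (ProgressCounter, report_done, and the placeholder question list are illustrative, not part of the commit):

    import threading
    import streamlit as st

    class ProgressCounter:
        """Counter shared by reference across worker threads, guarded by a lock."""
        def __init__(self, total):
            self.total = total
            self.done = 0
            self._lock = threading.Lock()

        def tick(self):
            with self._lock:
                self.done += 1
                return self.done / self.total

    questions = ["q1", "q2", "q3"]         # placeholder work items
    bar = st.progress(0.0)                 # created once, then updated in place
    counter = ProgressCounter(len(questions))

    def report_done():
        # Each worker calls this when a question finishes; the fraction is
        # computed under the lock, so concurrent ticks cannot race.
        bar.progress(counter.tick())

Depending on the Streamlit version, st.* calls made from worker threads may also need the script run context attached to each thread before they render.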
 
@@ -86,14 +103,22 @@ def get_novelty_score(new_answer: str, previous_answers: list, openai_api_key):
     return novelty
 
 
-def benchmark_model_multithreaded(model_name, questions, open_router_key, openai_api_key):
+def benchmark_model_multithreaded(model_name, questions, open_router_key, openai_api_key, max_threads=None):
     novelty_score = 0
     print_lock = threading.Lock() # Lock for thread-safe printing
     results = []
+    completed_questions = 0 # Shared variable to track progress
+    progress_lock = threading.Lock() # Lock for protecting completed_questions
+
+    # Use max_threads if provided, otherwise default to the number of questions
+    if max_threads is None:
+        max_workers = len(questions)
+    else:
+        max_workers = max_threads
 
-    with ThreadPoolExecutor(max_workers=len(questions)) as executor:
+    with ThreadPoolExecutor(max_workers=max_workers) as executor:
         future_to_question = {executor.submit(
-            process_question, question, model_name, open_router_key, openai_api_key): question for question in questions}
+            process_question, question, model_name, open_router_key, openai_api_key, progress_lock, completed_questions, len(questions)): question for question in questions}
 
         for future in as_completed(future_to_question):
             question = future_to_question[future]
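The hunk above ends where results are gathered from the pool; the usual shape of that loop is to call future.result() inside try/except so one failed question does not abort the whole run. A self-contained sketch of the pattern (the stub process_question and placeholder question list only mirror the return shape used here):

    from concurrent.futures import ThreadPoolExecutor, as_completed

    def process_question(question):
        return 1.0, [{"question": question}]   # stub: (novelty, result rows)

    questions = ["q1", "q2", "q3"]
    novelty_score, results = 0.0, []

    with ThreadPoolExecutor(max_workers=min(4, len(questions))) as executor:
        future_to_question = {executor.submit(process_question, q): q for q in questions}
        for future in as_completed(future_to_question):
            question = future_to_question[future]
            try:
                question_novelty, question_results = future.result()
            except Exception as exc:           # one failed question should not stop the run
                print(f"{question!r} raised {exc!r}")
                continue
            novelty_score += question_novelty
            results.extend(question_results)

Since max_workers falls back to len(questions) when no cap is given, large question sets open one thread per question; something like min(max_threads or len(questions), len(questions)) keeps the pool bounded.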
 
@@ -117,7 +142,7 @@ def benchmark_model_sequential(model_name, questions, open_router_key, openai_ap
     results = []
 
     for i, question in enumerate(questions):
-        question_novelty, question_results = process_question(question, model_name, open_router_key, openai_api_key)
+        question_novelty, question_results = process_question(question, model_name, open_router_key, openai_api_key, threading.Lock(), i, len(questions))
         novelty_score += question_novelty
         results.extend(question_results)
         st.write(f"<span style='color:yellow'>Total novelty score across processed questions: {novelty_score}</span>", unsafe_allow_html=True) # Display progress after each question
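In the sequential path each call receives a fresh threading.Lock() and the loop index i as its starting count, so the in-function increment reports (i + 1) / len(questions), which is the right fraction when questions run one at a time. The same reporting without the lock/counter plumbing, as a sketch (the stub process_question again only mirrors the return shape):

    import streamlit as st

    def process_question(question):
        return 1.0, [{"question": question}]   # stub: (novelty, result rows)

    questions = ["q1", "q2", "q3"]
    bar = st.progress(0.0)
    novelty_score, results = 0.0, []

    for i, question in enumerate(questions):
        question_novelty, question_results = process_question(question)
        novelty_score += question_novelty
        results.extend(question_results)
        bar.progress((i + 1) / len(questions))  # fraction of questions completed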