atin121 committed
Commit be0c1cc · 1 Parent(s): c60890d

Added single-question review

Files changed (2)
  1. app.py +205 -79
  2. requirements.txt +3 -0
app.py CHANGED
@@ -5,6 +5,8 @@ import os
 import requests
 import json
 from dotenv import load_dotenv
+import threading
+from queue import Queue, Empty

 # Load environment variables
 load_dotenv()
@@ -37,7 +39,7 @@ def get_response(question, model):
     """Get response from OpenRouter API for the given question and model."""
     headers = {
         "Authorization": f"Bearer {OPENROUTER_API_KEY}",
-        "HTTP-Referer": "http://localhost:7860",  # Replace with your actual domain
+        "HTTP-Referer": "${SPACE_ID}.hf.space" if os.getenv('SPACE_ID') else "http://localhost:7860",
         "Content-Type": "application/json"
     }

@@ -89,102 +91,226 @@ def read_questions(file_obj):
         raise gr.Error(f"Maximum {MAX_QUESTIONS} questions allowed.")
     return questions

-with gr.Blocks() as demo:
-    gr.Markdown("# Vibes Benchmark\nUpload a `.txt` file with **one question per line**.")
+with gr.Blocks(title="Vibesmark Test Suite") as demo:
+    gr.Markdown("# Vibesmark Test Suite\nUpload a `.txt` file with **one question per line**.")
+
+    # Store current state
+    state = gr.State({"questions": [], "current_index": 0})

     file_input = gr.File(label="Upload your questions (.txt)")
-    run_button = gr.Button("Run Benchmark", variant="primary")
+    with gr.Row():
+        prev_btn = gr.Button("← Previous", interactive=False)
+        question_counter = gr.Markdown("Question 0 / 0")
+        next_btn = gr.Button("Next →", interactive=False)

-    # Create dynamic response areas
-    response_areas = []
-    for i in range(MAX_QUESTIONS):
-        with gr.Group(visible=False) as group_i:
-            gr.Markdown(f"### Question {i+1}")
-            with gr.Row():
-                with gr.Column():
-                    # Accordion for Model 1
-                    with gr.Accordion("Model 1", open=False):
-                        model1_i = gr.Markdown("")
-                        response1_i = gr.Textbox(label="Response 1", interactive=False, lines=4)
-                with gr.Column():
-                    # Accordion for Model 2
-                    with gr.Accordion("Model 2", open=False):
-                        model2_i = gr.Markdown("")
-                        response2_i = gr.Textbox(label="Response 2", interactive=False, lines=4)
-            gr.Markdown("---")
-
-        response_areas.append({
-            'group': group_i,
-            'model1': model1_i,
-            'response1': response1_i,
-            'model2': model2_i,
-            'response2': response2_i
-        })
-
-    def process_file(file):
-        """Show/hide question groups depending on how many questions are in the file."""
+    with gr.Group() as question_group:
+        question_display = gr.Markdown("### Upload a file to begin")
+        with gr.Row():
+            with gr.Column():
+                with gr.Accordion("Model 1", open=False):
+                    model1_display = gr.Markdown("")
+                    response1_display = gr.Textbox(label="Response 1", interactive=False, lines=4)
+            with gr.Column():
+                with gr.Accordion("Model 2", open=False):
+                    model2_display = gr.Markdown("")
+                    response2_display = gr.Textbox(label="Response 2", interactive=False, lines=4)
+
+    run_button = gr.Button("Run Comparison", variant="primary")
+
+    def process_file(file, state):
         if file is None:
             raise gr.Error("Please upload a file first.")
         questions = read_questions(file)
+        new_state = {"questions": questions, "current_index": 0}

-        # Show as many question groups as needed; hide the rest
-        updates = []
-        for i in range(MAX_QUESTIONS):
-            updates.append(gr.update(visible=(i < len(questions))))
-
-        return updates
+        # Return outputs in order matching the outputs list in the event handler
+        return [
+            f"### Question 1:\n{questions[0]}",  # question_display
+            f"Question 1 / {len(questions)}",  # question_counter
+            gr.update(interactive=False),  # prev_btn
+            gr.update(interactive=len(questions) > 1),  # next_btn
+            gr.update(value=""),  # model1_display
+            gr.update(value=""),  # response1_display
+            gr.update(value=""),  # model2_display
+            gr.update(value=""),  # response2_display
+            new_state  # state
+        ]

-    def run_benchmark(file):
-        """Generator function yielding partial updates in real time."""
-        questions = read_questions(file)
+    def navigate_question(direction, state):
+        questions = state["questions"]
+        current_index = state["current_index"]

-        # Initialize all update values as blank
-        updates = [gr.update(value="")] * (MAX_QUESTIONS * 4)
-
-        # Process each question, 2 models per question
-        for i, question in enumerate(questions):
-            # 1) Pick first model, yield it
-            model_1 = random.choice(MODELS)
-            updates[i*4] = gr.update(value=f"**{model_1}**")  # model1 for question i
-            yield updates  # partial update (reveal model_1 accordion)
-
-            # 2) Get response from model_1
-            for response_1 in get_response(question, model_1):
-                updates[i*4 + 1] = gr.update(value=response_1)  # response1
-                yield updates
+        if direction == "next" and current_index < len(questions) - 1:
+            current_index += 1
+        elif direction == "prev" and current_index > 0:
+            current_index -= 1

-            # 3) Pick second model (ensure different from first), yield it
-            remaining_models = [m for m in MODELS if m != model_1]
-            model_2 = random.choice(remaining_models)
-            updates[i*4 + 2] = gr.update(value=f"**{model_2}**")  # model2
-            yield updates
-
-            # 4) Get response from model_2
-            for response_2 in get_response(question, model_2):
-                updates[i*4 + 3] = gr.update(value=response_2)  # response2
-                yield updates
-
-    # The outputs we update after each yield
-    update_targets = []
-    for area in response_areas:
-        update_targets.append(area['model1'])
-        update_targets.append(area['response1'])
-        update_targets.append(area['model2'])
-        update_targets.append(area['response2'])
+        new_state = state.copy()
+        new_state["current_index"] = current_index
+
+        # Return outputs in order matching the outputs list in the event handler
+        return [
+            f"### Question {current_index + 1}:\n{questions[current_index]}",  # question_display
+            f"Question {current_index + 1} / {len(questions)}",  # question_counter
+            gr.update(interactive=current_index > 0),  # prev_btn
+            gr.update(interactive=current_index < len(questions) - 1),  # next_btn
+            gr.update(value=""),  # model1_display
+            gr.update(value=""),  # response1_display
+            gr.update(value=""),  # model2_display
+            gr.update(value=""),  # response2_display
+            new_state  # state
+        ]
+
+    def get_responses_in_parallel(question, model1, model2):
+        """
+        Spawn two threads to run get_response for each model in parallel,
+        queuing partial responses as they arrive. Yields tuples of
+        (partial_response_model1, partial_response_model2).
+        """
+        queue1 = Queue()
+        queue2 = Queue()
+
+        def fill_queue(q, question, model):
+            for partial_response in get_response(question, model):
+                q.put(partial_response)
+            q.put(None)  # Sentinel indicating completion
+
+        # Spawn threads
+        t1 = threading.Thread(target=fill_queue, args=(queue1, question, model1))
+        t2 = threading.Thread(target=fill_queue, args=(queue2, question, model2))
+        t1.start()
+        t2.start()
+
+        # Initialize trackers
+        partial1 = ""
+        partial2 = ""
+        done1 = False
+        done2 = False
+
+        # Keep yielding as long as at least one thread is still producing
+        while not (done1 and done2):
+            try:
+                item1 = queue1.get(timeout=0.1)
+                if item1 is None:
+                    done1 = True
+                else:
+                    partial1 = item1
+            except Empty:
+                pass
+
+            try:
+                item2 = queue2.get(timeout=0.1)
+                if item2 is None:
+                    done2 = True
+                else:
+                    partial2 = item2
+            except Empty:
+                pass
+
+            yield partial1, partial2
+
+        # Join threads and finish
+        t1.join()
+        t2.join()
+
+    def run_comparison(state):
+        """
+        Run comparison for the current question, streaming both models'
+        responses in parallel.
+        """
+        if not state["questions"]:
+            raise gr.Error("Please upload a file first.")
+
+        current_question = state["questions"][state["current_index"]]
+
+        # Pick two distinct models
+        model_1 = random.choice(MODELS)
+        remaining_models = [m for m in MODELS if m != model_1]
+        model_2 = random.choice(remaining_models)
+
+        # Initial yield to display chosen models
+        yield [
+            gr.update(value=f"**{model_1}**"),
+            gr.update(value=""),
+            gr.update(value=f"**{model_2}**"),
+            gr.update(value="")
+        ]
+
+        # Now stream both model responses in parallel
+        for partial1, partial2 in get_responses_in_parallel(current_question, model_1, model_2):
+            yield [
+                gr.update(value=f"**{model_1}**"),
+                gr.update(value=partial1),
+                gr.update(value=f"**{model_2}**"),
+                gr.update(value=partial2)
+            ]

     # Connect events
     file_input.change(
         fn=process_file,
-        inputs=file_input,
-        outputs=[area['group'] for area in response_areas]
+        inputs=[file_input, state],
+        outputs=[
+            question_display,
+            question_counter,
+            prev_btn,
+            next_btn,
+            model1_display,
+            response1_display,
+            model2_display,
+            response2_display,
+            state
+        ]
+    )
+
+    prev_btn.click(
+        fn=lambda state: navigate_question("prev", state),
+        inputs=[state],
+        outputs=[
+            question_display,
+            question_counter,
+            prev_btn,
+            next_btn,
+            model1_display,
+            response1_display,
+            model2_display,
+            response2_display,
+            state
+        ]
+    )
+
+    next_btn.click(
+        fn=lambda state: navigate_question("next", state),
+        inputs=[state],
+        outputs=[
+            question_display,
+            question_counter,
+            prev_btn,
+            next_btn,
+            model1_display,
+            response1_display,
+            model2_display,
+            response2_display,
+            state
+        ]
     )

     run_button.click(
-        fn=run_benchmark,
-        inputs=file_input,
-        outputs=update_targets
+        fn=run_comparison,
+        inputs=[state],
+        outputs=[
+            model1_display,
+            response1_display,
+            model2_display,
+            response2_display
+        ]
     )

+    # Add footer with subtle styling
+    gr.Markdown("<p style='color: #666; font-size: 0.8em; text-align: center; margin-top: 2em;'>Homegrown software from the Chateau</p>")
+
 # Enable queue for partial outputs to appear as they are yielded
 demo.queue()
-demo.launch()
+
+# Launch with the appropriate host setting for deployment
+if __name__ == "__main__":
+    demo.launch(share=False, server_name="0.0.0.0", server_port=7860)
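
Both the removed run_benchmark loop and the new fill_queue / run_comparison path iterate over get_response(question, model) as a generator that yields progressively longer partial responses. That generator sits outside the changed hunks; the sketch below only illustrates what such a function might look like, assuming OpenRouter's OpenAI-compatible /chat/completions endpoint with "stream": true and SSE "data:" lines. The name get_response_sketch and its details are illustrative, not the committed implementation.

```python
# Illustrative sketch only -- the committed get_response is not shown in this diff.
# Assumes OpenRouter's OpenAI-compatible streaming endpoint and SSE "data:" lines.
import json
import requests

def get_response_sketch(question, model, api_key):
    """Yield the accumulated response text after each streamed chunk."""
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": model,
        "messages": [{"role": "user", "content": question}],
        "stream": True,
    }
    accumulated = ""
    with requests.post(
        "https://openrouter.ai/api/v1/chat/completions",
        headers=headers, json=payload, stream=True, timeout=120,
    ) as resp:
        resp.raise_for_status()
        for line in resp.iter_lines():
            if not line or not line.startswith(b"data: "):
                continue  # skip keep-alive comments and blank lines
            data = line[len(b"data: "):]
            if data == b"[DONE]":
                break
            delta = json.loads(data)["choices"][0]["delta"].get("content") or ""
            accumulated += delta
            yield accumulated  # cumulative text so far
```

Yielding cumulative text rather than per-chunk deltas matches how fill_queue forwards each item into its queue and how run_comparison writes the most recent item straight into the response Textbox.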
requirements.txt ADDED
@@ -0,0 +1,3 @@
+gradio>=4.0.0
+requests>=2.31.0
+python-dotenv>=1.0.0
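
A hypothetical quick-start (not part of the commit) tying the pieces together after `pip install -r requirements.txt`: app.py reads its OpenRouter key via python-dotenv and expects a plain-text questions file with one question per line, capped at MAX_QUESTIONS by read_questions.

```python
# Hypothetical quick-start helper, not part of the commit.
import os
from pathlib import Path

# app.py loads OPENROUTER_API_KEY through python-dotenv, so a .env file next to it also works.
assert os.getenv("OPENROUTER_API_KEY"), "export OPENROUTER_API_KEY or put it in a .env file"

# One question per line; read_questions() rejects files with more than MAX_QUESTIONS questions.
Path("questions.txt").write_text(
    "What is the capital of France?\n"
    "Explain threads versus processes in Python in two sentences.\n",
    encoding="utf-8",
)
# Then run `python app.py` and open http://localhost:7860 to upload questions.txt.
```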