Added single-question review
- app.py +205 -79
- requirements.txt +3 -0
app.py
CHANGED
@@ -5,6 +5,8 @@ import os
 import requests
 import json
 from dotenv import load_dotenv
+import threading
+from queue import Queue, Empty
 
 # Load environment variables
 load_dotenv()
@@ -37,7 +39,7 @@ def get_response(question, model):
     """Get response from OpenRouter API for the given question and model."""
     headers = {
         "Authorization": f"Bearer {OPENROUTER_API_KEY}",
-        "HTTP-Referer": "http://localhost:7860",
+        "HTTP-Referer": "${SPACE_ID}.hf.space" if os.getenv('SPACE_ID') else "http://localhost:7860",
         "Content-Type": "application/json"
     }
 
@@ -89,102 +91,226 @@ def read_questions(file_obj):
         raise gr.Error(f"Maximum {MAX_QUESTIONS} questions allowed.")
     return questions
 
-with gr.Blocks() as demo:
+with gr.Blocks(title="Vibesmark Test Suite") as demo:
+    gr.Markdown("# Vibesmark Test Suite\nUpload a `.txt` file with **one question per line**.")
+
+    # Store current state
+    state = gr.State({"questions": [], "current_index": 0})
 
     file_input = gr.File(label="Upload your questions (.txt)")
-        response2_i = gr.Textbox(label="Response 2", interactive=False, lines=4)
-        gr.Markdown("---")
-
-        response_areas.append({
-            'group': group_i,
-            'model1': model1_i,
-            'response1': response1_i,
-            'model2': model2_i,
-            'response2': response2_i
-        })
-
-    def process_file(file):
-        """Show/hide question groups depending on how many questions are in the file."""
+    with gr.Row():
+        prev_btn = gr.Button("← Previous", interactive=False)
+        question_counter = gr.Markdown("Question 0 / 0")
+        next_btn = gr.Button("Next →", interactive=False)
+
+    with gr.Group() as question_group:
+        question_display = gr.Markdown("### Upload a file to begin")
+        with gr.Row():
+            with gr.Column():
+                with gr.Accordion("Model 1", open=False):
+                    model1_display = gr.Markdown("")
+                response1_display = gr.Textbox(label="Response 1", interactive=False, lines=4)
+            with gr.Column():
+                with gr.Accordion("Model 2", open=False):
+                    model2_display = gr.Markdown("")
+                response2_display = gr.Textbox(label="Response 2", interactive=False, lines=4)
+
+    run_button = gr.Button("Run Comparison", variant="primary")
+
+    def process_file(file, state):
         if file is None:
             raise gr.Error("Please upload a file first.")
         questions = read_questions(file)
+        new_state = {"questions": questions, "current_index": 0}
 
+        # Return outputs in order matching the outputs list in the event handler
+        return [
+            f"### Question 1:\n{questions[0]}", # question_display
+            f"Question 1 / {len(questions)}", # question_counter
+            gr.update(interactive=False), # prev_btn
+            gr.update(interactive=len(questions) > 1), # next_btn
+            gr.update(value=""), # model1_display
+            gr.update(value=""), # response1_display
+            gr.update(value=""), # model2_display
+            gr.update(value=""), # response2_display
+            new_state # state
+        ]
 
-        for i, question in enumerate(questions):
-            # 1) Pick first model, yield it
-            model_1 = random.choice(MODELS)
-            updates[i*4] = gr.update(value=f"**{model_1}**") # model1 for question i
-            yield updates # partial update (reveal model_1 accordion)
-
-            # 2) Get response from model_1
-            for response_1 in get_response(question, model_1):
-                updates[i*4 + 1] = gr.update(value=response_1) # response1
-                yield updates
+    def navigate_question(direction, state):
+        questions = state["questions"]
+        current_index = state["current_index"]
+
+        if direction == "next" and current_index < len(questions) - 1:
+            current_index += 1
+        elif direction == "prev" and current_index > 0:
+            current_index -= 1
+
+        new_state = state.copy()
+        new_state["current_index"] = current_index
+
+        # Return outputs in order matching the outputs list in the event handler
+        return [
+            f"### Question {current_index + 1}:\n{questions[current_index]}", # question_display
+            f"Question {current_index + 1} / {len(questions)}", # question_counter
+            gr.update(interactive=current_index > 0), # prev_btn
+            gr.update(interactive=current_index < len(questions) - 1), # next_btn
+            gr.update(value=""), # model1_display
+            gr.update(value=""), # response1_display
+            gr.update(value=""), # model2_display
+            gr.update(value=""), # response2_display
+            new_state # state
+        ]
+
+    def get_responses_in_parallel(question, model1, model2):
+        """
+        Spawn two threads to run get_response for each model in parallel,
+        queuing partial responses as they arrive. Yields tuples of
+        (partial_response_model1, partial_response_model2).
+        """
+        queue1 = Queue()
+        queue2 = Queue()
+
+        def fill_queue(q, question, model):
+            for partial_response in get_response(question, model):
+                q.put(partial_response)
+            q.put(None) # Sentinel indicating completion
+
+        # Spawn threads
+        t1 = threading.Thread(target=fill_queue, args=(queue1, question, model1))
+        t2 = threading.Thread(target=fill_queue, args=(queue2, question, model2))
+        t1.start()
+        t2.start()
+
+        # Initialize trackers
+        partial1 = ""
+        partial2 = ""
+        done1 = False
+        done2 = False
+
+        # Keep yielding as long as at least one thread is still producing
+        while not (done1 and done2):
+            try:
+                item1 = queue1.get(timeout=0.1)
+                if item1 is None:
+                    done1 = True
+                else:
+                    partial1 = item1
+            except Empty:
+                pass
+
+            try:
+                item2 = queue2.get(timeout=0.1)
+                if item2 is None:
+                    done2 = True
+                else:
+                    partial2 = item2
+            except Empty:
+                pass
+
+            yield partial1, partial2
+
+        # Join threads and finish
+        t1.join()
+        t2.join()
+
+    def run_comparison(state):
+        """
+        Run comparison for the current question, streaming both models'
+        responses in parallel.
+        """
+        if not state["questions"]:
+            raise gr.Error("Please upload a file first.")
+
+        current_question = state["questions"][state["current_index"]]
+
+        # Pick two distinct models
+        model_1 = random.choice(MODELS)
+        remaining_models = [m for m in MODELS if m != model_1]
+        model_2 = random.choice(remaining_models)
+
+        # Initial yield to display chosen models
+        yield [
+            gr.update(value=f"**{model_1}**"),
+            gr.update(value=""),
+            gr.update(value=f"**{model_2}**"),
+            gr.update(value="")
+        ]
+
+        # Now stream both model responses in parallel
+        for partial1, partial2 in get_responses_in_parallel(current_question, model_1, model_2):
+            yield [
+                gr.update(value=f"**{model_1}**"),
+                gr.update(value=partial1),
+                gr.update(value=f"**{model_2}**"),
+                gr.update(value=partial2)
+            ]
 
     # Connect events
     file_input.change(
         fn=process_file,
-        inputs=file_input,
-        outputs=[
+        inputs=[file_input, state],
+        outputs=[
+            question_display,
+            question_counter,
+            prev_btn,
+            next_btn,
+            model1_display,
+            response1_display,
+            model2_display,
+            response2_display,
+            state
+        ]
+    )
+
+    prev_btn.click(
+        fn=lambda state: navigate_question("prev", state),
+        inputs=[state],
+        outputs=[
+            question_display,
+            question_counter,
+            prev_btn,
+            next_btn,
+            model1_display,
+            response1_display,
+            model2_display,
+            response2_display,
+            state
+        ]
+    )
+
+    next_btn.click(
+        fn=lambda state: navigate_question("next", state),
+        inputs=[state],
+        outputs=[
+            question_display,
+            question_counter,
+            prev_btn,
+            next_btn,
+            model1_display,
+            response1_display,
+            model2_display,
+            response2_display,
+            state
+        ]
     )
 
     run_button.click(
+        fn=run_comparison,
+        inputs=[state],
+        outputs=[
+            model1_display,
+            response1_display,
+            model2_display,
+            response2_display
+        ]
     )
 
+    # Add footer with subtle styling
+    gr.Markdown("<p style='color: #666; font-size: 0.8em; text-align: center; margin-top: 2em;'>Homegrown software from the Chateau</p>")
+
 # Enable queue for partial outputs to appear as they are yielded
 demo.queue()
+
+# Launch with the appropriate host setting for deployment
+if __name__ == "__main__":
+    demo.launch(share=False, server_name="0.0.0.0", server_port=7860)
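The core of the streaming change is the two-queue merge in `get_responses_in_parallel`: each model's generator is drained on its own thread into a `Queue`, with `None` as an end-of-stream sentinel, and the main loop yields the latest `(partial1, partial2)` pair. Here is a minimal, self-contained sketch of that same pattern outside Gradio; `fake_stream` and `merge_streams` are hypothetical stand-ins for `get_response` and the in-app generator (only `fill_queue` mirrors a name from the commit), so treat it as an illustration rather than code from the diff.

```python
import threading
import time
from queue import Queue, Empty


def fake_stream(prefix, chunks):
    """Stand-in for a streaming model call: yields a growing partial answer."""
    text = ""
    for chunk in chunks:
        time.sleep(0.05)  # simulate network/streaming latency
        text += chunk
        yield f"{prefix}: {text}"


def merge_streams(gen1, gen2):
    """Drain two generators on worker threads, yielding (partial1, partial2) pairs."""
    queue1, queue2 = Queue(), Queue()

    def fill_queue(q, gen):
        for item in gen:
            q.put(item)
        q.put(None)  # sentinel: this stream is finished

    t1 = threading.Thread(target=fill_queue, args=(queue1, gen1))
    t2 = threading.Thread(target=fill_queue, args=(queue2, gen2))
    t1.start()
    t2.start()

    partial1, partial2 = "", ""
    done1 = done2 = False
    while not (done1 and done2):
        try:
            item1 = queue1.get(timeout=0.1)
            if item1 is None:
                done1 = True
            else:
                partial1 = item1
        except Empty:
            pass
        try:
            item2 = queue2.get(timeout=0.1)
            if item2 is None:
                done2 = True
            else:
                partial2 = item2
        except Empty:
            pass
        yield partial1, partial2

    t1.join()
    t2.join()


if __name__ == "__main__":
    a = fake_stream("model-a", ["The answer", " is", " 42."])
    b = fake_stream("model-b", ["Working on it", "... done."])
    for p1, p2 in merge_streams(a, b):
        print(p1, "||", p2)
```

The same trade-off applies in the app: the 0.1 s `get` timeouts mean the loop yields at roughly that cadence even when only one stream is still producing, which is acceptable for UI updates.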
requirements.txt
ADDED
@@ -0,0 +1,3 @@
+gradio>=4.0.0
+requests>=2.31.0
+python-dotenv>=1.0.0
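A rough local smoke test might look like the sketch below; the sample questions, the file name, and the `.env` line are illustrative assumptions, not part of the commit. The app expects `OPENROUTER_API_KEY` in the environment (presumably from a `.env` file, given `load_dotenv()`) and serves on port 7860 per `demo.launch`.

```python
# Hypothetical local smoke test (not part of the commit).
from pathlib import Path

# One question per line, as the upload UI expects.
Path("questions.txt").write_text(
    "What is the capital of France?\n"
    "Explain the difference between a list and a tuple in Python.\n",
    encoding="utf-8",
)

# .env (illustrative):
#   OPENROUTER_API_KEY=<your OpenRouter key>
#
# Then run `python app.py` and open http://localhost:7860 to upload questions.txt.
```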