Added streaming of model responses (only one model streams at a time)
Browse files
app.py
CHANGED
@@ -3,6 +3,7 @@ import random
|
|
3 |
import time
|
4 |
import os
|
5 |
import requests
|
|
|
6 |
from dotenv import load_dotenv
|
7 |
|
8 |
# Load environment variables
|
@@ -44,7 +45,8 @@ def get_response(question, model):
|
|
44 |
"model": model,
|
45 |
"messages": [
|
46 |
{"role": "user", "content": question}
|
47 |
-
]
|
|
|
48 |
}
|
49 |
|
50 |
try:
|
@@ -52,12 +54,29 @@ def get_response(question, model):
|
|
52 |
OPENROUTER_BASE_URL,
|
53 |
headers=headers,
|
54 |
json=data,
|
55 |
-
timeout=30 # 30 second timeout
|
|
|
56 |
)
|
57 |
response.raise_for_status()
|
58 |
|
59 |
-
|
60 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
61 |
|
62 |
except requests.exceptions.RequestException as e:
|
63 |
return f"Error: Failed to get response from {model}: {str(e)}"
|
@@ -120,8 +139,6 @@ with gr.Blocks() as demo:
|
|
120 |
questions = read_questions(file)
|
121 |
|
122 |
# Initialize all update values as blank
|
123 |
-
# We have 4 fields per question (model1, response1, model2, response2)
|
124 |
-
# => total of MAX_QUESTIONS * 4 output components
|
125 |
updates = [gr.update(value="")] * (MAX_QUESTIONS * 4)
|
126 |
|
127 |
# Process each question, 2 models per question
|
@@ -132,9 +149,9 @@ with gr.Blocks() as demo:
|
|
132 |
yield updates # partial update (reveal model_1 accordion)
|
133 |
|
134 |
# 2) Get response from model_1
|
135 |
-
response_1
|
136 |
-
|
137 |
-
|
138 |
|
139 |
# 3) Pick second model (ensure different from first), yield it
|
140 |
remaining_models = [m for m in MODELS if m != model_1]
|
@@ -143,9 +160,9 @@ with gr.Blocks() as demo:
|
|
143 |
yield updates
|
144 |
|
145 |
# 4) Get response from model_2
|
146 |
-
response_2
|
147 |
-
|
148 |
-
|
149 |
|
150 |
# The outputs we update after each yield
|
151 |
update_targets = []
|
|
|
3 |
import time
|
4 |
import os
|
5 |
import requests
|
6 |
+
import json
|
7 |
from dotenv import load_dotenv
|
8 |
|
9 |
# Load environment variables
|
|
|
45 |
"model": model,
|
46 |
"messages": [
|
47 |
{"role": "user", "content": question}
|
48 |
+
],
|
49 |
+
"stream": True
|
50 |
}
|
51 |
|
52 |
try:
|
|
|
54 |
OPENROUTER_BASE_URL,
|
55 |
headers=headers,
|
56 |
json=data,
|
57 |
+
timeout=30, # 30 second timeout
|
58 |
+
stream=True
|
59 |
)
|
60 |
response.raise_for_status()
|
61 |
|
62 |
+
full_response = ""
|
63 |
+
for line in response.iter_lines():
|
64 |
+
if line:
|
65 |
+
line = line.decode('utf-8')
|
66 |
+
if line.startswith('data: '):
|
67 |
+
json_str = line[6:] # Remove 'data: ' prefix
|
68 |
+
if json_str.strip() == '[DONE]':
|
69 |
+
break
|
70 |
+
try:
|
71 |
+
chunk = json.loads(json_str)
|
72 |
+
if chunk['choices'][0]['delta'].get('content'):
|
73 |
+
content = chunk['choices'][0]['delta']['content']
|
74 |
+
full_response += content
|
75 |
+
yield full_response
|
76 |
+
except json.JSONDecodeError:
|
77 |
+
continue
|
78 |
+
|
79 |
+
return full_response
|
80 |
|
81 |
except requests.exceptions.RequestException as e:
|
82 |
return f"Error: Failed to get response from {model}: {str(e)}"
|
|
|
139 |
questions = read_questions(file)
|
140 |
|
141 |
# Initialize all update values as blank
|
|
|
|
|
142 |
updates = [gr.update(value="")] * (MAX_QUESTIONS * 4)
|
143 |
|
144 |
# Process each question, 2 models per question
|
|
|
149 |
yield updates # partial update (reveal model_1 accordion)
|
150 |
|
151 |
# 2) Get response from model_1
|
152 |
+
for response_1 in get_response(question, model_1):
|
153 |
+
updates[i*4 + 1] = gr.update(value=response_1) # response1
|
154 |
+
yield updates
|
155 |
|
156 |
# 3) Pick second model (ensure different from first), yield it
|
157 |
remaining_models = [m for m in MODELS if m != model_1]
|
|
|
160 |
yield updates
|
161 |
|
162 |
# 4) Get response from model_2
|
163 |
+
for response_2 in get_response(question, model_2):
|
164 |
+
updates[i*4 + 3] = gr.update(value=response_2) # response2
|
165 |
+
yield updates
|
166 |
|
167 |
# The outputs we update after each yield
|
168 |
update_targets = []
|