atin121 committed
Commit c60890d · 1 Parent(s): 67a3b3a

Added streaming (but only one at a time)

Files changed (1)
  1. app.py +29 -12
app.py CHANGED
@@ -3,6 +3,7 @@ import random
 import time
 import os
 import requests
+import json
 from dotenv import load_dotenv
 
 # Load environment variables
@@ -44,7 +45,8 @@ def get_response(question, model):
         "model": model,
         "messages": [
             {"role": "user", "content": question}
-        ]
+        ],
+        "stream": True
     }
 
     try:
@@ -52,12 +54,29 @@ def get_response(question, model):
             OPENROUTER_BASE_URL,
             headers=headers,
             json=data,
-            timeout=30  # 30 second timeout
+            timeout=30,  # 30 second timeout
+            stream=True
         )
         response.raise_for_status()
 
-        result = response.json()
-        return result['choices'][0]['message']['content']
+        full_response = ""
+        for line in response.iter_lines():
+            if line:
+                line = line.decode('utf-8')
+                if line.startswith('data: '):
+                    json_str = line[6:]  # Remove 'data: ' prefix
+                    if json_str.strip() == '[DONE]':
+                        break
+                    try:
+                        chunk = json.loads(json_str)
+                        if chunk['choices'][0]['delta'].get('content'):
+                            content = chunk['choices'][0]['delta']['content']
+                            full_response += content
+                            yield full_response
+                    except json.JSONDecodeError:
+                        continue
+
+        return full_response
 
     except requests.exceptions.RequestException as e:
         return f"Error: Failed to get response from {model}: {str(e)}"
@@ -120,8 +139,6 @@ with gr.Blocks() as demo:
         questions = read_questions(file)
 
         # Initialize all update values as blank
-        # We have 4 fields per question (model1, response1, model2, response2)
-        # => total of MAX_QUESTIONS * 4 output components
        updates = [gr.update(value="")] * (MAX_QUESTIONS * 4)
 
         # Process each question, 2 models per question
@@ -132,9 +149,9 @@ with gr.Blocks() as demo:
             yield updates  # partial update (reveal model_1 accordion)
 
             # 2) Get response from model_1
-            response_1 = get_response(question, model_1)
-            updates[i*4 + 1] = gr.update(value=response_1)  # response1
-            yield updates
+            for response_1 in get_response(question, model_1):
+                updates[i*4 + 1] = gr.update(value=response_1)  # response1
+                yield updates
 
             # 3) Pick second model (ensure different from first), yield it
             remaining_models = [m for m in MODELS if m != model_1]
@@ -143,9 +160,9 @@ with gr.Blocks() as demo:
             yield updates
 
             # 4) Get response from model_2
-            response_2 = get_response(question, model_2)
-            updates[i*4 + 3] = gr.update(value=response_2)  # response2
-            yield updates
+            for response_2 in get_response(question, model_2):
+                updates[i*4 + 3] = gr.update(value=response_2)  # response2
+                yield updates
 
             # The outputs we update after each yield
             update_targets = []
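
For reference, the parsing loop added to get_response follows OpenRouter's server-sent-events format: each payload line is prefixed with "data: ", the stream terminates with "data: [DONE]", and each JSON chunk carries the next token in choices[0].delta.content. Below is a minimal standalone sketch of the same pattern; the endpoint URL, the OPENROUTER_API_KEY variable name, the placeholder model id, and the stream_chat name are illustrative assumptions, not code from this commit.

import json
import os

import requests

# Assumed endpoint for illustration; app.py defines its own OPENROUTER_BASE_URL.
OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1/chat/completions"


def stream_chat(question, model):
    """Yield the accumulated response text as streaming chunks arrive."""
    # OPENROUTER_API_KEY is an assumed variable name for this sketch.
    headers = {"Authorization": f"Bearer {os.environ['OPENROUTER_API_KEY']}"}
    data = {
        "model": model,
        "messages": [{"role": "user", "content": question}],
        "stream": True,
    }
    with requests.post(OPENROUTER_BASE_URL, headers=headers, json=data,
                       stream=True, timeout=30) as response:
        response.raise_for_status()
        full_response = ""
        for raw in response.iter_lines():
            if not raw:
                continue
            line = raw.decode("utf-8")
            if not line.startswith("data: "):
                continue  # skip SSE comments / keep-alive lines
            payload = line[len("data: "):]
            if payload.strip() == "[DONE]":
                break
            try:
                chunk = json.loads(payload)
            except json.JSONDecodeError:
                continue
            delta = chunk["choices"][0]["delta"].get("content")
            if delta:
                full_response += delta
                yield full_response

# Usage sketch: print the growing answer as it streams in
# (the model id is a placeholder, not taken from app.py).
# for text in stream_chat("Why is the sky blue?", "openai/gpt-4o-mini"):
#     print(text)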
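The "(but only one at a time)" in the commit title refers to the two get_response generators being drained one after the other, so model_2 only starts streaming once model_1 has finished. A hypothetical way to lift that limitation is to pump each generator on its own thread and merge their updates through a queue; the interleave_streams helper below is only a sketch of that idea and is not part of this commit.

import threading
from queue import Queue


def interleave_streams(gen_a, gen_b):
    """Drain two text generators concurrently, yielding ('a' | 'b', text)
    pairs as soon as either one produces an update."""
    updates_queue = Queue()
    done = object()  # sentinel marking an exhausted generator

    def pump(tag, gen):
        try:
            for item in gen:
                updates_queue.put((tag, item))
        finally:
            updates_queue.put((tag, done))  # always signal completion, even on error

    threads = [
        threading.Thread(target=pump, args=("a", gen_a), daemon=True),
        threading.Thread(target=pump, args=("b", gen_b), daemon=True),
    ]
    for t in threads:
        t.start()

    finished = 0
    while finished < len(threads):
        tag, item = updates_queue.get()
        if item is done:
            finished += 1
        else:
            yield tag, item

# Hypothetical use inside the per-question loop: update both response boxes
# as either model produces new text.
# for tag, text in interleave_streams(get_response(question, model_1),
#                                     get_response(question, model_2)):
#     updates[i*4 + 1 if tag == "a" else i*4 + 3] = gr.update(value=text)
#     yield updates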