Update api/utils.py
Browse files- api/utils.py +21 -24
api/utils.py
CHANGED
@@ -120,7 +120,10 @@ async def process_streaming_response(request: ChatRequest):
|
|
120 |
|
121 |
json_data = build_json_data(request, h_value, model_prefix)
|
122 |
|
|
|
123 |
buffer = ""
|
|
|
|
|
124 |
async with httpx.AsyncClient() as client:
|
125 |
try:
|
126 |
async with client.stream(
|
@@ -134,39 +137,33 @@ async def process_streaming_response(request: ChatRequest):
|
|
134 |
async for chunk in response.aiter_text():
|
135 |
if chunk:
|
136 |
buffer += chunk
|
137 |
-
|
|
|
138 |
if BLOCKED_MESSAGE in buffer:
|
139 |
logger.info("Blocked message detected in response.")
|
140 |
-
buffer = buffer.replace(BLOCKED_MESSAGE, '')
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
# Remove the prefix if present
|
150 |
cleaned_content = strip_model_prefix(content_to_yield, model_prefix)
|
151 |
timestamp = int(datetime.now().timestamp())
|
152 |
yield f"data: {json.dumps(create_chat_completion_data(cleaned_content, request.model, timestamp))}\n\n"
|
153 |
|
154 |
-
#
|
155 |
if buffer:
|
156 |
-
# Remove
|
157 |
if BLOCKED_MESSAGE in buffer:
|
158 |
logger.info("Blocked message detected in remaining buffer.")
|
159 |
-
buffer = buffer.replace(BLOCKED_MESSAGE, '')
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
timestamp = int(datetime.now().timestamp())
|
165 |
-
yield f"data: {json.dumps(create_chat_completion_data(cleaned_content, request.model, timestamp))}\n\n"
|
166 |
-
else:
|
167 |
-
cleaned_content = strip_model_prefix(buffer, model_prefix)
|
168 |
-
timestamp = int(datetime.now().timestamp())
|
169 |
-
yield f"data: {json.dumps(create_chat_completion_data(cleaned_content, request.model, timestamp))}\n\n"
|
170 |
|
171 |
# Signal the end of the streaming
|
172 |
timestamp = int(datetime.now().timestamp())
|
|
|
120 |
|
121 |
json_data = build_json_data(request, h_value, model_prefix)
|
122 |
|
123 |
+
# Initialize buffer to handle BLOCKED_MESSAGE that may be split across chunks
|
124 |
buffer = ""
|
125 |
+
buffer_size = len(BLOCKED_MESSAGE) - 1
|
126 |
+
|
127 |
async with httpx.AsyncClient() as client:
|
128 |
try:
|
129 |
async with client.stream(
|
|
|
137 |
async for chunk in response.aiter_text():
|
138 |
if chunk:
|
139 |
buffer += chunk
|
140 |
+
|
141 |
+
# Remove any occurrence of BLOCKED_MESSAGE in buffer
|
142 |
if BLOCKED_MESSAGE in buffer:
|
143 |
logger.info("Blocked message detected in response.")
|
144 |
+
buffer = buffer.replace(BLOCKED_MESSAGE, '')
|
145 |
+
|
146 |
+
# Process the buffer
|
147 |
+
# Keep the last buffer_size - 1 characters in buffer to handle partial BLOCKED_MESSAGE
|
148 |
+
while len(buffer) >= buffer_size:
|
149 |
+
content_to_yield = buffer[:-buffer_size+1]
|
150 |
+
buffer = buffer[-buffer_size+1:]
|
151 |
+
|
152 |
+
# Remove model prefix if present
|
|
|
153 |
cleaned_content = strip_model_prefix(content_to_yield, model_prefix)
|
154 |
timestamp = int(datetime.now().timestamp())
|
155 |
yield f"data: {json.dumps(create_chat_completion_data(cleaned_content, request.model, timestamp))}\n\n"
|
156 |
|
157 |
+
# Process any remaining content in buffer
|
158 |
if buffer:
|
159 |
+
# Remove any occurrence of BLOCKED_MESSAGE in buffer
|
160 |
if BLOCKED_MESSAGE in buffer:
|
161 |
logger.info("Blocked message detected in remaining buffer.")
|
162 |
+
buffer = buffer.replace(BLOCKED_MESSAGE, '')
|
163 |
+
|
164 |
+
cleaned_content = strip_model_prefix(buffer, model_prefix)
|
165 |
+
timestamp = int(datetime.now().timestamp())
|
166 |
+
yield f"data: {json.dumps(create_chat_completion_data(cleaned_content, request.model, timestamp))}\n\n"
|
|
|
|
|
|
|
|
|
|
|
|
|
167 |
|
168 |
# Signal the end of the streaming
|
169 |
timestamp = int(datetime.now().timestamp())
|