Spaces:
Runtime error
Runtime error
Commit
·
608950e
1
Parent(s):
ab616bd
Fixed streaming in chat_completion
Browse files- main/routes.py +7 -7
main/routes.py
CHANGED
@@ -70,12 +70,10 @@ async def create_chat_completion(request: ChatCompletionRequest):
|
|
70 |
last_message = request.messages[-1].content
|
71 |
|
72 |
if request.stream:
|
73 |
-
# For streaming, we need to create a generator that yields OpenAI-compatible chunks
|
74 |
async def generate_stream():
|
75 |
async for chunk in api.generate_stream(
|
76 |
prompt=last_message,
|
77 |
):
|
78 |
-
# Create a streaming response chunk in OpenAI format
|
79 |
response_chunk = {
|
80 |
"id": "chatcmpl-123",
|
81 |
"object": "chat.completion.chunk",
|
@@ -89,16 +87,18 @@ async def create_chat_completion(request: ChatCompletionRequest):
|
|
89 |
"finish_reason": None
|
90 |
}]
|
91 |
}
|
|
|
92 |
yield f"data: {json.dumps(response_chunk)}\n\n"
|
93 |
-
|
94 |
-
# Send the final chunk
|
95 |
-
yield f"data: [DONE]\n\n"
|
96 |
|
97 |
return StreamingResponse(
|
98 |
generate_stream(),
|
99 |
-
media_type="text/event-stream"
|
|
|
|
|
|
|
|
|
100 |
)
|
101 |
-
|
102 |
else:
|
103 |
# For non-streaming, generate the full response
|
104 |
response_text = await api.generate_response(
|
|
|
70 |
last_message = request.messages[-1].content
|
71 |
|
72 |
if request.stream:
|
|
|
73 |
async def generate_stream():
|
74 |
async for chunk in api.generate_stream(
|
75 |
prompt=last_message,
|
76 |
):
|
|
|
77 |
response_chunk = {
|
78 |
"id": "chatcmpl-123",
|
79 |
"object": "chat.completion.chunk",
|
|
|
87 |
"finish_reason": None
|
88 |
}]
|
89 |
}
|
90 |
+
# Need to format this exactly as SSE requires
|
91 |
yield f"data: {json.dumps(response_chunk)}\n\n"
|
92 |
+
yield "data: [DONE]\n\n"
|
|
|
|
|
93 |
|
94 |
return StreamingResponse(
|
95 |
generate_stream(),
|
96 |
+
media_type="text/event-stream",
|
97 |
+
headers={
|
98 |
+
"Cache-Control": "no-cache",
|
99 |
+
"Connection": "keep-alive",
|
100 |
+
}
|
101 |
)
|
|
|
102 |
else:
|
103 |
# For non-streaming, generate the full response
|
104 |
response_text = await api.generate_response(
|