AurelioAguirre committed
Commit 608950e · 1 Parent(s): ab616bd

Fixed streaming in chat_completion

Files changed (1): main/routes.py +7 -7
main/routes.py CHANGED
@@ -70,12 +70,10 @@ async def create_chat_completion(request: ChatCompletionRequest):
     last_message = request.messages[-1].content
 
     if request.stream:
-        # For streaming, we need to create a generator that yields OpenAI-compatible chunks
         async def generate_stream():
             async for chunk in api.generate_stream(
                 prompt=last_message,
             ):
-                # Create a streaming response chunk in OpenAI format
                 response_chunk = {
                     "id": "chatcmpl-123",
                     "object": "chat.completion.chunk",
@@ -89,16 +87,18 @@ async def create_chat_completion(request: ChatCompletionRequest):
                         "finish_reason": None
                     }]
                 }
+                # Need to format this exactly as SSE requires
                 yield f"data: {json.dumps(response_chunk)}\n\n"
-
-            # Send the final chunk
-            yield f"data: [DONE]\n\n"
+            yield "data: [DONE]\n\n"
 
         return StreamingResponse(
             generate_stream(),
-            media_type="text/event-stream"
+            media_type="text/event-stream",
+            headers={
+                "Cache-Control": "no-cache",
+                "Connection": "keep-alive",
+            }
         )
-
     else:
         # For non-streaming, generate the full response
         response_text = await api.generate_response(
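
For reference, a minimal client-side sketch of how the stream this handler now produces might be consumed. It is an illustration, not part of the commit: the route path /v1/chat/completions, the localhost base URL, and the "delta" field inside each chunk's choices[0] (those lines are elided from the diff) are assumptions based on the OpenAI-compatible shapes the handler emits.

# Hypothetical consumer for the SSE stream above. Assumptions (not shown in
# the diff): the route is POST /v1/chat/completions on localhost:8000, and
# each chunk's choices[0] carries an OpenAI-style "delta" dict alongside the
# "finish_reason" seen in the handler.
import json

import httpx

def stream_chat(prompt: str, base_url: str = "http://localhost:8000") -> None:
    payload = {
        "messages": [{"role": "user", "content": prompt}],
        "stream": True,
    }
    # Stream the body instead of buffering it, matching the
    # text/event-stream media type set by StreamingResponse.
    with httpx.stream(
        "POST", f"{base_url}/v1/chat/completions", json=payload, timeout=None
    ) as response:
        for line in response.iter_lines():
            if not line.startswith("data: "):
                continue  # skip the blank separator lines between SSE events
            data = line[len("data: "):]
            if data == "[DONE]":
                break  # end-of-stream sentinel yielded after the chunk loop
            chunk = json.loads(data)
            print(chunk["choices"][0]["delta"].get("content", ""),
                  end="", flush=True)

if __name__ == "__main__":
    stream_chat("Hello!")

Line-by-line reading works because each chunk is framed as a data: {json}\n\n SSE event; the data: [DONE]\n\n sentinel gives the client an unambiguous end-of-stream marker, and the Cache-Control: no-cache / Connection: keep-alive headers added in this commit discourage intermediaries from buffering or closing the long-lived response.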