AurelioAguirre committed on
Commit
e612baa
·
1 Parent(s): 608950e

Fixed streaming response being double-wrapped

Browse files
Files changed (1) hide show
  1. main/routes.py +7 -1
main/routes.py CHANGED
@@ -74,6 +74,13 @@ async def create_chat_completion(request: ChatCompletionRequest):
74
  async for chunk in api.generate_stream(
75
  prompt=last_message,
76
  ):
 
 
 
 
 
 
 
77
  response_chunk = {
78
  "id": "chatcmpl-123",
79
  "object": "chat.completion.chunk",
@@ -87,7 +94,6 @@ async def create_chat_completion(request: ChatCompletionRequest):
87
  "finish_reason": None
88
  }]
89
  }
90
- # Need to format this exactly as SSE requires
91
  yield f"data: {json.dumps(response_chunk)}\n\n"
92
  yield "data: [DONE]\n\n"
93
 
 
74
  async for chunk in api.generate_stream(
75
  prompt=last_message,
76
  ):
77
+ # Parse the SSE format from LLM Server
78
+ if chunk.startswith('data: '):
79
+ chunk = chunk[6:].strip() # Remove "data: " and trailing \n\n
80
+
81
+ if chunk == '[DONE]':
82
+ continue
83
+
84
  response_chunk = {
85
  "id": "chatcmpl-123",
86
  "object": "chat.completion.chunk",
 
94
  "finish_reason": None
95
  }]
96
  }
 
97
  yield f"data: {json.dumps(response_chunk)}\n\n"
98
  yield "data: [DONE]\n\n"
99