Update main.py
main.py CHANGED
@@ -560,49 +560,44 @@ async def chat_completions(request: ChatRequest, req: Request, api_key: str = De
         async def generate():
             try:
                 assistant_content = ""
-                async for chunk in async_generator:
+                for chunk in async_generator:
                     if isinstance(chunk, ImageResponseData):
                         # Handle image responses if necessary
                         image_markdown = f"\n"
                         assistant_content += image_markdown
                         response_chunk = {
                             "id": f"chatcmpl-{uuid.uuid4()}",
-                            "object": "chat.completion",
+                            "object": "chat.completion.chunk",  # Change to 'chat.completion.chunk'
                             "created": int(datetime.now().timestamp()),
                             "model": request.model,
                             "choices": [
                                 {
                                     "index": 0,
-                                    "message": {
-                                        "role": "assistant",
-                                        "content": image_markdown
-                                    },
+                                    "delta": {"role": "assistant", "content": " "},  # Initial space or any starter
                                     "finish_reason": None
                                 }
-                            ]
-                            "usage": None,  # Usage can be updated if you track tokens in real-time
+                            ]
                         }
+                        yield f"data: {json.dumps(response_chunk)}\n\n"
+                        response_chunk["choices"][0]["delta"]["content"] = image_markdown.strip()
+                        yield f"data: {json.dumps(response_chunk)}\n\n"
                     else:
                         assistant_content += chunk
                         # Yield the chunk as a partial choice
                         response_chunk = {
                             "id": f"chatcmpl-{uuid.uuid4()}",
-                            "object": "chat.completion",
+                            "object": "chat.completion.chunk",  # Change to 'chat.completion.chunk'
                             "created": int(datetime.now().timestamp()),
                             "model": request.model,
                             "choices": [
                                 {
                                     "index": 0,
-                                    "message": {
-                                        "role": "assistant",
-                                        "content": chunk
-                                    },
+                                    "delta": {"role": "assistant", "content": chunk},
                                     "finish_reason": None
                                 }
-                            ]
-                            "usage": None,  # Usage can be updated if you track tokens in real-time
+                            ]
                         }
-                        yield f"{json.dumps(response_chunk)}\n\n"
+                        yield f"data: {json.dumps(response_chunk)}\n\n"
 
                 # After all chunks are sent, send the final message with finish_reason
                 prompt_tokens = sum(len(msg.content.split()) for msg in request.messages)
@@ -632,17 +627,17 @@ async def chat_completions(request: ChatRequest, req: Request, api_key: str = De
                         "estimated_cost": estimated_cost
                     },
                 }
-                yield f"{json.dumps(final_response)}\n\n"
-                yield "DONE\n\n"
+                yield f"data: {json.dumps(final_response)}\n\n"
+                yield "data: [DONE]\n\n"
             except HTTPException as he:
                 error_response = {"error": he.detail}
-                yield f"{json.dumps(error_response)}\n\n"
+                yield f"data: {json.dumps(error_response)}\n\n"
             except Exception as e:
                 logger.exception(f"Error during streaming response generation from IP: {client_ip}.")
                 error_response = {"error": str(e)}
-                yield f"{json.dumps(error_response)}\n\n"
+                yield f"data: {json.dumps(error_response)}\n\n"
 
-        return StreamingResponse(generate(), media_type="text/
+        return StreamingResponse(generate(), media_type="text/event-stream")  # Use SSE media type
     else:
         response_content = ""
         async for chunk in async_generator:
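On the wire, each `yield` above becomes one server-sent event: a `data: ` prefix, the JSON-encoded chunk, and a blank line as the event delimiter, with a literal `data: [DONE]` frame closing the stream. Roughly, with illustrative field values (note that this server repeats `role` in every delta, unlike clients that send it only in the first chunk):

```
data: {"id": "chatcmpl-...", "object": "chat.completion.chunk", "created": 1700000000, "model": "...", "choices": [{"index": 0, "delta": {"role": "assistant", "content": "Hel"}, "finish_reason": null}]}

data: {"id": "chatcmpl-...", "object": "chat.completion.chunk", "created": 1700000000, "model": "...", "choices": [{"index": 0, "delta": {"role": "assistant", "content": "lo"}, "finish_reason": null}]}

data: [DONE]
```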
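Because the endpoint now speaks the OpenAI-style SSE chunk format, any client that parses `data:` frames can consume it. Below is a minimal, hypothetical client sketch: the base URL, API key, and model name are placeholders, and since the diff does not show the exact shape of the final usage chunk, the sketch only reads `delta.content` where present.

```python
import json
import requests  # assumes the requests package is installed

def stream_chat(prompt: str,
                url: str = "http://localhost:8000/v1/chat/completions",  # placeholder base URL
                api_key: str = "YOUR_API_KEY",                           # placeholder key
                model: str = "default-model") -> str:                    # placeholder model name
    """Send a streaming chat request and reassemble the assistant reply."""
    payload = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "stream": True,
    }
    headers = {"Authorization": f"Bearer {api_key}"}
    parts = []
    with requests.post(url, json=payload, headers=headers, stream=True) as resp:
        resp.raise_for_status()
        for line in resp.iter_lines(decode_unicode=True):
            if not line or not line.startswith("data: "):
                continue  # skip the blank lines that delimit SSE events
            data = line[len("data: "):]
            if data == "[DONE]":
                break  # end-of-stream sentinel emitted by the endpoint
            event = json.loads(data)
            if "error" in event:
                raise RuntimeError(event["error"])  # error frames carry only an "error" key
            for choice in event.get("choices", []):
                parts.append(choice.get("delta", {}).get("content") or "")
    return "".join(parts)

if __name__ == "__main__":
    print(stream_chat("Say hello"))
```

Concatenating the `delta.content` fragments in arrival order reconstructs the full reply. Serving the stream as `text/event-stream` with `data:`-prefixed frames is also what lets standard OpenAI-compatible client libraries and browser `EventSource` consumers read it directly.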