Update main.py
main.py CHANGED
@@ -560,49 +560,44 @@ async def chat_completions(request: ChatRequest, req: Request, api_key: str = De
         async def generate():
             try:
                 assistant_content = ""
-                async for chunk in async_generator:
+                for chunk in async_generator:
                     if isinstance(chunk, ImageResponseData):
                         # Handle image responses if necessary
                         image_markdown = f"\n"
                         assistant_content += image_markdown
                         response_chunk = {
                             "id": f"chatcmpl-{uuid.uuid4()}",
-                            "object": "chat.completion",
+                            "object": "chat.completion.chunk",  # Change to 'chat.completion.chunk'
                             "created": int(datetime.now().timestamp()),
                             "model": request.model,
                             "choices": [
                                 {
                                     "index": 0,
-                                    "message": {
-                                        "role": "assistant",
-                                        "content": image_markdown
-                                    },
+                                    "delta": {"role": "assistant", "content": " "},  # Initial space or any starter
                                     "finish_reason": None
                                 }
-                            ]
-                            "usage": None,  # Usage can be updated if you track tokens in real-time
+                            ]
                         }
+                        yield f"data: {json.dumps(response_chunk)}\n\n"
+                        response_chunk["choices"][0]["delta"]["content"] = image_markdown.strip()
+                        yield f"data: {json.dumps(response_chunk)}\n\n"
                     else:
                         assistant_content += chunk
                         # Yield the chunk as a partial choice
                         response_chunk = {
                             "id": f"chatcmpl-{uuid.uuid4()}",
-                            "object": "chat.completion",
+                            "object": "chat.completion.chunk",  # Change to 'chat.completion.chunk'
                             "created": int(datetime.now().timestamp()),
                             "model": request.model,
                             "choices": [
                                 {
                                     "index": 0,
-                                    "message": {
-                                        "role": "assistant",
-                                        "content": chunk
-                                    },
+                                    "delta": {"role": "assistant", "content": chunk},
                                     "finish_reason": None
                                 }
-                            ]
-                            "usage": None,  # Usage can be updated if you track tokens in real-time
+                            ]
                         }
-                        yield f"{json.dumps(response_chunk)}\n\n"
+                        yield f"data: {json.dumps(response_chunk)}\n\n"
 
                 # After all chunks are sent, send the final message with finish_reason
                 prompt_tokens = sum(len(msg.content.split()) for msg in request.messages)
@@ -632,17 +627,17 @@ async def chat_completions(request: ChatRequest, req: Request, api_key: str = De
                         "estimated_cost": estimated_cost
                     },
                 }
-                yield f"{json.dumps(final_response)}\n\n"
-                yield "DONE\n\n"
+                yield f"data: {json.dumps(final_response)}\n\n"
+                yield "data: [DONE]\n\n"
             except HTTPException as he:
                 error_response = {"error": he.detail}
-                yield f"{json.dumps(error_response)}\n\n"
+                yield f"data: {json.dumps(error_response)}\n\n"
             except Exception as e:
                 logger.exception(f"Error during streaming response generation from IP: {client_ip}.")
                 error_response = {"error": str(e)}
-                yield f"{json.dumps(error_response)}\n\n"
+                yield f"data: {json.dumps(error_response)}\n\n"
 
-        return StreamingResponse(generate(), media_type="text/
+        return StreamingResponse(generate(), media_type="text/event-stream")  # Use SSE media type
     else:
         response_content = ""
         async for chunk in async_generator:
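On the wire, each `yield` above becomes one server-sent event: a `data: ` prefix, the JSON-encoded chunk, and a blank line as the event delimiter, with a literal `data: [DONE]` frame closing the stream. Roughly, with illustrative field values (note that this server repeats `role` in every delta, unlike clients that send it only in the first chunk):

```
data: {"id": "chatcmpl-...", "object": "chat.completion.chunk", "created": 1700000000, "model": "...", "choices": [{"index": 0, "delta": {"role": "assistant", "content": "Hel"}, "finish_reason": null}]}

data: {"id": "chatcmpl-...", "object": "chat.completion.chunk", "created": 1700000000, "model": "...", "choices": [{"index": 0, "delta": {"role": "assistant", "content": "lo"}, "finish_reason": null}]}

data: [DONE]
```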
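Because the endpoint now speaks the OpenAI-style SSE chunk format, any client that parses `data:` frames can consume it. Below is a minimal, hypothetical client sketch: the base URL, API key, and model name are placeholders, and since the diff does not show the exact shape of the final usage chunk, the sketch only reads `delta.content` where present.

```python
import json
import requests  # assumes the requests package is installed

def stream_chat(prompt: str,
                url: str = "http://localhost:8000/v1/chat/completions",  # placeholder base URL
                api_key: str = "YOUR_API_KEY",                           # placeholder key
                model: str = "default-model") -> str:                    # placeholder model name
    """Send a streaming chat request and reassemble the assistant reply."""
    payload = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "stream": True,
    }
    headers = {"Authorization": f"Bearer {api_key}"}
    parts = []
    with requests.post(url, json=payload, headers=headers, stream=True) as resp:
        resp.raise_for_status()
        for line in resp.iter_lines(decode_unicode=True):
            if not line or not line.startswith("data: "):
                continue  # skip the blank lines that delimit SSE events
            data = line[len("data: "):]
            if data == "[DONE]":
                break  # end-of-stream sentinel emitted by the endpoint
            event = json.loads(data)
            if "error" in event:
                raise RuntimeError(event["error"])  # error frames carry only an "error" key
            for choice in event.get("choices", []):
                parts.append(choice.get("delta", {}).get("content") or "")
    return "".join(parts)

if __name__ == "__main__":
    print(stream_chat("Say hello"))
```

Concatenating the `delta.content` fragments in arrival order reconstructs the full reply. Serving the stream as `text/event-stream` with `data:`-prefixed frames is also what lets standard OpenAI-compatible client libraries and browser `EventSource` consumers read it directly.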