Niansuh committed
Commit 94877ab · verified · 1 parent: 7714a08

Update main.py

Files changed (1)
  1. main.py +16 -21
main.py CHANGED

```diff
@@ -560,49 +560,44 @@ async def chat_completions(request: ChatRequest, req: Request, api_key: str = De
         async def generate():
             try:
                 assistant_content = ""
-                async for chunk in async_generator:
+                for chunk in async_generator:
                     if isinstance(chunk, ImageResponseData):
                         # Handle image responses if necessary
                         image_markdown = f"![image]({chunk.images})\n"
                         assistant_content += image_markdown
                         response_chunk = {
                             "id": f"chatcmpl-{uuid.uuid4()}",
-                            "object": "chat.completion",
+                            "object": "chat.completion.chunk",  # Change to 'chat.completion.chunk'
                             "created": int(datetime.now().timestamp()),
                             "model": request.model,
                             "choices": [
                                 {
                                     "index": 0,
-                                    "message": {
-                                        "role": "assistant",
-                                        "content": image_markdown
-                                    },
+                                    "delta": {"role": "assistant", "content": " "},  # Initial space or any starter
                                     "finish_reason": None
                                 }
-                            ],
-                            "usage": None,  # Usage can be updated if you track tokens in real-time
+                            ]
                         }
+                        yield f"data: {json.dumps(response_chunk)}\n\n"
+                        response_chunk["choices"][0]["delta"]["content"] = image_markdown.strip()
+                        yield f"data: {json.dumps(response_chunk)}\n\n"
                     else:
                         assistant_content += chunk
                         # Yield the chunk as a partial choice
                         response_chunk = {
                             "id": f"chatcmpl-{uuid.uuid4()}",
-                            "object": "chat.completion",
+                            "object": "chat.completion.chunk",  # Change to 'chat.completion.chunk'
                             "created": int(datetime.now().timestamp()),
                             "model": request.model,
                             "choices": [
                                 {
                                     "index": 0,
-                                    "message": {
-                                        "role": "assistant",
-                                        "content": chunk
-                                    },
+                                    "delta": {"role": "assistant", "content": chunk},
                                     "finish_reason": None
                                 }
-                            ],
-                            "usage": None,  # Usage can be updated if you track tokens in real-time
+                            ]
                         }
-                    yield f"{json.dumps(response_chunk)}\n\n"
+                        yield f"data: {json.dumps(response_chunk)}\n\n"
 
                 # After all chunks are sent, send the final message with finish_reason
                 prompt_tokens = sum(len(msg.content.split()) for msg in request.messages)
@@ -632,17 +627,17 @@ async def chat_completions(request: ChatRequest, req: Request, api_key: str = De
                         "estimated_cost": estimated_cost
                     },
                 }
-                yield f"{json.dumps(final_response)}\n\n"
-                yield "DONE\n\n"
+                yield f"data: {json.dumps(final_response)}\n\n"
+                yield "data: [DONE]\n\n"
             except HTTPException as he:
                 error_response = {"error": he.detail}
-                yield f"{json.dumps(error_response)}\n\n"
+                yield f"data: {json.dumps(error_response)}\n\n"
             except Exception as e:
                 logger.exception(f"Error during streaming response generation from IP: {client_ip}.")
                 error_response = {"error": str(e)}
-                yield f"{json.dumps(error_response)}\n\n"
+                yield f"data: {json.dumps(error_response)}\n\n"
 
-        return StreamingResponse(generate(), media_type="text/plain")
+        return StreamingResponse(generate(), media_type="text/event-stream")  # Use SSE media type
     else:
         response_content = ""
         async for chunk in async_generator:
```
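For reference, the new wire format is OpenAI-style SSE: each event is a `data: `-prefixed JSON `chat.completion.chunk` whose `choices[0].delta` carries the incremental text, and the stream is terminated by `data: [DONE]`. The sketch below shows one way a client could consume it; the base URL, route path, model name, and bearer-token header are illustrative assumptions, since the diff does not show them.

```python
# Minimal client sketch for the new SSE framing. Assumptions (not shown in
# this commit): the app listens on http://localhost:8000, the route is
# /v1/chat/completions, and auth is a bearer token.
import json

import requests


def stream_chat(prompt: str) -> str:
    resp = requests.post(
        "http://localhost:8000/v1/chat/completions",  # assumed route
        headers={"Authorization": "Bearer YOUR_KEY"},  # assumed auth scheme
        json={
            "model": "gpt-4o",  # assumed model name
            "messages": [{"role": "user", "content": prompt}],
            "stream": True,
        },
        stream=True,
    )
    resp.raise_for_status()
    content = ""
    for line in resp.iter_lines(decode_unicode=True):
        if not line.startswith("data: "):
            continue  # skip the blank separator lines between SSE events
        payload = line[len("data: "):]
        if payload == "[DONE]":  # end-of-stream sentinel added by this commit
            break
        event = json.loads(payload)
        # Chunks now carry incremental deltas instead of full messages;
        # error events ({"error": ...}) have no choices, so default safely.
        delta = event.get("choices", [{}])[0].get("delta", {})
        content += delta.get("content", "")
    return content


if __name__ == "__main__":
    print(stream_chat("Hello!"))
```

Returning the stream as `text/event-stream` (rather than the previous `text/plain`) is what signals this framing to SSE-aware clients, such as the OpenAI SDKs' streaming mode.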