Commit
·
3d19e0f
1
Parent(s):
865b816
Update main.py
Browse files
main.py
CHANGED
@@ -91,33 +91,25 @@ async def chat(request: ChatCompletionRequest):
|
|
91 |
return StreamingResponse(format_response(chat_chunks), media_type="text/event-stream")
|
92 |
|
93 |
async def stream_response(tokens: Any) -> None:
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
{
|
101 |
-
'
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
})
|
114 |
-
await send({
|
115 |
-
"type": "http.response.body",
|
116 |
-
"body": b"event: done\ndata: {}\n\n",
|
117 |
-
"more_body": False,
|
118 |
-
})
|
119 |
-
except Exception as e:
|
120 |
-
print(f"Exception in event publisher: {str(e)}")
|
121 |
|
122 |
async def chatV2(request: Request, body: ChatCompletionRequest):
|
123 |
combined_messages = ' '.join([message.content for message in body.messages])
|
|
|
91 |
return StreamingResponse(format_response(chat_chunks), media_type="text/event-stream")
|
92 |
|
93 |
async def stream_response(tokens: Any) -> Any:
    """Stream LLM output as OpenAI-style server-sent events.

    Async generator intended to back a ``StreamingResponse`` with
    ``media_type="text/event-stream"``. Each chunk produced by
    ``llm.generate`` is detokenized, wrapped in an OpenAI-style
    ``choices`` payload, and emitted as a ``data:`` SSE line; a final
    ``event: done`` message terminates the stream.

    Args:
        tokens: Prompt tokens accepted by ``llm.generate`` — exact type
            depends on the llm backend; TODO confirm against the caller.

    Yields:
        str: SSE-formatted lines (``"data: {...}\\n\\n"`` per chunk, then
        the terminating done event).
    """
    try:
        iterator: Generator = llm.generate(tokens)
        for chat_chunk in iterator:
            # Detokenize once per chunk — the original called
            # llm.detokenize twice (content + finish_reason check).
            text = llm.detokenize(chat_chunk)
            response = {
                'choices': [
                    {
                        'message': {
                            'role': 'system',
                            'content': text
                        },
                        'finish_reason': 'stop' if text == "[DONE]" else 'unknown'
                    }
                ]
            }
            yield f"data: {json.dumps(response)}\n\n"
        # Bug fix: the original yielded bytes (b"...") here while every other
        # chunk is str; keep chunk types consistent so the SSE encoder treats
        # all chunks uniformly.
        yield "event: done\ndata: {}\n\n"
    except Exception as e:
        # Best-effort boundary: log and end the stream rather than crash the
        # response mid-flight. NOTE(review): consider logging.exception here.
        print(f"Exception in event publisher: {str(e)}")
|
112 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
113 |
|
114 |
async def chatV2(request: Request, body: ChatCompletionRequest):
|
115 |
combined_messages = ' '.join([message.content for message in body.messages])
|