Damien Benveniste committed · Commit b210a93
Parent(s): 2ffd335
modified
app.py
CHANGED
@@ -5,6 +5,7 @@ from vllm import AsyncLLMEngine, SamplingParams
 from vllm.engine.arg_utils import AsyncEngineArgs
 import asyncio
 import json
+import uuid
 
 app = FastAPI()
 
@@ -32,8 +33,10 @@ async def generate_stream(prompt: str, max_tokens: int, temperature: float):
         temperature=temperature,
         max_tokens=max_tokens
     )
+
+    request_id = str(uuid.uuid4())
 
-    async for output in engine.generate(prompt, sampling_params,
+    async for output in engine.generate(prompt, sampling_params, request_id=request_id):  # True enables streaming
         yield f"data: {json.dumps({'text': output.outputs[0].text})}\n\n"
 
     yield "data: [DONE]\n\n"
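The change itself is small: vLLM's AsyncLLMEngine.generate identifies each generation by a unique request id, so the commit imports uuid, builds request_id = str(uuid.uuid4()), and passes it to engine.generate. For context, here is a minimal sketch of how the patched generate_stream coroutine could sit in the rest of app.py; the engine construction (model name) and the /generate route are illustrative assumptions and are not part of this commit.

# Minimal sketch, assuming a model name and a /generate SSE route that are NOT
# shown in this diff; only generate_stream's body mirrors the committed code.
from fastapi import FastAPI
from fastapi.responses import StreamingResponse
from vllm import AsyncLLMEngine, SamplingParams
from vllm.engine.arg_utils import AsyncEngineArgs
import json
import uuid

app = FastAPI()

# Assumed engine setup; the Space's real engine arguments are not in the diff.
engine = AsyncLLMEngine.from_engine_args(AsyncEngineArgs(model="facebook/opt-125m"))

async def generate_stream(prompt: str, max_tokens: int, temperature: float):
    sampling_params = SamplingParams(
        temperature=temperature,
        max_tokens=max_tokens
    )

    # Each call to AsyncLLMEngine.generate must carry a unique request id;
    # this is exactly what the commit adds.
    request_id = str(uuid.uuid4())

    async for output in engine.generate(prompt, sampling_params, request_id=request_id):
        # output.outputs[0].text is the cumulative text generated so far.
        yield f"data: {json.dumps({'text': output.outputs[0].text})}\n\n"

    yield "data: [DONE]\n\n"

@app.post("/generate")
async def generate(prompt: str, max_tokens: int = 256, temperature: float = 0.7):
    # Stream tokens back to the client as server-sent events.
    return StreamingResponse(
        generate_stream(prompt, max_tokens, temperature),
        media_type="text/event-stream",
    )

Note that the leftover "# True enables streaming" comment on the new async for line appears to be from an earlier revision: engine.generate is already an async generator of partial RequestOutput objects, and each SSE event re-sends the full text generated so far rather than a delta.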