Damien Benveniste committed
Commit b210a93 · Parent(s): 2ffd335
Files changed (1): app.py (+4 -1)
app.py CHANGED
@@ -5,6 +5,7 @@ from vllm import AsyncLLMEngine, SamplingParams
 from vllm.engine.arg_utils import AsyncEngineArgs
 import asyncio
 import json
+import uuid
 
 app = FastAPI()
 
@@ -32,8 +33,10 @@ async def generate_stream(prompt: str, max_tokens: int, temperature: float):
         temperature=temperature,
         max_tokens=max_tokens
     )
+
+    request_id = str(uuid.uuid4())
 
-    async for output in engine.generate(prompt, sampling_params, 1): # True enables streaming
+    async for output in engine.generate(prompt, sampling_params, request_id=request_id): # request_id identifies this request to the engine
         yield f"data: {json.dumps({'text': output.outputs[0].text})}\n\n"
 
     yield "data: [DONE]\n\n"