Damien Benveniste committed on
Commit
14549f3
·
1 Parent(s): 1d83e4f
Files changed (1) hide show
  1. app.py +2 -0
app.py CHANGED
@@ -13,6 +13,8 @@ app = FastAPI()
13
  engine = AsyncLLMEngine.from_engine_args(
14
  AsyncEngineArgs(
15
  model='microsoft/Phi-3-mini-4k-instruct',
 
 
16
  )
17
  )
18
 
 
13
  engine = AsyncLLMEngine.from_engine_args(
14
  AsyncEngineArgs(
15
  model='microsoft/Phi-3-mini-4k-instruct',
16
+ dtype="half",
17
+ gpu_memory_utilization=0.99,
18
  )
19
  )
20