MrAli committed on
Commit 0f637e1 · verified · 1 Parent(s): ceede36

Update app.py

Files changed (1): app.py (+13 -1)
app.py CHANGED
@@ -3,12 +3,16 @@ from llama_cpp import Llama
 from huggingface_hub import hf_hub_download
 import os
 import requests
+from llama_cpp.server.app import create_app
+from llama_cpp.server.settings import Settings
+
+
 os.system("ulimit -l unlimited")
 
 app = FastAPI()
 
 hf_hub_download("TheBloke/deepseek-coder-1.3b-base-GGUF", "deepseek-coder-1.3b-base.Q5_K_M.gguf", local_dir="./")
-model_l = Llama(model_path="./deepseek-coder-1.3b-base.Q5_K_M.gguf", n_ctx=16000, n_gpu_layers=0, n_threads=2, use_mlock=True)
+model_l = Llama(model_path="./deepseek-coder-1.3b-base.Q5_K_M.gguf", n_ctx=16192, n_gpu_layers=0, n_threads=2, use_mlock=True)
 
 
 @app.get("/check")
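A note on the unchanged `os.system("ulimit -l unlimited")` line above: `ulimit` is a shell builtin that runs in a short-lived child shell, so it cannot raise the memlock limit of the Python process that later loads the model with `use_mlock=True`. A minimal in-process sketch using the standard-library `resource` module (an assumed alternative, Linux-only; an unprivileged process can only raise its soft limit up to the current hard limit):

    import resource

    # Raise this process's memlock soft limit to its hard limit so that
    # use_mlock=True can pin the model weights in RAM without paging.
    soft, hard = resource.getrlimit(resource.RLIMIT_MEMLOCK)
    resource.setrlimit(resource.RLIMIT_MEMLOCK, (hard, hard))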
 
@@ -34,6 +38,14 @@ async def completion(request: Request):
     except:
         return {"responses": "Error!"}
 
+
+app = create_app(
+    Settings(
+        n_threads=2, # set to number of cpu cores
+        model="./deepseek-coder-1.3b-base.Q5_K_M.gguf",
+        embedding=True
+    ))
+
 if __name__ == "__main__":
     import uvicorn
     uvicorn.run(app, host="0.0.0.0", port=7860)
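Because `app` is reassigned after the route definitions, uvicorn now serves llama-cpp-python's bundled server rather than the hand-rolled `/check` and `/completion` routes above it; `create_app(Settings(...))` returns a FastAPI app with OpenAI-compatible endpoints, and `embedding=True` switches on the embeddings route as well. A minimal client sketch, assuming the server is reachable at the host/port passed to `uvicorn.run` (the prompt text is illustrative):

    import requests

    BASE = "http://localhost:7860"

    # Text completion via the OpenAI-compatible route mounted by create_app().
    resp = requests.post(
        f"{BASE}/v1/completions",
        json={"prompt": "def fibonacci(n):", "max_tokens": 64},
    )
    print(resp.json()["choices"][0]["text"])

    # embedding=True also enables /v1/embeddings.
    emb = requests.post(f"{BASE}/v1/embeddings", json={"input": "hello world"})
    print(len(emb.json()["data"][0]["embedding"]))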