Update app.py
Browse files
app.py
CHANGED
@@ -3,12 +3,16 @@ from llama_cpp import Llama
|
|
3 |
from huggingface_hub import hf_hub_download
|
4 |
import os
|
5 |
import requests
|
|
|
|
|
|
|
|
|
6 |
os.system("ulimit -l unlimited")
|
7 |
|
8 |
app = FastAPI()
|
9 |
|
10 |
hf_hub_download("TheBloke/deepseek-coder-1.3b-base-GGUF", "deepseek-coder-1.3b-base.Q5_K_M.gguf", local_dir="./")
|
11 |
-
model_l = Llama(model_path="./deepseek-coder-1.3b-base.Q5_K_M.gguf", n_ctx=
|
12 |
|
13 |
|
14 |
@app.get("/check")
|
@@ -34,6 +38,14 @@ async def completion(request: Request):
|
|
34 |
except:
|
35 |
return {"responses": "Error!"}
|
36 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
# Script entrypoint (pre-change copy rendered by the diff view).
if __name__ == "__main__":
    import uvicorn
    # 0.0.0.0 binds all interfaces; 7860 is presumably the hosting platform's
    # expected port (looks like a Hugging Face Space) — confirm with the deploy config.
    uvicorn.run(app, host="0.0.0.0", port=7860)
|
|
|
3 |
# --- module setup: imports, process limits, model download and load (import-time side effects) ---
from huggingface_hub import hf_hub_download
import os
import requests
from llama_cpp.server.app import create_app
from llama_cpp.server.settings import Settings

# BUG FIX: os.system("ulimit -l unlimited") was a no-op — `ulimit` runs in a
# throwaway child shell and cannot change this process's resource limits.
# Raise RLIMIT_MEMLOCK in-process instead; it matters because the model below
# is loaded with use_mlock=True. Best-effort: ignore failure when the platform
# has no `resource` module or the process lacks the privilege.
try:
    import resource
    resource.setrlimit(resource.RLIMIT_MEMLOCK,
                       (resource.RLIM_INFINITY, resource.RLIM_INFINITY))
except (ImportError, ValueError, OSError):
    pass  # non-Unix platform or insufficient privileges; mlock may fail later

app = FastAPI()

# Fetch the quantized GGUF weights into the working directory
# (hf_hub_download is a no-op when the file is already cached locally).
hf_hub_download("TheBloke/deepseek-coder-1.3b-base-GGUF",
                "deepseek-coder-1.3b-base.Q5_K_M.gguf",
                local_dir="./")

# CPU-only load, pinned in RAM via mlock.
# NOTE(review): n_ctx=16192 looks like a typo for 16384 (a power of two is the
# usual native context size) — left unchanged to preserve behavior; confirm.
model_l = Llama(model_path="./deepseek-coder-1.3b-base.Q5_K_M.gguf",
                n_ctx=16192, n_gpu_layers=0, n_threads=2, use_mlock=True)
|
16 |
|
17 |
|
18 |
@app.get("/check")
|
|
|
38 |
except:
|
39 |
return {"responses": "Error!"}
|
40 |
|
41 |
+
|
42 |
+
# NOTE(review): rebinding `app` here discards the FastAPI() instance created
# earlier in this file, along with every route registered on it — after this
# line uvicorn serves only the llama_cpp server app built from Settings.
# Confirm the earlier endpoints are intentionally dead before relying on them.
app = create_app(
    Settings(
        n_threads=2, # set to number of cpu cores
        model="./deepseek-coder-1.3b-base.Q5_K_M.gguf",
        embedding=True # also enable embedding support in the server settings
    ))
|
48 |
+
|
49 |
# Launch the ASGI server only when this file is executed as a script.
if __name__ == "__main__":
    import uvicorn
    bind_host, bind_port = "0.0.0.0", 7860
    uvicorn.run(app, host=bind_host, port=bind_port)
|