# llama-cpp-server / main.py
# Source: Hugging Face Space by matthoffner (commit ad8faaf, "Update main.py", 717 bytes)
from llama_cpp.server.app import create_app, Settings
from fastapi.responses import HTMLResponse
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import RedirectResponse
import os
# Path to the GGUF model file; override with MODEL_PATH, defaulting to the
# location the container image bakes the model into.
model_path = os.environ.get("MODEL_PATH", "/home/user/model/gguf-model.gguf")

# Build the llama-cpp-python OpenAI-compatible FastAPI app.
# N_THREADS / N_GPU_LAYERS may be tuned per host without editing code;
# defaults preserve the original configuration (4 CPU threads, 33 layers
# offloaded to GPU, embedding endpoint enabled).
app = create_app(
    Settings(
        n_threads=int(os.environ.get("N_THREADS", 4)),
        model=model_path,
        embedding=True,
        n_gpu_layers=int(os.environ.get("N_GPU_LAYERS", 33)),
    )
)
# Allow cross-origin requests from any site so browser-based clients can
# call the API directly.
# NOTE(review): allow_origins=["*"] combined with allow_credentials=True is
# wide open — any origin can send credentialed requests (Starlette echoes the
# caller's origin in this mode). Fine for a public demo; tighten the origin
# list before exposing anything sensitive.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
@app.get("/")
async def redirect_root_to_docs():
    """Send visitors hitting the site root to the interactive API docs."""
    docs_url = "/docs"
    return RedirectResponse(url=docs_url)
if __name__ == "__main__":
    import uvicorn

    # Listen on all interfaces; 7860 is the Hugging Face Spaces default port.
    # A PORT environment variable overrides it without changing the default.
    uvicorn.run(app, host="0.0.0.0", port=int(os.environ.get("PORT", 7860)))