# llama-cpp-server / main.py — by matthoffner (commit fb5554d, verified)
from llama_cpp.server.app import create_app, Settings
from fastapi.responses import HTMLResponse
from fastapi.middleware.cors import CORSMiddleware
import os
# Build the llama.cpp OpenAI-compatible FastAPI server.
#
# Defaults match the bundled GGUF model, but each setting can be
# overridden via environment variables so the container is reusable
# without editing this file (this also puts the previously-unused
# `os` import to work):
#   N_THREADS     — CPU threads for inference (default: 4)
#   MODEL_PATH    — path to the GGUF model file (default: model/gguf-model.gguf)
#   N_GPU_LAYERS  — layers to offload to GPU (default: 33)
app = create_app(
    Settings(
        n_threads=int(os.environ.get("N_THREADS", "4")),
        model=os.environ.get("MODEL_PATH", "model/gguf-model.gguf"),
        embedding=True,  # expose the /v1/embeddings endpoint
        n_gpu_layers=int(os.environ.get("N_GPU_LAYERS", "33")),
    )
)
# Permissive CORS: accept requests from any origin, with any method,
# headers, and credentials, so browser clients can call the API directly.
cors_config = {
    "allow_origins": ["*"],
    "allow_credentials": True,
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **cors_config)
@app.get("/", response_class=HTMLResponse)
def custom_index_route():
    """Serve a minimal static HTML landing page at the server root."""
    page = """
    <html>
        <body>
            <h1>Test</h1>
        </body>
    </html>
    """
    return HTMLResponse(content=page)
if __name__ == "__main__":
    # When executed as a script, serve the ASGI app directly.
    # Binds all interfaces on port 7860 (the standard Hugging Face
    # Spaces port).
    import uvicorn

    bind_host, bind_port = "0.0.0.0", 7860
    uvicorn.run(app, host=bind_host, port=bind_port)