# llama-cpp-server / main.py
# Source: Hugging Face Space by matthoffner (commit 883ba9d, "Update main.py", 653 bytes)
from llama_cpp.server.app import create_app, Settings
from fastapi.responses import HTMLResponse
import os
# Build the llama-cpp-python OpenAI-compatible FastAPI app.
# NOTE: the original had a missing comma after `embedding=True`, which is a
# SyntaxError — fixed here.
app = create_app(
    Settings(
        n_threads=4,                      # CPU threads for inference
        model="model/gguf-model.gguf",    # path to the GGUF model file
        embedding=True,                   # expose the /v1/embeddings endpoint
        n_gpu_layers=33,                  # layers to offload to GPU (0 = CPU only)
    )
)
@app.get('/', response_class=HTMLResponse)
def custom_index_route() -> HTMLResponse:
    """Serve a minimal landing page that embeds the Swagger UI (/docs) in a
    full-width/full-height iframe, so visiting the Space root shows the API docs."""
    html_content = """
    <html>
    <body>
    <iframe src="/docs" frameborder="0" width="100%" height="100%"></iframe>
    </body>
    </html>
    """
    return HTMLResponse(content=html_content)
if __name__ == "__main__":
import uvicorn
uvicorn.run(app, host="0.0.0.0", port=7860)