import os

from fastapi.responses import HTMLResponse
from llama_cpp.server.app import create_app, Settings
# Configure the llama.cpp server and build the FastAPI application from it.
_settings = Settings(
    model="model/gguf-model.gguf",  # path to the local GGUF model file
    n_threads=4,                    # CPU threads used for inference
    n_gpu_layers=33,                # transformer layers offloaded to the GPU
    embedding=True,                 # expose the embeddings endpoint
)
app = create_app(_settings)
@app.get("/")
def custom_index_route():
    """Serve a minimal static HTML landing page at the application root.

    Returns:
        HTMLResponse: the static HTML document below.
    """
    # BUG FIX: this function was defined but never registered with the
    # FastAPI app, so it was unreachable dead code. Decorating it with
    # @app.get("/") makes it the index route, as its name implies.
    html_content = """
    <html>
    <body>
    <h1>Test</h1>
    </body>
    </html>
    """
    return HTMLResponse(content=html_content)
if __name__ == "__main__":
    # When executed as a script, serve the ASGI app directly,
    # listening on every interface on port 7860.
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)