from llama_cpp.server.app import create_app, Settings
from fastapi.responses import HTMLResponse
import os

# Create the llama-cpp-python server app, loading the GGUF model with
# embeddings enabled and 33 layers offloaded to the GPU.
app = create_app(
    Settings(
        n_threads=4,
        model="model/gguf-model.gguf",
        embedding=True,
        n_gpu_layers=33,
    )
)


# Serve a custom HTML page at the root route instead of the default response.
@app.get('/', response_class=HTMLResponse)
def custom_index_route():
    html_content = """