from llama_cpp.server.app import create_app, Settings from fastapi.responses import HTMLResponse from fastapi.middleware.cors import CORSMiddleware import os app = create_app( Settings( n_threads=4, model="model/gguf-model.gguf", embedding=True, n_gpu_layers=33 ) ) app.add_middleware( CORSMiddleware, allow_origins=["*"], allow_credentials=True, allow_methods=["*"], allow_headers=["*"], ) @app.get('/', response_class=HTMLResponse) def custom_index_route(): html_content = """