"""Serve a GGUF llama.cpp model over HTTP using the llama-cpp-python server app.

Exposes the standard llama-cpp-python OpenAI-compatible API plus a minimal
HTML landing page at "/". Intended to run on port 7860 (Hugging Face Spaces).
"""
from llama_cpp.server.app import create_app, Settings
from fastapi.responses import HTMLResponse
from fastapi.middleware.cors import CORSMiddleware
import os

# Build the llama-cpp-python FastAPI app around a local GGUF model.
# n_gpu_layers=33 offloads layers to the GPU; embedding=True enables the
# embeddings endpoint in addition to completions/chat.
app = create_app(
    Settings(
        n_threads=4,
        model="model/gguf-model.gguf",
        embedding=True,
        n_gpu_layers=33,
    )
)

# Allow browser clients from any origin to call the API (public demo server).
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


@app.get('/', response_class=HTMLResponse)
def custom_index_route():
    """Return a minimal HTML landing page for the root path."""
    html_content = """

Test

"""
    return HTMLResponse(content=html_content)


if __name__ == "__main__":
    import uvicorn

    # BUG FIX: uvicorn's `host` is a bind address (network interface), not a
    # public URL. The original value "https://matthoffner-llama-cpp-server.hf.space"
    # cannot be bound and the server would fail to start. "0.0.0.0" binds all
    # interfaces, which is what container hosts (e.g. HF Spaces) expect on 7860.
    uvicorn.run(app, host="0.0.0.0", port=7860)