"""Minimal HTTP inference server: GET /?q=<prompt> returns a JSON completion.

Loads a llama.cpp GGUF model once at import time, then serves completions
on port 7860 via a single-threaded HTTPServer.
"""
from http.server import HTTPServer, BaseHTTPRequestHandler
from urllib.parse import urlparse, parse_qs
import json

from llama_cpp import Llama

print("Loading model...")
llm = Llama(
    model_path="/home/oluser/olapp/model-q4_K.gguf",
    n_ctx=4096,
    # NOTE(review): `n_parts` is deprecated/removed in recent llama-cpp-python
    # releases — confirm the pinned version still accepts it.
    n_parts=1,
)
print("Model loaded!")


class OlHandler(BaseHTTPRequestHandler):
    """Handles GET requests of the form /?q=<prompt>.

    Responds 200 with the llama.cpp completion response serialized as JSON,
    or 400 when the `q` parameter is missing.
    """

    def do_GET(self):
        # parse_qs URL-decodes and tolerates empty queries, repeated keys,
        # and '=' inside values — unlike the fragile manual split it replaces.
        params = parse_qs(urlparse(self.path).query)
        prompts = params.get("q")
        if not prompts:
            # Missing prompt: report a client error instead of crashing
            # with a KeyError (which would surface as a 500).
            self.send_response(400)
            self.end_headers()
            self.wfile.write(b'{"error": "missing query parameter q"}')
            return

        output = llm(
            prompts[0],
            max_tokens=32,  # Generate up to 32 tokens
            echo=False,
        )
        self.send_response(200)
        self.send_header("Content-Type", "application/json")
        self.end_headers()
        # llm() returns a completion dict, not a string: it must be JSON-
        # serialized. The original called .encode() directly on the dict,
        # raising AttributeError on every request.
        self.wfile.write(json.dumps(output).encode('utf-8'))


if __name__ == '__main__':
    olserver = HTTPServer(('0.0.0.0', 7860), OlHandler)
    print('Starting server at http://0.0.0.0:7860')
    olserver.serve_forever()