from http.server import HTTPServer, BaseHTTPRequestHandler
from urllib.parse import urlparse
import urllib.parse
import json

# NOTE(review): llama_cpp is only required once the model code below is
# re-enabled; commented out with it so the echo server runs without the
# dependency installed.
# from llama_cpp import Llama

print("Loading model...")
# llm = Llama(model_path="/home/oluser/olapp/model-q4_K.gguf")
print("Model loaded!")


class OlHandler(BaseHTTPRequestHandler):
    """Answer GET /?q=... by writing the decoded query text back.

    The LLM completion path is currently disabled (see the commented-out
    llm.create_completion call); until it is restored the handler simply
    echoes the decoded prompt.
    """

    def do_GET(self):
        """Decode the 'q' query parameter and echo it as the response body.

        Responds 400 when the 'q' parameter is absent (previously this
        raised KeyError and produced a 500 traceback).
        """
        query = urlparse(self.path).query
        # split("=", 1): keep '=' characters inside parameter values;
        # skip fragments without '=' (a bare key would otherwise raise
        # ValueError inside dict()).
        query_components = dict(
            qc.split("=", 1) for qc in query.split("&") if "=" in qc
        )
        if "q" not in query_components:
            self.send_response(400)
            self.end_headers()
            self.wfile.write(b"missing 'q' parameter")
            return
        # Undo percent/plus encoding, then repair mojibake: the client
        # apparently sends UTF-8 bytes that arrive here as cp1252 text.
        # NOTE(review): assumption inherited from the original code --
        # confirm against the actual client before changing.
        q = (
            urllib.parse.unquote_plus(query_components["q"])
            .encode("cp1252")
            .decode("utf-8")
        )
        #output = llm.create_completion(
        #    q,
        #    max_tokens=32,
        #    echo=False
        #)["choices"][0]["text"].encode("cp1252").decode("utf-8")
        self.send_response(200)
        self.end_headers()
        #self.wfile.write(output.encode('utf-8'))
        self.wfile.write(q.encode('utf-8'))
        return


if __name__ == '__main__':
    olserver = HTTPServer(('0.0.0.0', 7860), OlHandler)
    print('Starting server at http://0.0.0.0:7860')
    olserver.serve_forever()