"""Minimal Flask chat server backed by a local llama-cpp Gemma model.

Exposes:
    GET  /      -> renders templates/index.html
    POST /chat  -> JSON {"message": str} in, JSON {"response": str} out
"""

from flask import Flask, request, jsonify, render_template
from llama_cpp import Llama

app = Flask(__name__)

# Load the model once at import time (downloads/caches the GGUF file on first run).
# NOTE(review): this blocks startup until the model is loaded — acceptable for a
# single-process dev server, but confirm before deploying behind multiple workers.
llm = Llama.from_pretrained(
    repo_id="bartowski/google_gemma-3-1b-it-GGUF",
    filename="google_gemma-3-1b-it-IQ4_XS.gguf",
)


@app.route("/")
def home():
    """Serve the chat UI."""
    return render_template("index.html")


@app.route("/chat", methods=["POST"])
def chat():
    """Run one chat turn through the model.

    Expects a JSON body ``{"message": "<user text>"}``. Returns
    ``{"response": "<model reply>"}`` on success, or ``{"error": ...}``
    with status 400 for bad input / 500 for inference failure.
    """
    # get_json(silent=True) returns None instead of raising on a missing or
    # non-JSON body; `or {}` keeps the .get() below safe in that case.
    data = request.get_json(silent=True) or {}
    user_input = (data.get("message") or "").strip()
    if not user_input:
        return jsonify({"error": "Empty input"}), 400

    try:
        response = llm.create_chat_completion(
            messages=[{"role": "user", "content": user_input}]
        )
        bot_reply = response["choices"][0]["message"]["content"]
    except Exception:
        # Model inference can fail (OOM, malformed completion, ...); return a
        # clean JSON error instead of an HTML traceback page.
        app.logger.exception("chat completion failed")
        return jsonify({"error": "Model inference failed"}), 500

    return jsonify({"response": bot_reply})


if __name__ == "__main__":
    app.run(host="0.0.0.0", port=7860)