from flask import Flask, request, jsonify, render_template
from llama_cpp import Llama

app = Flask(__name__)

# Download (if not already cached) and load a quantized Gemma 3 1B model
# from the Hugging Face Hub.
print("🔄 Loading model... (this may take a while)")
llm = Llama.from_pretrained(
    repo_id="bartowski/google_gemma-3-1b-it-GGUF",
    filename="google_gemma-3-1b-it-IQ4_XS.gguf",
)
print("✅ Model loaded!")


@app.route("/")
def home():
    # Serve the chat UI (Flask looks for templates/index.html).
    print("📢 Serving index.html")
    return render_template("index.html")


@app.route("/chat", methods=["POST"])
def chat():
    # Handle a single chat turn: read the user's message, run one
    # chat completion, and return the model's reply as JSON.
    user_input = request.json.get("message", "")
    print(f"💬 Received message: {user_input}")
    if not user_input:
        print("⚠️ Empty input received!")
        return jsonify({"error": "Empty input"}), 400
    response = llm.create_chat_completion(
        messages=[{"role": "user", "content": user_input}]
    )
    bot_reply = response["choices"][0]["message"]["content"]
    print(f"🤖 Bot response: {bot_reply}")
    return jsonify({"response": bot_reply})


if __name__ == "__main__":
    print("🚀 Starting Flask app on port 7860")
    # Note: debug=True enables Flask's auto-reloader, which re-runs this
    # module (and reloads the model) whenever a source file changes.
    app.run(host="0.0.0.0", port=7860, debug=True)
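

# --- Quick smoke test (illustrative; run from a separate script or REPL,
# not inside this file) ---
# A minimal sketch of exercising the /chat endpoint, assuming the server
# above is running locally on port 7860 and the third-party `requests`
# package is installed:
#
#   import requests
#
#   resp = requests.post(
#       "http://localhost:7860/chat",
#       json={"message": "Hello! Who are you?"},
#   )
#   resp.raise_for_status()
#   print(resp.json()["response"])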