JJ94 committed on
Commit 8d8525b · verified · 1 Parent(s): 2e1b7f6

Update app.py

Files changed (1)
app.py +24 -23
app.py CHANGED
@@ -1,47 +1,48 @@
-from flask import Flask, request, jsonify, render_template
+from flask import Flask, request, Response, jsonify
 from llama_cpp import Llama
 
 app = Flask(__name__)
 
 # Load the model
-print("🔄 Loading model... (this may take a while)")
+print("🔄 Loading model...")
 llm = Llama.from_pretrained(
     repo_id="bartowski/google_gemma-3-1b-it-GGUF",
     filename="google_gemma-3-1b-it-IQ4_XS.gguf",
+    n_ctx=2048
 )
 print("✅ Model loaded!")
 
-@app.route("/")
-def home():
-    print("📢 Serving index.html")
-    return render_template("index.html")
-
-@app.route("/chat", methods=["POST"])
-def chat():
-    user_input = request.json.get("message", "")
-    print(f"💬 Received message: {user_input}")
-
-    if not user_input:
-        print("⚠️ Empty input received!")
-        return jsonify({"error": "Empty input"}), 400
 
+def generate_response(user_input):
+    """Generator function to stream model output"""
     try:
         response = llm.create_chat_completion(
-            messages=[{"role": "user", "content": user_input}]
+            messages=[{"role": "user", "content": user_input}],
+            stream=True  # Enable streaming
         )
 
-        print(f"🔍 Full response object: {response}")  # Debugging
-        bot_reply = response["choices"][0]["message"]["content"]
-        print(f"🤖 Bot response: {bot_reply}")
-
-        return jsonify({"response": bot_reply})
+        for chunk in response:
+            if "choices" in chunk and len(chunk["choices"]) > 0:
+                token = chunk["choices"][0]["delta"].get("content", "")
+                if token:
+                    print(f"📝 Token: {token}", flush=True)  # Debugging
+                    yield token
 
     except Exception as e:
         print(f"❌ Error generating response: {e}")
-        return jsonify({"error": "Model failed to generate response"}), 500
+        yield "[Error occurred]"
+
+
+@app.route("/chat", methods=["POST"])
+def chat():
+    user_input = request.json.get("message", "")
+    if not user_input:
+        return jsonify({"error": "Empty input"}), 400
+
+    return Response(generate_response(user_input), content_type="text/plain")
 
+
 if __name__ == "__main__":
-    print("🚀 Starting Flask app on port 7860")
     app.run(host="0.0.0.0", port=7860, debug=True)
 
 
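For reference, one way to consume the new streaming /chat endpoint from a client: a minimal sketch, not part of this commit, assuming the server is reachable on localhost:7860 and using the requests library.

    import requests

    # POST a message and read the plain-text body incrementally as the
    # server yields tokens from generate_response().
    resp = requests.post(
        "http://localhost:7860/chat",
        json={"message": "Hello!"},
        stream=True,  # do not buffer the whole response
    )
    resp.raise_for_status()
    resp.encoding = "utf-8"  # assume tokens are streamed as UTF-8 text
    for chunk in resp.iter_content(chunk_size=None, decode_unicode=True):
        print(chunk, end="", flush=True)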