YALCINKAYA committed
Commit 8cf19de · verified · 1 Parent(s): 284c0f7

Update app.py

Files changed (1)
  1. app.py +43 -59
app.py CHANGED
@@ -1,21 +1,4 @@
-import os
-import torch
-from flask import Flask, jsonify, request
-from flask_cors import CORS
-from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, GenerationConfig
-import re
-
-# Set the HF_HOME environment variable to a writable directory
-os.environ["HF_HOME"] = "/workspace/huggingface_cache"
-
-app = Flask(__name__)
-
-# Enable CORS for specific origins
-CORS(app, resources={r"/send_message": {"origins": ["http://localhost:3000", "https://main.dbn2ikif9ou3g.amplifyapp.com"]}})
-
-# Global variables for model and tokenizer
-model = None
-tokenizer = None
+import traceback

 def get_model_and_tokenizer(model_id: str):
     global model, tokenizer
@@ -28,7 +11,8 @@ def get_model_and_tokenizer(model_id: str):
         print(f"Loading model for model_id: {model_id}")

         bnb_config = BitsAndBytesConfig(
-            load_in_4bit=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.float16, bnb_4bit_use_double_quant=True
+            load_in_4bit=True, bnb_4bit_quant_type="nf4",
+            bnb_4bit_compute_dtype=torch.float16, bnb_4bit_use_double_quant=True
         )

         model = AutoModelForCausalLM.from_pretrained(
@@ -40,63 +24,63 @@ def get_model_and_tokenizer(model_id: str):
         model.config.pad_token_id = tokenizer.eos_token_id  # Fix padding issue

     except Exception as e:
-        print(f"Error loading model: {e}")
-        raise e
+        print("Error loading model:")
+        print(traceback.format_exc())  # Logs the full error traceback
+        raise e  # Reraise the exception to stop execution

 def generate_response(user_input, model_id):
-    # Ensure model and tokenizer are loaded
-    get_model_and_tokenizer(model_id)
+    try:
+        get_model_and_tokenizer(model_id)

-    prompt = user_input
-    device = "cuda" if torch.cuda.is_available() else "cpu"
-
-    generation_config = GenerationConfig(
-        penalty_alpha=0.6,
-        do_sample=True,
-        top_p=0.2,
-        top_k=50,
-        temperature=0.3,
-        repetition_penalty=1.2,
-        max_new_tokens=60,
-        pad_token_id=tokenizer.eos_token_id
-    )
+        prompt = user_input
+        device = "cuda" if torch.cuda.is_available() else "cpu"
+
+        generation_config = GenerationConfig(
+            penalty_alpha=0.6,
+            do_sample=True,
+            top_p=0.2,
+            top_k=50,
+            temperature=0.3,
+            repetition_penalty=1.2,
+            max_new_tokens=60,
+            pad_token_id=tokenizer.eos_token_id
+        )

-    inputs = tokenizer(prompt, return_tensors="pt").to(device)
-    model.to(device)
+        inputs = tokenizer(prompt, return_tensors="pt").to(device)
+        model.to(device)

-    outputs = model.generate(**inputs, generation_config=generation_config)
-    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+        outputs = model.generate(**inputs, generation_config=generation_config)
+        response = tokenizer.decode(outputs[0], skip_special_tokens=True)

-    # Clean up response
-    cleaned_response = re.sub(r"(User:|Assistant:)", "", response).strip()
-    return cleaned_response.split("\n")[0]  # Keep only the first line of response
+        # Clean up response
+        cleaned_response = re.sub(r"(User:|Assistant:)", "", response).strip()
+        return cleaned_response.split("\n")[0]
+
+    except Exception as e:
+        print("Error in generate_response:")
+        print(traceback.format_exc())  # Logs the full traceback
+        raise e

-@app.route("/", methods=["GET"])
-def handle_get_request():
-    message = request.args.get("message", "No message provided.")
-    return jsonify({"message": message, "status": "GET request successful!"})
-
 @app.route("/send_message", methods=["POST"])
 def handle_post_request():
-    data = request.get_json()
-    if data is None:
-        return jsonify({"error": "No JSON data provided"}), 400
-
-    message = data.get("inputs", "No message provided.")
-    model_id = data.get("model_id", "YALCINKAYA/FinetunedByYalcin")
-
-    try:
-        print(f"Processing request")
+    try:
+        data = request.get_json()
+        if data is None:
+            return jsonify({"error": "No JSON data provided"}), 400
+
+        message = data.get("inputs", "No message provided.")
+        model_id = data.get("model_id", "YALCINKAYA/FinetunedByYalcin")
+
+        print(f"Processing request with model_id: {model_id}")
         model_response = generate_response(message, model_id)
+
         return jsonify({
             "received_message": model_response,
             "model_id": model_id,
             "status": "POST request successful!"
         })
     except Exception as e:
-        error_message = str(e) if app.debug else "An error occurred while processing your request."
-        print(f"Error handling POST request: {e}")
-        return jsonify({"error": error_message}), 500
+        print("Error handling POST request:")
+        print(traceback.format_exc())  # Logs the full traceback
+        return jsonify({"error": str(e)}), 500

-if __name__ == '__main__':
-    app.run(host='0.0.0.0', port=7860)
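
For reference, a minimal client-side sketch of how the /send_message endpoint touched by this commit can be exercised. The endpoint path, the "inputs" and "model_id" request keys, and the "received_message" response key come from handle_post_request above; the base URL and port are assumptions (port 7860 appears only in the app.run call removed by this commit), and the requests package is assumed to be installed.

import requests

# Hypothetical base URL; adjust to wherever the Flask app is actually served.
url = "http://localhost:7860/send_message"

payload = {
    "inputs": "Hello, how are you?",
    "model_id": "YALCINKAYA/FinetunedByYalcin",
}

resp = requests.post(url, json=payload)
resp.raise_for_status()

# The handler responds with received_message, model_id, and status.
print(resp.json()["received_message"])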