YALCINKAYA committed
Commit bbaa18e · 1 Parent(s): 9f05250

improvements

Files changed (2)
  1. app.py +45 -42
  2. requirements.txt +5 -1
app.py CHANGED
@@ -1,50 +1,60 @@
  import os
- 
- # Set the HF_HOME environment variable to a writable directory
- os.environ["HF_HOME"] = "/workspace/huggingface_cache"  # Change this to a writable path in your space
- 
  from flask import Flask, jsonify, request
  from flask_cors import CORS
- from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
+ from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig, BitsAndBytesConfig
+ from peft import LoraConfig, AutoPeftModelForCausalLM
+ 
+ # Set the HF_HOME environment variable to a writable directory
+ os.environ["HF_HOME"] = "/workspace/huggingface_cache"  # Change this to a writable path in your space
  
  app = Flask(__name__)
  
  # Enable CORS for specific origins
  CORS(app, resources={r"api/predict/*": {"origins": ["http://localhost:3000", "https://main.dbn2ikif9ou3g.amplifyapp.com"]}})
  
- # Model setup
- model_id = "YALCINKAYA/opsgenius-large"
- 
- def get_model_and_tokenizer(model_id):
-     # Load the tokenizer
-     tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=False)
-     tokenizer.pad_token = tokenizer.eos_token
- 
-     # Load the model
-     model = AutoModelForCausalLM.from_pretrained(model_id)
-     model.config.use_cache = False
-     return model, tokenizer
- 
- model, tokenizer = get_model_and_tokenizer(model_id)
+ # Global variables for model and tokenizer
+ model = None
+ tokenizer = None
+ 
+ def initialize_model(model_id):
+     global model, tokenizer
+     try:
+         tokenizer = AutoTokenizer.from_pretrained(model_id)
+         tokenizer.pad_token = tokenizer.eos_token
+         bnb_config = BitsAndBytesConfig(
+             load_in_4bit=True,
+             bnb_4bit_quant_type="nf4",
+             bnb_4bit_compute_dtype="float16",
+             bnb_4bit_use_double_quant=True
+         )
+         model = AutoModelForCausalLM.from_pretrained(
+             model_id,
+             quantization_config=bnb_config,
+             device_map="auto"
+         )
+         model.config.use_cache = False
+         model.config.pretraining_tp = 1
+     except Exception as e:
+         print(f"Error loading model: {e}")
  
- def generate_response(user_input):
- 
+ def generate_response(user_input, model_id):
      prompt = formatted_prompt(user_input)
+ 
+     if model is None or tokenizer is None:
+         return "Model or tokenizer not initialized."
+ 
      # Prepare the input tensors
-     inputs = tokenizer(prompt, return_tensors="pt")#.to('cuda')  # Move inputs to GPU
- 
+     inputs = tokenizer(prompt, return_tensors="pt")  # You may want to move to GPU here if available
      generation_config = GenerationConfig(
-         max_new_tokens=100,  # Allow enough length for full responses
+         max_new_tokens=100,
          min_length=5,
          temperature=0.7,
-         do_sample=False,  # Set to False for deterministic responses
+         do_sample=False,
          num_beams=1,
-         pad_token_id=tokenizer.eos_token_id,  # Set pad_token_id
-         truncation=True  # Enable truncation
+         pad_token_id=tokenizer.eos_token_id,
+         truncation=True
      )
  
-     # Instead of generating a response from the model, return a dummy message
-     #dummy_response = "This is a dummy response for the input: " + user_input
      # Generate response
      outputs = model.generate(**inputs, generation_config=generation_config)
      response = tokenizer.decode(outputs[0], skip_special_tokens=True)
@@ -56,35 +66,28 @@ def formatted_prompt(question) -> str:
  
  @app.route("/", methods=["GET"])
  def handle_get_request():
-     # Get the 'message' parameter from the query string
      message = request.args.get("message", "No message provided.")
- 
-     # Return a JSON response including the received message
      return jsonify({"message": message, "status": "GET request successful!"})
  
  @app.route("/send_message", methods=["POST"])
  def handle_post_request():
-     # Get the JSON data from the request
      data = request.get_json()
- 
-     # Check if data is None
      if data is None:
          return jsonify({"error": "No JSON data provided"}), 400
  
-     # Extract the 'inputs' and 'authtoken' from the JSON data
      message = data.get("inputs", "No message provided.")
-     new_token = os.getenv("HF_TOKEN")
+     model_id = data.get("model_id", "YALCINKAYA/opsgenius-large")  # Default model if not provided
  
-     # Generate a response from the dummy message instead of the model
-     model_response = generate_response(message)
+     # Generate a response from the model
+     model_response = generate_response(message, model_id)
  
-     # Return a JSON response including the generated response
      return jsonify({
          "received_message": model_response,
          "status": "POST request successful!"
      })
  
- # Note: Remove the app.run() call to let Hugging Face handle it
- # Launch the interface
+ # Initialize the model and tokenizer when the app starts
+ initialize_model("YALCINKAYA/opsgenius-large")
+ 
  if __name__ == '__main__':
      app.run(host='0.0.0.0', port=7860)
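
For local testing, a minimal client sketch for the /send_message route defined above (assuming the app is reachable at http://localhost:7860, matching the app.run call; the example message and the use of the requests library are illustrative and not part of the commit):

import requests

# Hypothetical smoke test for the POST endpoint; adjust the URL for a deployed Space.
resp = requests.post(
    "http://localhost:7860/send_message",
    json={
        "inputs": "Hello!",                        # read by handle_post_request
        "model_id": "YALCINKAYA/opsgenius-large",  # optional; this is also the server-side default
    },
)
print(resp.status_code)
print(resp.json())  # expected keys: "received_message" and "status"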
requirements.txt CHANGED
@@ -1,5 +1,9 @@
  flask
  flask_cors
+ huggingface-hub
  transformers
  torch
- huggingface-hub
+ accelerate
+ bitsandbytes
+ peft
+ trl
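
The added accelerate and bitsandbytes packages back the device_map="auto" and BitsAndBytesConfig 4-bit loading path in initialize_model, and peft provides the new imports in app.py; 4-bit bitsandbytes loading generally expects a CUDA-capable GPU. A small optional guard, sketched here rather than taken from the commit, could check for that before initializing:

import torch

# Hypothetical startup guard around initialize_model from app.py:
# 4-bit bitsandbytes loading is typically GPU-only, so warn when CUDA is absent.
if torch.cuda.is_available():
    initialize_model("YALCINKAYA/opsgenius-large")
else:
    print("No CUDA device found; 4-bit quantized loading may fail.")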