YALCINKAYA committed
Commit bbaa18e · 1 Parent(s): 9f05250

improvements

Files changed (2)
  1. app.py +45 -42
  2. requirements.txt +5 -1
app.py CHANGED
@@ -1,50 +1,60 @@
  import os
- 
- # Set the HF_HOME environment variable to a writable directory
- os.environ["HF_HOME"] = "/workspace/huggingface_cache"  # Change this to a writable path in your space
- 
  from flask import Flask, jsonify, request
  from flask_cors import CORS
- from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
+ from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig, BitsAndBytesConfig
+ from peft import LoraConfig, AutoPeftModelForCausalLM
+ 
+ # Set the HF_HOME environment variable to a writable directory
+ os.environ["HF_HOME"] = "/workspace/huggingface_cache"  # Change this to a writable path in your space
  
  app = Flask(__name__)
  
  # Enable CORS for specific origins
  CORS(app, resources={r"api/predict/*": {"origins": ["http://localhost:3000", "https://main.dbn2ikif9ou3g.amplifyapp.com"]}})
  
- # Model setup
- model_id = "YALCINKAYA/opsgenius-large"
- 
- def get_model_and_tokenizer(model_id):
-     # Load the tokenizer
-     tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=False)
-     tokenizer.pad_token = tokenizer.eos_token
- 
-     # Load the model
-     model = AutoModelForCausalLM.from_pretrained(model_id)
-     model.config.use_cache = False
-     return model, tokenizer
- 
- model, tokenizer = get_model_and_tokenizer(model_id)
+ # Global variables for model and tokenizer
+ model = None
+ tokenizer = None
+ 
+ def initialize_model(model_id):
+     global model, tokenizer
+     try:
+         tokenizer = AutoTokenizer.from_pretrained(model_id)
+         tokenizer.pad_token = tokenizer.eos_token
+         bnb_config = BitsAndBytesConfig(
+             load_in_4bit=True,
+             bnb_4bit_quant_type="nf4",
+             bnb_4bit_compute_dtype="float16",
+             bnb_4bit_use_double_quant=True
+         )
+         model = AutoModelForCausalLM.from_pretrained(
+             model_id,
+             quantization_config=bnb_config,
+             device_map="auto"
+         )
+         model.config.use_cache = False
+         model.config.pretraining_tp = 1
+     except Exception as e:
+         print(f"Error loading model: {e}")
  
- def generate_response(user_input):
- 
+ def generate_response(user_input, model_id):
      prompt = formatted_prompt(user_input)
+ 
+     if model is None or tokenizer is None:
+         return "Model or tokenizer not initialized."
+ 
      # Prepare the input tensors
-     inputs = tokenizer(prompt, return_tensors="pt")#.to('cuda')  # Move inputs to GPU
- 
+     inputs = tokenizer(prompt, return_tensors="pt")  # You may want to move to GPU here if available
      generation_config = GenerationConfig(
-         max_new_tokens=100,  # Allow enough length for full responses
+         max_new_tokens=100,
          min_length=5,
          temperature=0.7,
-         do_sample=False,  # Set to False for deterministic responses
+         do_sample=False,
          num_beams=1,
-         pad_token_id=tokenizer.eos_token_id,  # Set pad_token_id
-         truncation=True  # Enable truncation
+         pad_token_id=tokenizer.eos_token_id,
+         truncation=True
      )
  
-     # Instead of generating a response from the model, return a dummy message
-     #dummy_response = "This is a dummy response for the input: " + user_input
      # Generate response
      outputs = model.generate(**inputs, generation_config=generation_config)
      response = tokenizer.decode(outputs[0], skip_special_tokens=True)
@@ -56,35 +66,28 @@ def formatted_prompt(question) -> str:
  
  @app.route("/", methods=["GET"])
  def handle_get_request():
-     # Get the 'message' parameter from the query string
      message = request.args.get("message", "No message provided.")
- 
-     # Return a JSON response including the received message
      return jsonify({"message": message, "status": "GET request successful!"})
  
  @app.route("/send_message", methods=["POST"])
  def handle_post_request():
-     # Get the JSON data from the request
      data = request.get_json()
- 
-     # Check if data is None
      if data is None:
          return jsonify({"error": "No JSON data provided"}), 400
  
-     # Extract the 'inputs' and 'authtoken' from the JSON data
      message = data.get("inputs", "No message provided.")
-     new_token = os.getenv("HF_TOKEN")
+     model_id = data.get("model_id", "YALCINKAYA/opsgenius-large")  # Default model if not provided
  
-     # Generate a response from the dummy message instead of the model
-     model_response = generate_response(message)
+     # Generate a response from the model
+     model_response = generate_response(message, model_id)
  
-     # Return a JSON response including the generated response
      return jsonify({
          "received_message": model_response,
          "status": "POST request successful!"
      })
  
- # Note: Remove the app.run() call to let Hugging Face handle it
- # Launch the interface
+ # Initialize the model and tokenizer when the app starts
+ initialize_model("YALCINKAYA/opsgenius-large")
+ 
  if __name__ == '__main__':
      app.run(host='0.0.0.0', port=7860)
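
For local testing, a minimal client sketch for the /send_message route defined above (assuming the app is reachable at http://localhost:7860, matching the app.run call; the example message and the use of the requests library are illustrative and not part of the commit):

import requests

# Hypothetical smoke test for the POST endpoint; adjust the URL for a deployed Space.
resp = requests.post(
    "http://localhost:7860/send_message",
    json={
        "inputs": "Hello!",                        # read by handle_post_request
        "model_id": "YALCINKAYA/opsgenius-large",  # optional; this is also the server-side default
    },
)
print(resp.status_code)
print(resp.json())  # expected keys: "received_message" and "status"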
requirements.txt CHANGED
@@ -1,5 +1,9 @@
  flask
  flask_cors
+ huggingface-hub
  transformers
  torch
- huggingface-hub
+ accelerate
+ bitsandbytes
+ peft
+ trl
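
The added accelerate and bitsandbytes packages back the device_map="auto" and BitsAndBytesConfig 4-bit loading path in initialize_model, and peft provides the new imports in app.py; 4-bit bitsandbytes loading generally expects a CUDA-capable GPU. A small optional guard, sketched here rather than taken from the commit, could check for that before initializing:

import torch

# Hypothetical startup guard around initialize_model from app.py:
# 4-bit bitsandbytes loading is typically GPU-only, so warn when CUDA is absent.
if torch.cuda.is_available():
    initialize_model("YALCINKAYA/opsgenius-large")
else:
    print("No CUDA device found; 4-bit quantized loading may fail.")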