# opsgenius3/app.py
import os
# Point the Hugging Face cache (HF_HOME) at a writable directory inside the Space
os.environ["HF_HOME"] = "/workspace/huggingface_cache"
from flask import Flask, jsonify, request
from flask_cors import CORS
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
app = Flask(__name__)
# Enable CORS for the front-end origins; the resource pattern must match the routes this app serves
CORS(app, resources={r"/*": {"origins": ["http://localhost:3000", "https://main.dbn2ikif9ou3g.amplifyapp.com"]}})
# Model setup
model_id = "YALCINKAYA/opsgenius-large"
def get_model_and_tokenizer(model_id):
# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=False)
tokenizer.pad_token = tokenizer.eos_token
# Load the model
model = AutoModelForCausalLM.from_pretrained(model_id)
model.config.use_cache = False
return model, tokenizer
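# Load the model and tokenizer once at startup so every request reuses them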
model, tokenizer = get_model_and_tokenizer(model_id)
def generate_response(user_input):
    prompt = formatted_prompt(user_input)

    # Tokenize the prompt (truncation belongs to the tokenizer call, not the generation config)
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True)  # append .to('cuda') to move inputs to GPU

    generation_config = GenerationConfig(
        max_new_tokens=100,  # allow enough length for full responses
        min_length=5,
        do_sample=False,     # deterministic (greedy) decoding
        num_beams=1,
        pad_token_id=tokenizer.eos_token_id,
    )

    # Generate, then decode only the newly generated tokens so the prompt is not echoed back
    outputs = model.generate(**inputs, generation_config=generation_config)
    new_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
    response = tokenizer.decode(new_tokens, skip_special_tokens=True)
    return response
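# A minimal (hypothetical) smoke test: once the model has loaded, the helper can be
# exercised directly from a Python shell, e.g.
#   >>> generate_response("What does OpsGenius do?")
# which returns the decoded completion for that prompt.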
def formatted_prompt(question) -> str:
return f"<|im_start|>user\n{question}<|im_end|>\n<|im_start|>assistant:"
@app.route("/", methods=["GET"])
def handle_get_request():
# Get the 'message' parameter from the query string
message = request.args.get("message", "No message provided.")
# Return a JSON response including the received message
return jsonify({"message": message, "status": "GET request successful!"})
@app.route("/send_message", methods=["POST"])
def handle_post_request():
# Get the JSON data from the request
data = request.get_json()
# Check if data is None
if data is None:
return jsonify({"error": "No JSON data provided"}), 400
    # Extract the user message from the JSON payload
    message = data.get("inputs", "No message provided.")
    # Generate a response from the model
    model_response = generate_response(message)
# Return a JSON response including the generated response
return jsonify({
"received_message": model_response,
"status": "POST request successful!"
})
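# Example request (assuming the app listens on port 7860):
#   curl -X POST http://localhost:7860/send_message \
#        -H "Content-Type: application/json" \
#        -d '{"inputs": "How do I acknowledge an alert?"}'
# The generated text is returned under the "received_message" key.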
if __name__ == '__main__':
    # Bind to 0.0.0.0:7860, the address and port Hugging Face Spaces expects
    app.run(host='0.0.0.0', port=7860)