# opsgenius3/app.py
import os
# Point the Hugging Face cache (HF_HOME) at a writable directory inside the Space
os.environ["HF_HOME"] = "/workspace/huggingface_cache"
from flask import Flask, jsonify, request
from flask_cors import CORS
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
app = Flask(__name__)
# Enable CORS for the front-end origins; the resource pattern must match the routes this app serves
CORS(app, resources={r"/*": {"origins": ["http://localhost:3000", "https://main.dbn2ikif9ou3g.amplifyapp.com"]}})
# Model setup
model_id = "YALCINKAYA/opsgenius-large"
def get_model_and_tokenizer(model_id):
# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=False)
tokenizer.pad_token = tokenizer.eos_token
# Load the model
model = AutoModelForCausalLM.from_pretrained(model_id)
model.config.use_cache = False
return model, tokenizer
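# Load the model and tokenizer once at startup so every request reuses them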
model, tokenizer = get_model_and_tokenizer(model_id)
def generate_response(user_input):
    prompt = formatted_prompt(user_input)

    # Tokenize the prompt (truncation belongs to the tokenizer call, not the generation config)
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True)  # append .to('cuda') to move inputs to GPU

    generation_config = GenerationConfig(
        max_new_tokens=100,  # allow enough length for full responses
        min_length=5,
        do_sample=False,     # deterministic (greedy) decoding
        num_beams=1,
        pad_token_id=tokenizer.eos_token_id,
    )

    # Generate, then decode only the newly generated tokens so the prompt is not echoed back
    outputs = model.generate(**inputs, generation_config=generation_config)
    new_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
    response = tokenizer.decode(new_tokens, skip_special_tokens=True)
    return response
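# A minimal (hypothetical) smoke test: once the model has loaded, the helper can be
# exercised directly from a Python shell, e.g.
#   >>> generate_response("What does OpsGenius do?")
# which returns the decoded completion for that prompt.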
def formatted_prompt(question) -> str:
return f"<|im_start|>user\n{question}<|im_end|>\n<|im_start|>assistant:"
@app.route("/", methods=["GET"])
def handle_get_request():
# Get the 'message' parameter from the query string
message = request.args.get("message", "No message provided.")
# Return a JSON response including the received message
return jsonify({"message": message, "status": "GET request successful!"})
@app.route("/send_message", methods=["POST"])
def handle_post_request():
# Get the JSON data from the request
data = request.get_json()
# Check if data is None
if data is None:
return jsonify({"error": "No JSON data provided"}), 400
    # Extract the user message from the JSON payload
    message = data.get("inputs", "No message provided.")
    # Generate a response from the model
    model_response = generate_response(message)
# Return a JSON response including the generated response
return jsonify({
"received_message": model_response,
"status": "POST request successful!"
})
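# Example request (assuming the app listens on port 7860):
#   curl -X POST http://localhost:7860/send_message \
#        -H "Content-Type: application/json" \
#        -d '{"inputs": "How do I acknowledge an alert?"}'
# The generated text is returned under the "received_message" key.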
if __name__ == '__main__':
    # Bind to 0.0.0.0:7860, the address and port Hugging Face Spaces expects
    app.run(host='0.0.0.0', port=7860)