import os
 
# Set the HF_HOME environment variable to a writable directory
os.environ["HF_HOME"] = "/workspace/huggingface_cache"  # Change this to a writable path in your space

from flask import Flask, jsonify, request
from flask_cors import CORS 
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig  

app = Flask(__name__)

# Enable CORS for the exposed routes, restricted to specific origins
# (the pattern must match the routes actually served, "/" and "/send_message")
CORS(app, resources={r"/*": {"origins": ["http://localhost:3000", "https://main.dbn2ikif9ou3g.amplifyapp.com"]}})

# Model setup
model_id = "YALCINKAYA/opsgenius-large" 

def get_model_and_tokenizer(model_id):
    # Load the tokenizer; reuse the EOS token for padding since the model defines no pad token
    tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=False)
    tokenizer.pad_token = tokenizer.eos_token

    # Load the model; note that disabling the KV cache slows generation and is
    # normally only needed during training
    model = AutoModelForCausalLM.from_pretrained(model_id)
    model.config.use_cache = False
    return model, tokenizer

model, tokenizer = get_model_and_tokenizer(model_id)
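
# Optional sketch: move the model to a GPU when one is available (pairs with the
# commented-out .to('cuda') call in generate_response below). This assumes torch,
# which transformers already pulls in as a dependency.
# import torch
# if torch.cuda.is_available():
#     model = model.to("cuda")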

def generate_response(user_input):
    prompt = formatted_prompt(user_input)

    # Prepare the input tensors; truncation is a tokenizer option, so it belongs here
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True)  # .to('cuda') to move inputs to GPU

    generation_config = GenerationConfig(
        max_new_tokens=100,  # allow enough length for full responses
        min_length=5,
        do_sample=False,     # greedy decoding for deterministic responses (temperature would be ignored)
        num_beams=1,
        pad_token_id=tokenizer.eos_token_id,
    )

    # Generate a response and decode only the newly generated tokens,
    # so the prompt is not echoed back to the caller
    outputs = model.generate(**inputs, generation_config=generation_config)
    response = tokenizer.decode(
        outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True
    )

    return response

def formatted_prompt(question) -> str:
    return f"<|im_start|>user\n{question}<|im_end|>\n<|im_start|>assistant:"

@app.route("/", methods=["GET"])
def handle_get_request():
    # Get the 'message' parameter from the query string
    message = request.args.get("message", "No message provided.")
    
    # Return a JSON response including the received message
    return jsonify({"message": message, "status": "GET request successful!"})

@app.route("/send_message", methods=["POST"])
def handle_post_request():
    # Get the JSON data from the request
    data = request.get_json()

    # Check if data is None
    if data is None:
        return jsonify({"error": "No JSON data provided"}), 400

    # Extract the 'inputs' field from the JSON data; the Hugging Face token
    # is read from the environment and is currently unused
    message = data.get("inputs", "No message provided.")
    hf_token = os.getenv("HF_TOKEN")

    # Generate a response from the model
    model_response = generate_response(message)

    # Return a JSON response including the generated response 
    return jsonify({
        "received_message": model_response, 
        "status": "POST request successful!"
    })

# When executed directly (as on a Hugging Face Space), serve the Flask app on port 7860
if __name__ == '__main__':
    app.run(host='0.0.0.0', port=7860)
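
# Example requests, a minimal sketch assuming the app is reachable at
# http://localhost:7860 (the port used above):
#
#   curl "http://localhost:7860/?message=hello"
#
#   curl -X POST http://localhost:7860/send_message \
#        -H "Content-Type: application/json" \
#        -d '{"inputs": "How do I acknowledge an alert?"}'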