import os
 
# Set the HF_HOME environment variable to a writable directory
os.environ["HF_HOME"] = "/workspace/huggingface_cache"  # Change this to a writable path in your space

from flask import Flask, jsonify, request
from flask_cors import CORS 
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig  

app = Flask(__name__)

# Enable CORS for the exposed routes, restricted to specific origins
# (the pattern must match the routes actually served, "/" and "/send_message")
CORS(app, resources={r"/*": {"origins": ["http://localhost:3000", "https://main.dbn2ikif9ou3g.amplifyapp.com"]}})

# Model setup
model_id = "YALCINKAYA/opsgenius-large" 

def get_model_and_tokenizer(model_id):
    # Load the tokenizer; reuse the EOS token for padding since the model defines no pad token
    tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=False)
    tokenizer.pad_token = tokenizer.eos_token

    # Load the model; note that disabling the KV cache slows generation and is
    # normally only needed during training
    model = AutoModelForCausalLM.from_pretrained(model_id)
    model.config.use_cache = False
    return model, tokenizer

model, tokenizer = get_model_and_tokenizer(model_id)
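
# Optional sketch: move the model to a GPU when one is available (pairs with the
# commented-out .to('cuda') call in generate_response below). This assumes torch,
# which transformers already pulls in as a dependency.
# import torch
# if torch.cuda.is_available():
#     model = model.to("cuda")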

def generate_response(user_input):
    prompt = formatted_prompt(user_input)

    # Prepare the input tensors; truncation is a tokenizer option, so it belongs here
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True)  # .to('cuda') to move inputs to GPU

    generation_config = GenerationConfig(
        max_new_tokens=100,  # allow enough length for full responses
        min_length=5,
        do_sample=False,     # greedy decoding for deterministic responses (temperature would be ignored)
        num_beams=1,
        pad_token_id=tokenizer.eos_token_id,
    )

    # Generate a response and decode only the newly generated tokens,
    # so the prompt is not echoed back to the caller
    outputs = model.generate(**inputs, generation_config=generation_config)
    response = tokenizer.decode(
        outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True
    )

    return response

def formatted_prompt(question) -> str:
    return f"<|im_start|>user\n{question}<|im_end|>\n<|im_start|>assistant:"

@app.route("/", methods=["GET"])
def handle_get_request():
    # Get the 'message' parameter from the query string
    message = request.args.get("message", "No message provided.")
    
    # Return a JSON response including the received message
    return jsonify({"message": message, "status": "GET request successful!"})

@app.route("/send_message", methods=["POST"])
def handle_post_request():
    # Get the JSON data from the request
    data = request.get_json()

    # Check if data is None
    if data is None:
        return jsonify({"error": "No JSON data provided"}), 400

    # Extract the 'inputs' field from the JSON data; the Hugging Face token
    # is read from the environment and is currently unused
    message = data.get("inputs", "No message provided.")
    hf_token = os.getenv("HF_TOKEN")

    # Generate a response from the model
    model_response = generate_response(message)

    # Return a JSON response including the generated response 
    return jsonify({
        "received_message": model_response, 
        "status": "POST request successful!"
    })

# When executed directly (as on a Hugging Face Space), serve the Flask app on port 7860
if __name__ == '__main__':
    app.run(host='0.0.0.0', port=7860)
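
# Example requests, a minimal sketch assuming the app is reachable at
# http://localhost:7860 (the port used above):
#
#   curl "http://localhost:7860/?message=hello"
#
#   curl -X POST http://localhost:7860/send_message \
#        -H "Content-Type: application/json" \
#        -d '{"inputs": "How do I acknowledge an alert?"}'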