import os
# Set the HF_HOME environment variable to a writable directory
os.environ["HF_HOME"] = "/workspace/huggingface_cache" # Change this to a writable path in your space
from flask import Flask, jsonify, request
from flask_cors import CORS
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
app = Flask(__name__)
# Enable CORS for specific origins
CORS(app, resources={r"/api/predict/*": {"origins": ["http://localhost:3000", "https://main.dbn2ikif9ou3g.amplifyapp.com"]}})
# Model setup
model_id = "YALCINKAYA/opsgenius-large"
def get_model_and_tokenizer(model_id):
    # Load the tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=False)
    tokenizer.pad_token = tokenizer.eos_token
    # Load the model
    model = AutoModelForCausalLM.from_pretrained(model_id)
    model.config.use_cache = False
    return model, tokenizer
model, tokenizer = get_model_and_tokenizer(model_id)
def generate_response(user_input):
    prompt = formatted_prompt(user_input)
    # Prepare the input tensors (truncation belongs to the tokenizer call;
    # append .to('cuda') here to move the inputs to the GPU)
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True)
    generation_config = GenerationConfig(
        max_new_tokens=100,                   # Allow enough length for full responses
        min_length=5,
        temperature=0.7,
        do_sample=False,                      # Deterministic (greedy) decoding
        num_beams=1,
        pad_token_id=tokenizer.eos_token_id,  # Pad with the EOS token
    )
    # Generate the response and decode it back to text
    outputs = model.generate(**inputs, generation_config=generation_config)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response
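
# Note (illustrative): for a causal LM, model.generate returns the prompt tokens
# followed by the newly generated ones, so the decoded string above still begins
# with the formatted prompt. Assuming the decoded text keeps the literal
# "assistant:" marker, a caller could strip the prompt like this:
#
#   raw = generate_response("What does this service do?")
#   reply = raw.split("assistant:", 1)[-1].strip()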
def formatted_prompt(question) -> str:
return f"<|im_start|>user\n{question}<|im_end|>\n<|im_start|>assistant:"
@app.route("/", methods=["GET"])
def handle_get_request():
    # Get the 'message' parameter from the query string
    message = request.args.get("message", "No message provided.")
    # Return a JSON response including the received message
    return jsonify({"message": message, "status": "GET request successful!"})
@app.route("/send_message", methods=["POST"])
def handle_post_request():
    # Get the JSON data from the request
    data = request.get_json()
    # Reject requests without a JSON body
    if data is None:
        return jsonify({"error": "No JSON data provided"}), 400
    # Extract the 'inputs' field from the JSON data
    message = data.get("inputs", "No message provided.")
    # Read the Hugging Face token from the environment (currently unused)
    new_token = os.getenv("HF_TOKEN")
    # Generate a response from the model
    model_response = generate_response(message)
    # Return a JSON response including the generated response
    return jsonify({
        "received_message": model_response,
        "status": "POST request successful!"
    })
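
# Example client call (illustrative only; the host and port assume the local
# development server started below, and the response shape matches
# handle_post_request):
#
#   import requests
#   resp = requests.post(
#       "http://localhost:7860/send_message",
#       json={"inputs": "Summarize the latest alert."},
#   )
#   print(resp.json()["received_message"])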
# Note: Remove the app.run() call to let Hugging Face handle it
# Launch the interface
if __name__ == '__main__':
    app.run(host='0.0.0.0', port=7860)