Commit 9f05250
Parent(s): 7c8555b
model run for mode_response

app.py CHANGED
@@ -5,7 +5,7 @@ os.environ["HF_HOME"] = "/workspace/huggingface_cache" # Change this to a writa
 
 from flask import Flask, jsonify, request
 from flask_cors import CORS
-from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
+from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
 
 app = Flask(__name__)
 
@@ -13,16 +13,43 @@ app = Flask(__name__)
 CORS(app, resources={r"api/predict/*": {"origins": ["http://localhost:3000", "https://main.dbn2ikif9ou3g.amplifyapp.com"]}})
 
 # Model setup
-model_id = "YALCINKAYA/opsgenius-large"
+model_id = "YALCINKAYA/opsgenius-large"
 
-
-
-
+def get_model_and_tokenizer(model_id):
+    # Load the tokenizer
+    tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=False)
+    tokenizer.pad_token = tokenizer.eos_token
+
+    # Load the model
+    model = AutoModelForCausalLM.from_pretrained(model_id)
+    model.config.use_cache = False
+    return model, tokenizer
+
+model, tokenizer = get_model_and_tokenizer(model_id)
 
 def generate_response(user_input):
+
+    prompt = formatted_prompt(user_input)
+    # Prepare the input tensors
+    inputs = tokenizer(prompt, return_tensors="pt")  # .to('cuda')  # Move inputs to GPU
+
+    generation_config = GenerationConfig(
+        max_new_tokens=100,  # Allow enough length for full responses
+        min_length=5,
+        temperature=0.7,
+        do_sample=False,  # Set to False for deterministic responses
+        num_beams=1,
+        pad_token_id=tokenizer.eos_token_id,  # Set pad_token_id
+        truncation=True  # Enable truncation
+    )
+
     # Instead of generating a response from the model, return a dummy message
-    dummy_response = "This is a dummy response for the input: " + user_input
-
+    #dummy_response = "This is a dummy response for the input: " + user_input
+    # Generate response
+    outputs = model.generate(**inputs, generation_config=generation_config)
+    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+    return response
 
 def formatted_prompt(question) -> str:
     return f"<|im_start|>user\n{question}<|im_end|>\n<|im_start|>assistant:"
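
This commit replaces the hard-coded dummy reply with real generation, but the hunk does not show the Flask route that serves it; CORS is registered for r"api/predict/*", so an endpoint along those lines presumably lives elsewhere in app.py. Below is a minimal sketch of how such a route might call generate_response. The route path, the "message" JSON field, the error handling, and the port are assumptions for illustration, not part of this commit.

# Hypothetical sketch only: route path, JSON field names, and port are assumed;
# the commit above shows only the CORS setup and the generation code.
@app.route("/api/predict", methods=["POST"])
def predict():
    data = request.get_json(silent=True) or {}
    user_input = data.get("message", "")
    if not user_input:
        return jsonify({"error": "missing 'message' field"}), 400
    # Delegate to the generate_response added in this commit
    return jsonify({"response": generate_response(user_input)})

if __name__ == "__main__":
    # Hugging Face Spaces typically expects the server to listen on port 7860
    app.run(host="0.0.0.0", port=7860)

Two observations on the new code: decoding outputs[0] returns the whole sequence, so the response will typically include the prompt text as well as the reply, and stripping it (for example by decoding only the newly generated tokens) may be desirable. Also, the commented-out .to('cuda') means everything runs on CPU; if the model were moved to a GPU, the tokenized inputs would need to be moved to the same device.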