YALCINKAYA committed
Commit 9f05250 · 1 Parent(s): 7c8555b

model run for mode_response

Files changed (1)
  1. app.py +34 -7
app.py CHANGED
@@ -5,7 +5,7 @@ os.environ["HF_HOME"] = "/workspace/huggingface_cache" # Change this to a writa
 
 from flask import Flask, jsonify, request
 from flask_cors import CORS
-from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
+from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
 
 app = Flask(__name__)
 
@@ -13,16 +13,43 @@ app = Flask(__name__)
 CORS(app, resources={r"api/predict/*": {"origins": ["http://localhost:3000", "https://main.dbn2ikif9ou3g.amplifyapp.com"]}})
 
 # Model setup
-model_id = "YALCINKAYA/opsgenius-large"
+model_id = "YALCINKAYA/opsgenius-large"
 
-# Load the tokenizer and model
-tokenizer = AutoTokenizer.from_pretrained(model_id)
-model = AutoModelForCausalLM.from_pretrained(model_id)
+def get_model_and_tokenizer(model_id):
+    # Load the tokenizer
+    tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=False)
+    tokenizer.pad_token = tokenizer.eos_token
+
+    # Load the model
+    model = AutoModelForCausalLM.from_pretrained(model_id)
+    model.config.use_cache = False
+    return model, tokenizer
+
+model, tokenizer = get_model_and_tokenizer(model_id)
 
 def generate_response(user_input):
+
+    prompt = formatted_prompt(user_input)
+    # Prepare the input tensors
+    inputs = tokenizer(prompt, return_tensors="pt")  # .to('cuda')  # Move inputs to GPU
+
+    generation_config = GenerationConfig(
+        max_new_tokens=100,  # Allow enough length for full responses
+        min_length=5,
+        temperature=0.7,
+        do_sample=False,  # Set to False for deterministic responses
+        num_beams=1,
+        pad_token_id=tokenizer.eos_token_id,  # Set pad_token_id
+        truncation=True  # Enable truncation
+    )
+
     # Instead of generating a response from the model, return a dummy message
-    dummy_response = "This is a dummy response for the input: " + user_input
-    return dummy_response
+    #dummy_response = "This is a dummy response for the input: " + user_input
+    # Generate response
+    outputs = model.generate(**inputs, generation_config=generation_config)
+    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+    return response
 
 def formatted_prompt(question) -> str:
     return f"<|im_start|>user\n{question}<|im_end|>\n<|im_start|>assistant:"