YALCINKAYA committed
Commit 05f391e · 1 Parent(s): 7fa6395
Files changed (1):
  1. app.py +7 -10
app.py CHANGED
@@ -1,5 +1,4 @@
 import os
-import torch
 from flask import Flask, jsonify, request
 from flask_cors import CORS
 from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
@@ -15,7 +14,6 @@ CORS(app, resources={r"api/predict/*": {"origins": ["http://localhost:3000", "ht
 # Global variables for model and tokenizer
 model = None
 tokenizer = None
-device = torch.device("cpu")  # Explicitly use CPU
 
 def get_model_and_tokenizer(model_id):
     global model, tokenizer
@@ -25,12 +23,11 @@ def get_model_and_tokenizer(model_id):
         tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=False)
         tokenizer.pad_token = tokenizer.eos_token
 
-        print(f"Loading model for model_id: {model_id}")
-        # Load the model and move it to the specified device
-        model = AutoModelForCausalLM.from_pretrained(model_id).to(device)  #, device_map="auto")
+        print(f"Loading model and tokenizer for model_id: {model_id}")
+        # Load the model
+        model = AutoModelForCausalLM.from_pretrained(model_id)  #, device_map="auto")
         model.config.use_cache = False
-        print("Model and tokenizer successfully loaded.")
-
+
     except Exception as e:
         print(f"Error loading model: {e}")
@@ -41,13 +38,13 @@ def generate_response(user_input, model_id):
     if model is None or tokenizer is None:
         get_model_and_tokenizer(model_id)  # Load model and tokenizer
 
-    # Prepare the input tensors and move them to the appropriate device
-    inputs = tokenizer(prompt, return_tensors="pt").to(device)
+    # Prepare the input tensors (they stay on CPU; no .to(device) anymore)
+    inputs = tokenizer(prompt, return_tensors="pt")
 
     generation_config = GenerationConfig(
         max_new_tokens=100,
         min_length=5,
-        #temperature=0.7,
+        temperature=0.7,
         do_sample=False,
         num_beams=1,
         pad_token_id=tokenizer.eos_token_id,
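
Note: the commit drops the explicit torch.device("cpu") pin and the .to(device) calls, so model and inputs now stay on the CPU default. For reference, a minimal sketch of what device-agnostic placement could look like, assuming torch is installed and CUDA may or may not be present (illustrative only, not part of this commit):

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

def load_model_and_tokenizer(model_id):
    # Hypothetical helper: prefer CUDA when available, fall back to CPU.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=False)
    tokenizer.pad_token = tokenizer.eos_token
    model = AutoModelForCausalLM.from_pretrained(model_id).to(device)
    return model, tokenizer, device

# Inputs must live on the same device as the model before generate():
# inputs = tokenizer(prompt, return_tensors="pt").to(device)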
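
Note: the last hunk enables temperature=0.7 while do_sample=False remains set. In transformers, temperature only affects sampling, so greedy decoding ignores it (recent versions emit a warning about unused sampling flags). A sketch of a config where the temperature actually takes effect, with illustrative values:

from transformers import GenerationConfig

generation_config = GenerationConfig(
    max_new_tokens=100,
    min_length=5,
    do_sample=True,   # sampling must be enabled for temperature to apply
    temperature=0.7,
    num_beams=1,
)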