YALCINKAYA committed
Commit 8a85f9f · 1 Parent(s): 70f5edf

device option cuda or cpu

Files changed (2):
  1. app.py (+7 -5)
  2. requirements.txt (+5 -5)
app.py CHANGED
@@ -1,4 +1,5 @@
 import os
+import torch
 from flask import Flask, jsonify, request
 from flask_cors import CORS
 from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
@@ -14,6 +15,7 @@ CORS(app, resources={r"api/predict/*": {"origins": ["http://localhost:3000", "ht
 # Global variables for model and tokenizer
 model = None
 tokenizer = None
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # Use GPU if available
 
 def get_model_and_tokenizer(model_id):
     global model, tokenizer
@@ -23,9 +25,9 @@ def get_model_and_tokenizer(model_id):
         tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=False)
         tokenizer.pad_token = tokenizer.eos_token
 
-        print(f"Loading model and for model_id: {model_id}")
-        # Load the model
-        model = AutoModelForCausalLM.from_pretrained(model_id) #, device_map="auto")
+        print(f"Loading model for model_id: {model_id}")
+        # Load the model and move it to the specified device
+        model = AutoModelForCausalLM.from_pretrained(model_id).to(device) #, device_map="auto")
         model.config.use_cache = False
 
     except Exception as e:
@@ -38,8 +40,8 @@ def generate_response(user_input, model_id):
     if model is None or tokenizer is None:
         get_model_and_tokenizer(model_id)  # Load model and tokenizer
 
-    # Prepare the input tensors
-    inputs = tokenizer(prompt, return_tensors="pt")  # Move inputs to GPU if available
+    # Prepare the input tensors and move them to the appropriate device
+    inputs = tokenizer(prompt, return_tensors="pt").to(device)
 
     generation_config = GenerationConfig(
         max_new_tokens=100,
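
For reference, the device-selection pattern this commit adopts, isolated into a minimal runnable sketch (the model id and prompt below are placeholders, not taken from this app):

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Select CUDA when a GPU is visible, otherwise fall back to CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model_id = "gpt2"  # placeholder model id, not the one used by this app
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id).to(device)

# Inputs must live on the same device as the model weights
inputs = tokenizer("Hello", return_tensors="pt").to(device)
outputs = model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Note that .to(device) places the whole model on a single device; the commented-out device_map="auto" alternative would instead let accelerate distribute the weights across the available devices.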
requirements.txt CHANGED
@@ -1,9 +1,9 @@
 flask
 flask_cors
 huggingface-hub
-transformers
-torch
-accelerate
-bitsandbytes
+transformers>=4.30.0
+torch>=2.0.0
+accelerate>=0.18.0
+bitsandbytes-cuda117 # Replace 'cuda117' with your specific CUDA version if different
 peft
-trl
+trl
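
Since the bitsandbytes variant must match the CUDA toolkit, one way to confirm which CUDA version the installed torch build targets before picking it (a quick check, assuming torch is already installed):

import torch

# Prints the CUDA version torch was built against, e.g. "11.7" for a cu117
# build, or None for a CPU-only build
print(torch.version.cuda)
print(torch.cuda.is_available())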