Commit 8a85f9f
Parent(s): 70f5edf

device option cuda or cpu

- app.py +7 -5
- requirements.txt +5 -5
app.py
CHANGED
@@ -1,4 +1,5 @@
 import os
+import torch
 from flask import Flask, jsonify, request
 from flask_cors import CORS
 from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
@@ -14,6 +15,7 @@ CORS(app, resources={r"api/predict/*": {"origins": ["http://localhost:3000", "ht
 # Global variables for model and tokenizer
 model = None
 tokenizer = None
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # Use GPU if available
 
 def get_model_and_tokenizer(model_id):
     global model, tokenizer
@@ -23,9 +25,9 @@ def get_model_and_tokenizer(model_id):
         tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=False)
         tokenizer.pad_token = tokenizer.eos_token
 
-        print(f"Loading model
-        # Load the model
-        model = AutoModelForCausalLM.from_pretrained(model_id) #, device_map="auto")
+        print(f"Loading model for model_id: {model_id}")
+        # Load the model and move it to the specified device
+        model = AutoModelForCausalLM.from_pretrained(model_id).to(device) #, device_map="auto")
         model.config.use_cache = False
 
     except Exception as e:
@@ -38,8 +40,8 @@ def generate_response(user_input, model_id):
     if model is None or tokenizer is None:
         get_model_and_tokenizer(model_id)  # Load model and tokenizer
 
-    # Prepare the input tensors
-    inputs = tokenizer(prompt, return_tensors="pt")
+    # Prepare the input tensors and move them to the appropriate device
+    inputs = tokenizer(prompt, return_tensors="pt").to(device)
 
     generation_config = GenerationConfig(
         max_new_tokens=100,
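
The change follows the usual PyTorch device-placement pattern: pick a device once, move the model's weights there at load time, and move every input batch to the same device before generating. Below is a minimal self-contained sketch of that pattern; the tiny model id is only an illustrative stand-in, not the model this Space actually serves.

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig

# Pick the device once; everything below targets it.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model_id = "sshleifer/tiny-gpt2"  # illustrative stand-in model
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id).to(device)

# Inputs must live on the same device as the weights; feeding a CPU tensor
# to a CUDA model makes generate() raise a device-mismatch RuntimeError.
inputs = tokenizer("Hello", return_tensors="pt").to(device)
outputs = model.generate(**inputs, generation_config=GenerationConfig(max_new_tokens=20))
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Moving the model once at load time, as the commit does, keeps the per-request cost down to moving only the small input tensors.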
requirements.txt
CHANGED
@@ -1,9 +1,9 @@
 flask
 flask_cors
 huggingface-hub
-transformers
-torch
-accelerate
-bitsandbytes
+transformers>=4.30.0
+torch>=2.0.0
+accelerate>=0.18.0
+bitsandbytes-cuda117  # Replace 'cuda117' with your specific CUDA version if different
 peft
-trl
+trl
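
The bitsandbytes-cuda117 pin is tied to a specific CUDA toolkit build. A quick way to check which CUDA build the installed torch ships with, so the suffix matches, assuming torch is already installed:

import torch

print(torch.version.cuda)         # e.g. "11.7" -> matches the cuda117 suffix
print(torch.cuda.is_available())  # False means the app falls back to CPU

If torch.version.cuda prints None, the installed torch is a CPU-only build and the Space will run on CPU regardless of the pin.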