Update app.py
Browse files
app.py
CHANGED
@@ -3,31 +3,22 @@ from peft import PeftModel, PeftConfig
|
|
3 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
4 |
import torch
|
5 |
|
|
|
|
|
|
|
6 |
|
7 |
-
|
8 |
-
device = torch.device("cuda")
|
9 |
-
print("GPU is available!")
|
10 |
-
else:
|
11 |
-
device = torch.device("cpu")
|
12 |
-
print("GPU is not available, using CPU.")
|
13 |
-
|
14 |
-
# Load the model and config when the script starts
|
15 |
peft_model_id = "phearion/bigbrain-v0.0.1"
|
16 |
config = PeftConfig.from_pretrained(peft_model_id)
|
17 |
-
model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path)
|
18 |
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
|
19 |
-
|
20 |
-
# Load the Lora model
|
21 |
-
model = PeftModel.from_pretrained(model, peft_model_id)
|
22 |
|
23 |
def greet(text):
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
return tokenizer.decode(output_tokens[0], skip_special_tokens=True)
|
30 |
|
31 |
-
|
32 |
-
iface
|
33 |
-
iface.launch()
|
|
|
3 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
4 |
import torch
|
5 |
|
6 |
+
# Device configuration: use the GPU when CUDA is available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# The adapter's PEFT config records which base checkpoint it was trained on;
# the tokenizer and the base weights are both loaded from that checkpoint.
peft_model_id = "phearion/bigbrain-v0.0.1"
config = PeftConfig.from_pretrained(peft_model_id)
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)

# PeftModel.from_pretrained wraps an already-instantiated base model, so the
# base model has to be loaded first and passed in ahead of the adapter id.
# The model is moved to the device only after it exists (calling
# model.to(device) before this point raises NameError).
base_model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path)
model = PeftModel.from_pretrained(base_model, peft_model_id).to(device)
model.eval()  # the app only runs inference
|
|
|
|
|
15 |
|
16 |
def greet(text):
    """Generate a short continuation of *text* with the loaded PEFT model.

    Args:
        text: Prompt string typed by the user.

    Returns:
        The decoded output sequence (special tokens stripped), limited to
        15 newly generated tokens. An empty string is returned for empty or
        whitespace-only input without running the model.
    """
    # The interface is created with live=True, so this can be invoked while
    # the textbox is empty; skip tokenization/generation in that case.
    if not text or not text.strip():
        return ""

    # Tokenize and move the input tensors to the same device as the model.
    batch = tokenizer(text, return_tensors="pt").to(device)

    # Mixed precision is only enabled on CUDA; on CPU the autocast context is
    # entered in disabled mode so the same code path works on both devices.
    use_amp = device.type == "cuda"
    with torch.no_grad(), torch.autocast(device_type=device.type, enabled=use_amp):
        output_tokens = model.generate(**batch, max_new_tokens=15)

    return tokenizer.decode(output_tokens[0], skip_special_tokens=True)
|
22 |
|
23 |
+
# Text-in / text-out Gradio UI around greet; live=True asks Gradio to re-run
# the function as the input changes instead of waiting for a submit click.
iface = gr.Interface(
    fn=greet,
    inputs="text",
    outputs="text",
    title="PEFT Model for Big Brain",
    live=True,
)

# share=True requests a public share link when the app is launched.
iface.launch(share=True)
|
|