Update app.py
app.py CHANGED
@@ -1,5 +1,23 @@
+import torch
 from peft import PeftModel
-from transformers import AutoModelForCausalLM
+from transformers import AutoModelForCausalLM, AutoTokenizer
 
+# Load tokenizer
+tokenizer = AutoTokenizer.from_pretrained("google/gemma-3-1b-pt")
+
+# Load base model on CPU
 base_model = AutoModelForCausalLM.from_pretrained("google/gemma-3-1b-pt")
-
+
+# Load fine-tuned PEFT model
+model = PeftModel.from_pretrained(base_model, "hackergeek98/gemma-finetuned")
+
+# Ensure model runs on CPU
+model = model.to("cpu")
+
+# Test inference
+input_text = "Hello, how are you?"
+input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to("cpu")
+
+# Generate output
+output = model.generate(input_ids, max_length=50)
+print(tokenizer.decode(output[0], skip_special_tokens=True))
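For a quick sanity check of the updated script, a minimal variant is sketched below. It is not the committed code: it additionally sets the model to eval mode, wraps generation in torch.no_grad(), and passes the attention mask to generate(), none of which the commit does. The model and adapter IDs are taken from the diff above, and loading them assumes access to the (gated) google/gemma-3-1b-pt weights and the hackergeek98/gemma-finetuned adapter.

import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

# Tokenizer and base model come from the same checkpoint used in the diff.
tokenizer = AutoTokenizer.from_pretrained("google/gemma-3-1b-pt")
base_model = AutoModelForCausalLM.from_pretrained("google/gemma-3-1b-pt")

# Attach the fine-tuned PEFT adapter and keep inference on CPU.
model = PeftModel.from_pretrained(base_model, "hackergeek98/gemma-finetuned")
model = model.to("cpu")
model.eval()

inputs = tokenizer("Hello, how are you?", return_tensors="pt")
with torch.no_grad():  # no gradients needed for inference
    output = model.generate(
        input_ids=inputs.input_ids,
        attention_mask=inputs.attention_mask,  # silences the missing-mask warning
        max_new_tokens=50,  # counts only generated tokens, unlike max_length
    )
print(tokenizer.decode(output[0], skip_special_tokens=True))

One detail worth noting: max_length=50 in the committed script caps prompt plus generated tokens combined, so a long prompt leaves little or no room for the reply; max_new_tokens sidesteps that. The torch import in the commit is otherwise unused.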