app for evaluating the trained model
- app.py +6 -14
- requirements.txt +1 -4
app.py
CHANGED
@@ -1,28 +1,20 @@
 import gradio as gr
-from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
+from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
 
 # Load model and tokenizer
-model_name = "unsloth/Llama-3.2-1B-Instruct-bnb-4bit"
-
-# Configure quantization
-bnb_config = BitsAndBytesConfig(
-    load_in_4bit=True,
-    bnb_4bit_use_double_quant=True,
-    bnb_4bit_quant_type="nf4",
-    bnb_4bit_compute_dtype=torch.float16
-)
+model_name = "unsloth/Llama-3.2-1B-Instruct"  # Use the non-quantized version
 
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
-    quantization_config=bnb_config,
-    device_map="auto",
-    torch_dtype=torch.float16
+    torch_dtype=torch.float32,
+    low_cpu_mem_usage=True,
+    device_map="cpu"
 )
 
 def generate_text(prompt, max_new_tokens, temperature):
-    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+    inputs = tokenizer(prompt, return_tensors="pt")
 
     with torch.no_grad():
         outputs = model.generate(
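The diff view cuts off inside generate_text at the model.generate( call. For orientation only, here is a minimal sketch of how the rest of app.py plausibly continues, reusing the tokenizer and model loaded above; the sampling arguments, slider ranges, and labels are assumptions, not the file's actual contents:

def generate_text(prompt, max_new_tokens, temperature):
    # Tokenize on CPU; the model was loaded with device_map="cpu".
    inputs = tokenizer(prompt, return_tensors="pt")

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=int(max_new_tokens),
            temperature=float(temperature),
            do_sample=True,  # assumption: sampling, since temperature is exposed in the UI
        )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

demo = gr.Interface(
    fn=generate_text,
    inputs=[
        gr.Textbox(label="Prompt"),
        gr.Slider(1, 512, value=128, step=1, label="max_new_tokens"),
        gr.Slider(0.1, 2.0, value=0.7, label="temperature"),
    ],
    outputs=gr.Textbox(label="Output"),
)

demo.launch()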
requirements.txt
CHANGED
@@ -2,7 +2,4 @@ huggingface_hub==0.25.2
 
 gradio
 transformers
-torch
-accelerate>=0.26.0
-bitsandbytes
-
+torch
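One caveat worth flagging: in recent transformers releases, both low_cpu_mem_usage=True and any device_map= argument require the accelerate package at runtime, which this commit removes from requirements.txt. A hedged sketch of a plain CPU load that avoids that dependency entirely, using the same model name as above:

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

model_name = "unsloth/Llama-3.2-1B-Instruct"

tokenizer = AutoTokenizer.from_pretrained(model_name)
# A plain from_pretrained call defaults to CPU; without device_map or
# low_cpu_mem_usage, transformers does not need accelerate installed.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float32,
)
model.eval()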