Titobsala committed on
Commit 1d05594 · 1 Parent(s): cd8908c

app for evaluating the trained model

Files changed (2):
  1. app.py +6 -14
  2. requirements.txt +1 -4
app.py CHANGED
@@ -1,28 +1,20 @@
 import gradio as gr
-from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
+from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
 
 # Load model and tokenizer
-model_name = "unsloth/Llama-3.2-1B-Instruct-bnb-4bit"
-
-# Configure quantization
-bnb_config = BitsAndBytesConfig(
-    load_in_4bit=True,
-    bnb_4bit_use_double_quant=True,
-    bnb_4bit_quant_type="nf4",
-    bnb_4bit_compute_dtype=torch.float16
-)
+model_name = "unsloth/Llama-3.2-1B-Instruct"  # Use the non-quantized version
 
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
-    quantization_config=bnb_config,
-    device_map="auto",
-    trust_remote_code=True
+    torch_dtype=torch.float32,
+    low_cpu_mem_usage=True,
+    device_map="cpu"
 )
 
 def generate_text(prompt, max_new_tokens, temperature):
-    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+    inputs = tokenizer(prompt, return_tensors="pt")
 
     with torch.no_grad():
         outputs = model.generate(
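
The hunk ends inside generate_text, so the rest of the file is not shown. For reference, here is a minimal sketch of how the CPU-only app.py plausibly continues; the generate() keyword arguments, the prompt-stripping logic, the slider ranges, and the gr.Interface wiring below are assumptions for illustration, not part of the commit:

import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Load model and tokenizer (CPU, full precision, as in the new version).
# Note: device_map / low_cpu_mem_usage require the accelerate package.
model_name = "unsloth/Llama-3.2-1B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float32,
    low_cpu_mem_usage=True,
    device_map="cpu",
)

def generate_text(prompt, max_new_tokens, temperature):
    inputs = tokenizer(prompt, return_tensors="pt")

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=int(max_new_tokens),
            temperature=temperature,
            do_sample=True,                       # assumption: sampling, since temperature is exposed
            pad_token_id=tokenizer.eos_token_id,  # Llama has no pad token by default
        )

    # Return only the newly generated tokens, not the echoed prompt
    new_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)

demo = gr.Interface(
    fn=generate_text,
    inputs=[
        gr.Textbox(label="Prompt"),
        gr.Slider(1, 512, value=128, step=1, label="Max new tokens"),
        gr.Slider(0.1, 2.0, value=0.7, label="Temperature"),
    ],
    outputs=gr.Textbox(label="Output"),
    title="Llama-3.2-1B-Instruct (CPU) evaluation app",
)

if __name__ == "__main__":
    demo.launch()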
requirements.txt CHANGED
@@ -2,7 +2,4 @@ huggingface_hub==0.25.2
 
 gradio
 transformers
-torch
-accelerate>=0.26.0
-bitsandbytes
-
+torch
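
One caveat: the new app.py still passes low_cpu_mem_usage=True and device_map="cpu", both of which transformers hands off to the accelerate package, so dropping accelerate>=0.26.0 here may break model loading unless those two arguments are removed as well. bitsandbytes, by contrast, is safe to drop once the 4-bit quantized checkpoint is no longer used.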