asdaswadefswefr committed on
Commit
041e5ac
•
1 Parent(s): 2d8fdae

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -2
app.py CHANGED
@@ -1,7 +1,15 @@
1
  import gradio as gr
2
- from transformers import AutoTokenizer, AutoModelForCausalLM
3
  import torch
4
 
 
 
 
 
 
 
 
 
5
  # Inicializa o modelo e tokenizer
6
  model_name = "Orenguteng/Llama-3-8B-Lexi-Uncensored"
7
  tokenizer = AutoTokenizer.from_pretrained(model_name)
@@ -9,7 +17,7 @@ model = AutoModelForCausalLM.from_pretrained(
9
  model_name,
10
  torch_dtype=torch.float16,
11
  device_map="auto",
12
- load_in_8bit=True # Isso ajuda a reduzir o uso de memΓ³ria
13
  )
14
 
15
  def generate_text(prompt):
 
1
  import gradio as gr
2
+ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
3
  import torch
4
 
5
+ # Configuração da quantização
6
+ quantization_config = BitsAndBytesConfig(
7
+ load_in_4bit=True, # ou use True para 4-bit
8
+ bnb_4bit_compute_dtype=torch.float16,
9
+ bnb_4bit_use_double_quant=True,
10
+ bnb_4bit_quant_type="nf4"
11
+ )
12
+
13
  # Inicializa o modelo e tokenizer
14
  model_name = "Orenguteng/Llama-3-8B-Lexi-Uncensored"
15
  tokenizer = AutoTokenizer.from_pretrained(model_name)
 
17
  model_name,
18
  torch_dtype=torch.float16,
19
  device_map="auto",
20
+ quantization_config=quantization_config
21
  )
22
 
23
  def generate_text(prompt):