nmarafo committed (verified)
Commit 5f66366 · Parent: e77c77f

Update app.py

Files changed (1): app.py (+8 -6)
app.py CHANGED
@@ -1,24 +1,26 @@
 import gradio as gr
 from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
 import torch
-from threading import Thread
 import os
 
-import torch
+# Check CUDA availability
 print("CUDA available:", torch.cuda.is_available())
 print("CUDA version:", torch.version.cuda)
 
-# Cargar el token de Hugging Face desde los secretos
+# Load the Hugging Face token from secrets
 token = os.environ.get("HF_TOKEN")
 
 model_id = "google/shieldgemma-2b"
 
-# use quantization to lower GPU usage
+# Use quantization to lower GPU usage
 bnb_config = BitsAndBytesConfig(
-    load_in_4bit=True, bnb_4bit_use_double_quant=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.bfloat16
+    load_in_4bit=True,
+    bnb_4bit_use_double_quant=True,
+    bnb_4bit_quant_type="nf4",
+    bnb_4bit_compute_dtype=torch.bfloat16
 )
 
-tokenizer = AutoTokenizer.from_pretrained(model_id,token=token)
+tokenizer = AutoTokenizer.from_pretrained(model_id, token=token)
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
     torch_dtype=torch.bfloat16,
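
The diff is truncated after torch_dtype=torch.bfloat16,. For context, here is a minimal sketch of how a 4-bit load of this kind typically completes, assuming bnb_config is passed as quantization_config and placement uses device_map="auto"; neither detail is visible in the truncated diff, so treat them as assumptions rather than the actual contents of app.py.

import os

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

token = os.environ.get("HF_TOKEN")
model_id = "google/shieldgemma-2b"

# Same 4-bit NF4 config as in the diff above
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

tokenizer = AutoTokenizer.from_pretrained(model_id, token=token)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    quantization_config=bnb_config,  # assumption: not shown in the truncated diff
    device_map="auto",               # assumption: typical placement for quantized loads
    token=token,                     # assumption: the gated repo needs the token here too
)

With load_in_4bit=True the weights are stored as 4-bit NF4 (double-quantized to save a bit more memory), while computation runs in bfloat16 via bnb_4bit_compute_dtype, which is what the "lower GPU usage" comment in the diff refers to.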