willsh1997 committed
Commit a4e223a · 1 Parent(s): 097ebe4

remove quant, import at torch.bfloat16

Files changed (1):
1. llmcalc_gradio.py +3 -2
llmcalc_gradio.py CHANGED
@@ -9,13 +9,14 @@ import gradio as gr
 # quantization_config = BitsAndBytesConfig(load_in_4bit=True)
 torch_device = "cuda" if torch.cuda.is_available() else ("mps" if torch.mps.is_available() else "cpu")
 
-torch_dtype = torch.float16 if torch_device in ["cuda", "mps"] else torch.float32
+torch_dtype = torch.bfloat16 if torch_device in ["cuda", "mps"] else torch.float32
 
 llama_model=AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.2-3B-Instruct",
     # quantization_config=quantization_config,
     torch_dtype=torch_dtype,
     device_map=torch_device,
-    load_in_4bit=True) #for puny devices like mine.
+    # load_in_4bit=True #for puny devices like mine.
+    )
 
 
 llama_tokenizer=AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-3B-Instruct")
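
With this commit the model loads in bfloat16 on CUDA/MPS (float32 on CPU) and the bitsandbytes 4-bit path is commented out rather than passed to from_pretrained. Below is a minimal, self-contained sketch of the updated loading code plus a usage example; the chat prompt, generate() settings, and the use of torch.backends.mps.is_available() (the script itself calls torch.mps.is_available()) are illustrative assumptions, not part of the commit.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Same device/dtype selection as the updated llmcalc_gradio.py.
torch_device = "cuda" if torch.cuda.is_available() else ("mps" if torch.backends.mps.is_available() else "cpu")
torch_dtype = torch.bfloat16 if torch_device in ["cuda", "mps"] else torch.float32

# No quantization_config / load_in_4bit here, matching the post-commit state of the file.
model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-3.2-3B-Instruct",
    torch_dtype=torch_dtype,
    device_map=torch_device,
)
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-3B-Instruct")

# Illustrative generation call (not in the diff): build a chat-formatted prompt and decode the reply.
messages = [{"role": "user", "content": "What is 17 * 23?"}]
inputs = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt").to(model.device)
outputs = model.generate(inputs, max_new_tokens=64)
print(tokenizer.decode(outputs[0][inputs.shape[-1]:], skip_special_tokens=True))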