Spaces: Running on Zero
Commit · a4e223a
1 Parent(s): 097ebe4

remove quant, import at torch.bfloat16

Files changed: llmcalc_gradio.py (+3 -2)
llmcalc_gradio.py CHANGED

```diff
@@ -9,13 +9,14 @@ import gradio as gr
 # quantization_config = BitsAndBytesConfig(load_in_4bit=True)
 torch_device = "cuda" if torch.cuda.is_available() else ("mps" if torch.mps.is_available() else "cpu")
 
-torch_dtype = torch.
+torch_dtype = torch.bfloat16 if torch_device in ["cuda", "mps"] else torch.float32
 
 llama_model=AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.2-3B-Instruct",
     # quantization_config=quantization_config,
     torch_dtype=torch_dtype,
     device_map=torch_device,
-    load_in_4bit=True
+    # load_in_4bit=True #for puny devices like mine.
+    )
 
 llama_tokenizer=AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-3B-Instruct")
 
```
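For context, here is a self-contained sketch of what this snippet looks like after the commit. The `transformers` imports are an assumption on my part (the hunk context only shows `import gradio as gr`), and the sketch uses `torch.backends.mps.is_available()` rather than the Space's `torch.mps.is_available()`, since the former is the long-standing spelling and works on older PyTorch builds too:

```python
# Minimal sketch of the post-commit state of this snippet.
# Assumed imports: the hunk only shows "import gradio as gr".
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Prefer CUDA, then Apple Silicon (MPS), then CPU. The Space calls
# torch.mps.is_available(); torch.backends.mps.is_available() is the
# older, widely available equivalent.
torch_device = "cuda" if torch.cuda.is_available() else (
    "mps" if torch.backends.mps.is_available() else "cpu"
)

# bfloat16 roughly halves memory versus float32 on accelerators;
# fall back to float32 on CPU.
torch_dtype = torch.bfloat16 if torch_device in ["cuda", "mps"] else torch.float32

llama_model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-3.2-3B-Instruct",
    torch_dtype=torch_dtype,
    device_map=torch_device,
)
llama_tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-3B-Instruct")
```

Passing a plain device string as `device_map` places the whole model on that one device, which is fine for a 3B model on a single GPU.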
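The commented-out `BitsAndBytesConfig(load_in_4bit=True)` lines are the quantized path this commit removes; note that passing `load_in_4bit=True` directly to `from_pretrained`, as the deleted line did, is deprecated in recent `transformers` in favor of a `BitsAndBytesConfig`. A rough, hypothetical sketch of turning it back on (the `bnb_4bit_compute_dtype` choice is my assumption, and bitsandbytes 4-bit loading requires a CUDA GPU):

```python
# Hypothetical sketch of re-enabling the 4-bit path this commit removed.
# Needs the bitsandbytes package and a CUDA GPU; the compute-dtype choice
# below is an assumption, not something the original file specifies.
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,  # assumed, to match the bf16 path above
)

llama_model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-3.2-3B-Instruct",
    quantization_config=quantization_config,
    device_map="auto",
)
```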