bobber committed on
Commit
f50e1fa
·
verified ·
1 Parent(s): 588eaad

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -8
app.py CHANGED
@@ -5,11 +5,11 @@ from transformers import AutoProcessor, Llama4ForConditionalGeneration
5
 
6
  import torch
7
 
8
- from transformers import BitsAndBytesConfig
9
- bnb_config = BitsAndBytesConfig(
10
- load_in_4bit=True,
11
- llm_int8_enable_fp32_cpu_offload=True,
12
- )
13
 
14
  #Qwen/Qwen2.5-14B-Instruct-1M
15
  #Qwen/Qwen2-0.5B
@@ -18,7 +18,8 @@ bnb_config = BitsAndBytesConfig(
18
  # filename = "simplescaling_s1-32B-Q4_K_S.gguf"
19
  # model_name = "simplescaling/s1.1-32B"
20
  # model_name = "unsloth/Llama-4-Scout-17B-16E-Instruct-GGUF"
21
- model_name = "unsloth/Llama-4-Scout-17B-16E-Instruct-unsloth-bnb-4bit"
 
22
  filename = "Llama-4-Scout-17B-16E-Instruct-UD-IQ2_XXS.gguf"
23
  torch_dtype = torch.bfloat16 # could be torch.float16 or torch.bfloat16 torch.float32 too
24
  cache_dir = "/data"
@@ -35,9 +36,9 @@ model = Llama4ForConditionalGeneration.from_pretrained(
35
  model_name,
36
  attn_implementation="flex_attention",
37
  # gguf_file=filename,
38
- # cache_dir = cache_dir,
39
  torch_dtype=torch_dtype,
40
- quantization_config=bnb_config,
41
  device_map="auto",
42
  )
43
 
 
5
 
6
  import torch
7
 
8
+ # from transformers import BitsAndBytesConfig
9
+ # bnb_config = BitsAndBytesConfig(
10
+ # load_in_4bit=True,
11
+ # llm_int8_enable_fp32_cpu_offload=True,
12
+ # )
13
 
14
  #Qwen/Qwen2.5-14B-Instruct-1M
15
  #Qwen/Qwen2-0.5B
 
18
  # filename = "simplescaling_s1-32B-Q4_K_S.gguf"
19
  # model_name = "simplescaling/s1.1-32B"
20
  # model_name = "unsloth/Llama-4-Scout-17B-16E-Instruct-GGUF"
21
+ # model_name = "unsloth/Llama-4-Scout-17B-16E-Instruct-unsloth-bnb-4bit"
22
+ model_name = "meta-llama/Llama-4-Scout-17B-16E"
23
  filename = "Llama-4-Scout-17B-16E-Instruct-UD-IQ2_XXS.gguf"
24
  torch_dtype = torch.bfloat16 # could be torch.float16 or torch.bfloat16 torch.float32 too
25
  cache_dir = "/data"
 
36
  model_name,
37
  attn_implementation="flex_attention",
38
  # gguf_file=filename,
39
+ cache_dir = cache_dir,
40
  torch_dtype=torch_dtype,
41
+ # quantization_config=bnb_config,
42
  device_map="auto",
43
  )
44