bobber committed on
Commit
f50e1fa
·
verified ·
1 Parent(s): 588eaad

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -8
app.py CHANGED
@@ -5,11 +5,11 @@ from transformers import AutoProcessor, Llama4ForConditionalGeneration
5
 
6
  import torch
7
 
8
- from transformers import BitsAndBytesConfig
9
- bnb_config = BitsAndBytesConfig(
10
- load_in_4bit=True,
11
- llm_int8_enable_fp32_cpu_offload=True,
12
- )
13
 
14
  #Qwen/Qwen2.5-14B-Instruct-1M
15
  #Qwen/Qwen2-0.5B
@@ -18,7 +18,8 @@ bnb_config = BitsAndBytesConfig(
18
  # filename = "simplescaling_s1-32B-Q4_K_S.gguf"
19
  # model_name = "simplescaling/s1.1-32B"
20
  # model_name = "unsloth/Llama-4-Scout-17B-16E-Instruct-GGUF"
21
- model_name = "unsloth/Llama-4-Scout-17B-16E-Instruct-unsloth-bnb-4bit"
 
22
  filename = "Llama-4-Scout-17B-16E-Instruct-UD-IQ2_XXS.gguf"
23
  torch_dtype = torch.bfloat16 # could be torch.float16 or torch.bfloat16 torch.float32 too
24
  cache_dir = "/data"
@@ -35,9 +36,9 @@ model = Llama4ForConditionalGeneration.from_pretrained(
35
  model_name,
36
  attn_implementation="flex_attention",
37
  # gguf_file=filename,
38
- # cache_dir = cache_dir,
39
  torch_dtype=torch_dtype,
40
- quantization_config=bnb_config,
41
  device_map="auto",
42
  )
43
 
 
5
 
6
  import torch
7
 
8
+ # from transformers import BitsAndBytesConfig
9
+ # bnb_config = BitsAndBytesConfig(
10
+ # load_in_4bit=True,
11
+ # llm_int8_enable_fp32_cpu_offload=True,
12
+ # )
13
 
14
  #Qwen/Qwen2.5-14B-Instruct-1M
15
  #Qwen/Qwen2-0.5B
 
18
  # filename = "simplescaling_s1-32B-Q4_K_S.gguf"
19
  # model_name = "simplescaling/s1.1-32B"
20
  # model_name = "unsloth/Llama-4-Scout-17B-16E-Instruct-GGUF"
21
+ # model_name = "unsloth/Llama-4-Scout-17B-16E-Instruct-unsloth-bnb-4bit"
22
+ model_name = "meta-llama/Llama-4-Scout-17B-16E"
23
  filename = "Llama-4-Scout-17B-16E-Instruct-UD-IQ2_XXS.gguf"
24
  torch_dtype = torch.bfloat16 # could be torch.float16 or torch.bfloat16 torch.float32 too
25
  cache_dir = "/data"
 
36
  model_name,
37
  attn_implementation="flex_attention",
38
  # gguf_file=filename,
39
+ cache_dir = cache_dir,
40
  torch_dtype=torch_dtype,
41
+ # quantization_config=bnb_config,
42
  device_map="auto",
43
  )
44