bobber committed on
Commit
22e90eb
·
verified ·
1 Parent(s): ce4b3d4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -4
app.py CHANGED
@@ -11,7 +11,8 @@ import torch
11
  # subfolder = "Qwen-0.5B-GRPO/checkpoint-1868"
12
  # filename = "simplescaling_s1-32B-Q4_K_S.gguf"
13
  # model_name = "simplescaling/s1.1-32B"
14
- model_name = "unsloth/Llama-4-Scout-17B-16E-Instruct-GGUF"
 
15
  filename = "Llama-4-Scout-17B-16E-Instruct-UD-IQ2_XXS.gguf"
16
  torch_dtype = torch.bfloat16 # could be torch.float16 or torch.bfloat16 torch.float32 too
17
  cache_dir = "/data"
@@ -27,14 +28,14 @@ cache_dir = "/data"
27
  model = Llama4ForConditionalGeneration.from_pretrained(
28
  model_name,
29
  attn_implementation="flex_attention",
30
- gguf_file=filename,
 
31
  torch_dtype=torch_dtype,
32
  device_map="auto",
33
- cache_dir = cache_dir,
34
  )
35
 
36
  tokenizer = AutoTokenizer.from_pretrained(model_name
37
- , gguf_file=filename
38
  # , subfolder=subfolder
39
  )
40
  SYSTEM_PROMPT = """
 
11
  # subfolder = "Qwen-0.5B-GRPO/checkpoint-1868"
12
  # filename = "simplescaling_s1-32B-Q4_K_S.gguf"
13
  # model_name = "simplescaling/s1.1-32B"
14
+ # model_name = "unsloth/Llama-4-Scout-17B-16E-Instruct-GGUF"
15
+ model_name = "unsloth/Llama-4-Scout-17B-16E-Instruct-unsloth-bnb-4bit"
16
  filename = "Llama-4-Scout-17B-16E-Instruct-UD-IQ2_XXS.gguf"
17
  torch_dtype = torch.bfloat16 # could be torch.float16 or torch.bfloat16 torch.float32 too
18
  cache_dir = "/data"
 
28
  model = Llama4ForConditionalGeneration.from_pretrained(
29
  model_name,
30
  attn_implementation="flex_attention",
31
+ # gguf_file=filename,
32
+ # cache_dir = cache_dir,
33
  torch_dtype=torch_dtype,
34
  device_map="auto",
 
35
  )
36
 
37
  tokenizer = AutoTokenizer.from_pretrained(model_name
38
+ # , gguf_file=filename
39
  # , subfolder=subfolder
40
  )
41
  SYSTEM_PROMPT = """