FLUXllama

Running on Zero

App Files Community

ginipick committed 13 days ago

Commit

423b272

verified ·

1 Parent(s): 7a61cf3

Update app.py

Browse files

Files changed (1) hide show

app.py +20 -3

app.py CHANGED Viewed

@@ -94,9 +94,20 @@ def initialize_models():
     if not model_initialized:
         print("Initializing models...")
         device = "cuda" if torch.cuda.is_available() else "cpu"
-        t5 = HFEmbedder("DeepFloyd/t5-v1_1-xxl", max_length=512, torch_dtype=torch.bfloat16).to(device)
-        clip = HFEmbedder("openai/clip-vit-large-patch14", max_length=77, torch_dtype=torch.bfloat16).to(device)
-        ae = AutoencoderKL.from_pretrained("black-forest-labs/FLUX.1-dev", subfolder="vae", torch_dtype=torch.bfloat16).to(device)
         # Load the NF4 quantized checkpoint
         from huggingface_hub import hf_hub_download
@@ -106,6 +117,11 @@ def initialize_models():
         sd = {k.replace("model.diffusion_model.", ""): v for k, v in sd.items() if "model.diffusion_model" in k}
         model = Flux().to(dtype=torch.bfloat16, device=device)
         result = model.load_state_dict(sd)
         model_initialized = True
         print("Models initialized successfully!")
@@ -214,6 +230,7 @@ if BNB_AVAILABLE:
     original_linear = nn.Linear
     nn.Linear = Linear
 else:
     print("Warning: BitsAndBytes not available, using standard Linear layers")
 # ---------------- Model ----------------

     if not model_initialized:
         print("Initializing models...")
         device = "cuda" if torch.cuda.is_available() else "cpu"
+        # Temporarily restore original Linear for loading standard models
+        original_linear = nn.Linear
+        if BNB_AVAILABLE:
+            nn.Linear = original_linear
+        # Load standard models without quantization
+        t5 = HFEmbedder("DeepFloyd/t5-v1_1-xxl", max_length=512, torch_dtype=torch.bfloat16, low_cpu_mem_usage=True).to(device)
+        clip = HFEmbedder("openai/clip-vit-large-patch14", max_length=77, torch_dtype=torch.bfloat16, low_cpu_mem_usage=True).to(device)
+        ae = AutoencoderKL.from_pretrained("black-forest-labs/FLUX.1-dev", subfolder="vae", torch_dtype=torch.bfloat16, low_cpu_mem_usage=True).to(device)
+        # Re-apply quantized Linear for Flux model
+        if BNB_AVAILABLE:
+            nn.Linear = Linear
         # Load the NF4 quantized checkpoint
         from huggingface_hub import hf_hub_download
         sd = {k.replace("model.diffusion_model.", ""): v for k, v in sd.items() if "model.diffusion_model" in k}
         model = Flux().to(dtype=torch.bfloat16, device=device)
         result = model.load_state_dict(sd)
+        # Restore original Linear
+        if BNB_AVAILABLE:
+            nn.Linear = original_linear
         model_initialized = True
         print("Models initialized successfully!")
     original_linear = nn.Linear
     nn.Linear = Linear
 else:
+    original_linear = nn.Linear
     print("Warning: BitsAndBytes not available, using standard Linear layers")
 # ---------------- Model ----------------