ginipick committed · Commit a7a4022 · verified · Parent: fece974

Update app.py

Files changed (1): app.py (+38 -26)
app.py CHANGED
@@ -1,5 +1,6 @@
 import os
-import spaces
+# Set environment variable before importing torch to avoid nested tensor issues
+os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
 
 import time
 import gradio as gr
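
Note: the replacement comment assumes torch consults PYTORCH_ENABLE_MPS_FALLBACK when it initializes, so the assignment has to precede the first `import torch` anywhere in the process. A minimal sketch of that ordering:

import os

# Must run before torch is imported anywhere in this process; once torch
# has initialized its backends, changing the variable has no effect.
os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"

import torch  # noqa: E402  (deliberately after the environment setup)

x = torch.ones(2)  # on Apple MPS, unsupported ops now fall back to CPU instead of raising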
@@ -14,7 +15,6 @@ from tqdm import tqdm
 import bitsandbytes as bnb
 from bitsandbytes.nn.modules import Params4bit, QuantState
 
-import torch
 import random
 from einops import rearrange, repeat
 from diffusers import AutoencoderKL
@@ -22,6 +22,9 @@ from torch import Tensor, nn
 from transformers import CLIPTextModel, CLIPTokenizer
 from transformers import T5EncoderModel, T5Tokenizer
 
+# Import spaces after other imports to minimize conflicts
+import spaces
+
 # ---------------- Encoders ----------------
 
 class HFEmbedder(nn.Module):
@@ -58,10 +61,32 @@ class HFEmbedder(nn.Module):
         )
         return outputs[self.output_key]
 
-device = "cuda"
-t5 = HFEmbedder("DeepFloyd/t5-v1_1-xxl", max_length=512, torch_dtype=torch.bfloat16).to(device)
-clip = HFEmbedder("openai/clip-vit-large-patch14", max_length=77, torch_dtype=torch.bfloat16).to(device)
-ae = AutoencoderKL.from_pretrained("black-forest-labs/FLUX.1-dev", subfolder="vae", torch_dtype=torch.bfloat16).to(device)
+# Initialize models without GPU decorator first
+device = "cuda" if torch.cuda.is_available() else "cpu"
+t5 = None
+clip = None
+ae = None
+model = None
+model_initialized = False
+
+def initialize_models():
+    global t5, clip, ae, model, model_initialized
+    if not model_initialized:
+        print("Initializing models...")
+        t5 = HFEmbedder("DeepFloyd/t5-v1_1-xxl", max_length=512, torch_dtype=torch.bfloat16).to(device)
+        clip = HFEmbedder("openai/clip-vit-large-patch14", max_length=77, torch_dtype=torch.bfloat16).to(device)
+        ae = AutoencoderKL.from_pretrained("black-forest-labs/FLUX.1-dev", subfolder="vae", torch_dtype=torch.bfloat16).to(device)
+
+        # Load the NF4 quantized checkpoint
+        from huggingface_hub import hf_hub_download
+        from safetensors.torch import load_file
+
+        sd = load_file(hf_hub_download(repo_id="lllyasviel/flux1-dev-bnb-nf4", filename="flux1-dev-bnb-nf4-v2.safetensors"))
+        sd = {k.replace("model.diffusion_model.", ""): v for k, v in sd.items() if "model.diffusion_model" in k}
+        model = Flux().to(dtype=torch.bfloat16, device=device)
+        result = model.load_state_dict(sd)
+        model_initialized = True
+        print("Models initialized successfully!")
 
 # ---------------- NF4 ----------------
 
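Note: the initializer moved here bundles two reusable patterns: a guarded lazy load (construct once, cache in module globals) and key remapping so the safetensors entries match the standalone Flux module's parameter names. A condensed sketch of both, with the function name get_nf4_state_dict chosen for illustration:

from huggingface_hub import hf_hub_download
from safetensors.torch import load_file

_state_dict = None  # module-level cache, filled on first call

def get_nf4_state_dict():
    """Download the NF4 checkpoint once and remap its keys."""
    global _state_dict
    if _state_dict is None:
        path = hf_hub_download(
            repo_id="lllyasviel/flux1-dev-bnb-nf4",
            filename="flux1-dev-bnb-nf4-v2.safetensors",
        )
        raw = load_file(path)
        # Checkpoint keys look like "model.diffusion_model.<name>"; strip
        # the prefix so they match Flux's own state dict.
        _state_dict = {
            k.replace("model.diffusion_model.", ""): v
            for k, v in raw.items()
            if "model.diffusion_model" in k
        }
    return _state_dict
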
@@ -163,7 +188,7 @@ class Linear(ForgeLoader4Bit):
             self.bias.data = self.bias.data.to(x.dtype)
         return functional_linear_4bits(x, self.weight, self.bias)
 
-import torch.nn as nn
+# Override Linear after all torch imports are done
 nn.Linear = Linear
 
 # ---------------- Model ----------------
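
Note: nn.Linear = Linear is a module-attribute monkeypatch: any class defined after that line resolves nn.Linear at instantiation time and therefore builds the 4-bit variant. A minimal sketch of why the ordering matters, using a tagged stand-in instead of the real 4-bit layer:

import torch.nn as nn

class TaggedLinear(nn.Linear):
    """Stand-in for the 4-bit Linear; same behavior, just tagged."""
    patched = True

nn.Linear = TaggedLinear  # later references to nn.Linear now resolve here

class TinyModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.proj = nn.Linear(4, 4)  # looked up at runtime -> TaggedLinear

print(getattr(TinyModel().proj, "patched", False))  # prints: True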
@@ -608,34 +633,22 @@ def get_image(image) -> torch.Tensor | None:
     img: torch.Tensor = transform(image)
     return img[None, ...]
 
-# Load the NF4 quantized checkpoint
-from huggingface_hub import hf_hub_download
-from safetensors.torch import load_file
-
-sd = load_file(hf_hub_download(repo_id="lllyasviel/flux1-dev-bnb-nf4", filename="flux1-dev-bnb-nf4-v2.safetensors"))
-sd = {k.replace("model.diffusion_model.", ""): v for k, v in sd.items() if "model.diffusion_model" in k}
-model = Flux().to(dtype=torch.bfloat16, device="cuda")
-result = model.load_state_dict(sd)
-model_zero_init = False
-
-@spaces.GPU
+@spaces.GPU(duration=120)
 @torch.no_grad()
 def generate_image(
     prompt, width, height, guidance, inference_steps, seed,
     do_img2img, init_image, image2image_strength, resize_img,
     progress=gr.Progress(track_tqdm=True),
 ):
+    # Initialize models on first run
+    initialize_models()
+
     if seed == 0:
         seed = int(random.random() * 1_000_000)
 
     device = "cuda" if torch.cuda.is_available() else "cpu"
     torch_device = torch.device(device)
 
-    global model, model_zero_init
-    if not model_zero_init:
-        model = model.to(torch_device)
-        model_zero_init = True
-
     if do_img2img and init_image is not None:
         init_image = get_image(init_image)
         if resize_img:
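
Note: on ZeroGPU Spaces no GPU is attached while the module imports, which is why the heavy setup moved inside the decorated function; @spaces.GPU(duration=120) requests a GPU slice of up to 120 seconds per call. A minimal sketch of the pattern, with build_pipeline as a hypothetical stand-in for the app's model setup:

import spaces
import torch

pipe = None  # keep import time cheap; the GPU exists only inside decorated calls

@spaces.GPU(duration=120)  # request up to 120 s of GPU time per invocation
@torch.no_grad()
def infer(prompt: str):
    global pipe
    if pipe is None:
        pipe = build_pipeline().to("cuda")  # hypothetical loader, runs once
    return pipe(prompt)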
@@ -759,6 +772,5 @@ if __name__ == "__main__":
     demo = create_demo()
     # Enable the queue to handle concurrency
     demo.queue()
-    # Launch with show_api=False and share=True to avoid the "bool is not iterable" error
-    # and the "ValueError: When localhost is not accessible..." error.
-    demo.launch(show_api=False, share=True, server_name="0.0.0.0", mcp_server=True)
+    # Launch with appropriate settings
+    demo.launch(show_api=False, share=True)
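
Note: demo.queue() is what keeps long GPU calls from overlapping, and share=True tunnels the UI when localhost is not reachable from outside. A self-contained sketch of the same launch sequence:

import gradio as gr

def echo(text):
    return text  # placeholder for generate_image

with gr.Blocks() as demo:
    prompt = gr.Textbox(label="Prompt")
    result = gr.Textbox(label="Result")
    prompt.submit(echo, inputs=prompt, outputs=result)

if __name__ == "__main__":
    demo.queue()  # queue requests so they run one at a time
    demo.launch(show_api=False, share=True)  # same flags as the commit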