Spaces: Running on Zero

Update app.py

app.py CHANGED
@@ -1,9 +1,24 @@
 import os
-
+import sys
+
+# Disable bitsandbytes triton integration to avoid conflicts
+os.environ["BITSANDBYTES_NOWELCOME"] = "1"
 os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
 
-#
-
+# Try to handle spaces import gracefully
+try:
+    import spaces
+    SPACES_AVAILABLE = True
+except Exception as e:
+    print(f"Warning: Could not import spaces: {e}")
+    SPACES_AVAILABLE = False
+    # Create a dummy decorator if spaces is not available
+    class spaces:
+        @staticmethod
+        def GPU(duration=None):
+            def decorator(func):
+                return func
+            return decorator
 
 import time
 import gradio as gr
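Note on the fallback above: the dummy class only mimics the parameterized form of the decorator. A minimal sketch of how a handler elsewhere in app.py would consume it under either import path; the function name and duration value are illustrative, not part of this commit:

# Hypothetical caller: works identically whether `spaces` is the real
# package or the dummy class defined in the except-branch above.
@spaces.GPU(duration=120)
def generate(prompt):
    ...

One caveat: the stub only supports the called form `@spaces.GPU(...)`. A bare `@spaces.GPU` would pass the function itself as `duration` and leave the name bound to the inner `decorator` closure, so call sites must use the parenthesized form.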
@@ -15,9 +30,6 @@ import math
 from typing import Callable
 
 from tqdm import tqdm
-import bitsandbytes as bnb
-from bitsandbytes.nn.modules import Params4bit, QuantState
-
 import random
 from einops import rearrange, repeat
 from diffusers import AutoencoderKL
@@ -25,6 +37,15 @@ from torch import Tensor, nn
 from transformers import CLIPTextModel, CLIPTokenizer
 from transformers import T5EncoderModel, T5Tokenizer
 
+# Import bitsandbytes after spaces to avoid conflicts
+try:
+    import bitsandbytes as bnb
+    from bitsandbytes.nn.modules import Params4bit, QuantState
+    BNB_AVAILABLE = True
+except Exception as e:
+    print(f"Warning: Could not import bitsandbytes: {e}")
+    BNB_AVAILABLE = False
+
 # ---------------- Encoders ----------------
 
 class HFEmbedder(nn.Module):
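With both optional imports guarded, the module can at least be imported on machines that lack ZeroGPU or a working CUDA bitsandbytes build. A small sketch of a startup check that surfaces both flags before `initialize_models()` (named in the hunk header below) builds the pipeline; the wording and placement are assumptions, not part of this commit:

# Hypothetical startup log: report which optional dependencies resolved.
print(f"spaces available: {SPACES_AVAILABLE}")
print(f"bitsandbytes available: {BNB_AVAILABLE}")
if not BNB_AVAILABLE:
    print("NF4 path disabled; model will fall back to standard nn.Linear layers")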
@@ -90,106 +111,110 @@ def initialize_models():
 
 # ---------------- NF4 ----------------
 
-def functional_linear_4bits(x, weight, bias):
-    import bitsandbytes as bnb
-    out = bnb.matmul_4bit(x, weight.t(), bias=bias, quant_state=weight.quant_state)
-    out = out.to(x)
-    return out
-
-class ForgeParams4bit(Params4bit):
-    """Subclass to force re-quantization to GPU if needed."""
-    def to(self, *args, **kwargs):
-        import torch
-        device, dtype, non_blocking, convert_to_format = torch._C._nn._parse_to(*args, **kwargs)
-        if device is not None and device.type == "cuda" and not self.bnb_quantized:
-            return self._quantize(device)
-        else:
-            n = ForgeParams4bit(
-                torch.nn.Parameter.to(self, device=device, dtype=dtype, non_blocking=non_blocking),
-                requires_grad=self.requires_grad,
-                quant_state=self.quant_state,
-                compress_statistics=False,
-                blocksize=64,
-                quant_type=self.quant_type,
-                quant_storage=self.quant_storage,
-                bnb_quantized=self.bnb_quantized,
-                module=self.module
-            )
-            self.module.quant_state = n.quant_state
-            self.data = n.data
-            self.quant_state = n.quant_state
-            return n
-
-class ForgeLoader4Bit(nn.Module):
-    def __init__(self, *, device, dtype, quant_type, **kwargs):
-        super().__init__()
-        self.dummy = nn.Parameter(torch.empty(1, device=device, dtype=dtype))
-        self.weight = None
-        self.quant_state = None
-        self.bias = None
-        self.quant_type = quant_type
-
-    def _save_to_state_dict(self, destination, prefix, keep_vars):
-        super()._save_to_state_dict(destination, prefix, keep_vars)
-        from bitsandbytes.nn.modules import QuantState
-        quant_state = getattr(self.weight, "quant_state", None)
-        if quant_state is not None:
-            for k, v in quant_state.as_dict(packed=True).items():
-                destination[prefix + "weight." + k] = v if keep_vars else v.detach()
-        return
-
-    def _load_from_state_dict(
-        self, state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs
-    ):
-        from bitsandbytes.nn.modules import Params4bit
-        import torch
-
-        quant_state_keys = {k[len(prefix + "weight."):] for k in state_dict.keys() if k.startswith(prefix + "weight.")}
-        if any('bitsandbytes' in k for k in quant_state_keys):
-            quant_state_dict = {k: state_dict[prefix + "weight." + k] for k in quant_state_keys}
-            self.weight = ForgeParams4bit.from_prequantized(
-                data=state_dict[prefix + 'weight'],
-                quantized_stats=quant_state_dict,
-                requires_grad=False,
-                device=torch.device('cuda'),
-                module=self
-            )
-            self.quant_state = self.weight.quant_state
-
-            if prefix + 'bias' in state_dict:
-                self.bias = torch.nn.Parameter(state_dict[prefix + 'bias'].to(self.dummy))
-            del self.dummy
-        elif hasattr(self, 'dummy'):
-            if prefix + 'weight' in state_dict:
-                self.weight = ForgeParams4bit(
-                    state_dict[prefix + 'weight'].to(self.dummy),
-                    requires_grad=False,
-                    compress_statistics=True,
-                    quant_type=self.quant_type,
-                    quant_storage=torch.uint8,
-                    module=self,
-                )
-                self.quant_state = self.weight.quant_state
-
-            if prefix + 'bias' in state_dict:
-                self.bias = torch.nn.Parameter(state_dict[prefix + 'bias'].to(self.dummy))
-
-            del self.dummy
-        else:
-            super()._load_from_state_dict(state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs)
-
-class Linear(ForgeLoader4Bit):
-    def __init__(self, *args, device=None, dtype=None, **kwargs):
-        super().__init__(device=device, dtype=dtype, quant_type='nf4')
-
-    def forward(self, x):
-        self.weight.quant_state = self.quant_state
-        if self.bias is not None and self.bias.dtype != x.dtype:
-            self.bias.data = self.bias.data.to(x.dtype)
-        return functional_linear_4bits(x, self.weight, self.bias)
-
-nn.Linear = Linear
-
+if BNB_AVAILABLE:
+    def functional_linear_4bits(x, weight, bias):
+        import bitsandbytes as bnb
+        out = bnb.matmul_4bit(x, weight.t(), bias=bias, quant_state=weight.quant_state)
+        out = out.to(x)
+        return out
+
+    class ForgeParams4bit(Params4bit):
+        """Subclass to force re-quantization to GPU if needed."""
+        def to(self, *args, **kwargs):
+            import torch
+            device, dtype, non_blocking, convert_to_format = torch._C._nn._parse_to(*args, **kwargs)
+            if device is not None and device.type == "cuda" and not self.bnb_quantized:
+                return self._quantize(device)
+            else:
+                n = ForgeParams4bit(
+                    torch.nn.Parameter.to(self, device=device, dtype=dtype, non_blocking=non_blocking),
+                    requires_grad=self.requires_grad,
+                    quant_state=self.quant_state,
+                    compress_statistics=False,
+                    blocksize=64,
+                    quant_type=self.quant_type,
+                    quant_storage=self.quant_storage,
+                    bnb_quantized=self.bnb_quantized,
+                    module=self.module
+                )
+                self.module.quant_state = n.quant_state
+                self.data = n.data
+                self.quant_state = n.quant_state
+                return n
+
+    class ForgeLoader4Bit(nn.Module):
+        def __init__(self, *, device, dtype, quant_type, **kwargs):
+            super().__init__()
+            self.dummy = nn.Parameter(torch.empty(1, device=device, dtype=dtype))
+            self.weight = None
+            self.quant_state = None
+            self.bias = None
+            self.quant_type = quant_type
+
+        def _save_to_state_dict(self, destination, prefix, keep_vars):
+            super()._save_to_state_dict(destination, prefix, keep_vars)
+            from bitsandbytes.nn.modules import QuantState
+            quant_state = getattr(self.weight, "quant_state", None)
+            if quant_state is not None:
+                for k, v in quant_state.as_dict(packed=True).items():
+                    destination[prefix + "weight." + k] = v if keep_vars else v.detach()
+            return
+
+        def _load_from_state_dict(
+            self, state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs
+        ):
+            from bitsandbytes.nn.modules import Params4bit
+            import torch
+
+            quant_state_keys = {k[len(prefix + "weight."):] for k in state_dict.keys() if k.startswith(prefix + "weight.")}
+            if any('bitsandbytes' in k for k in quant_state_keys):
+                quant_state_dict = {k: state_dict[prefix + "weight." + k] for k in quant_state_keys}
+                self.weight = ForgeParams4bit.from_prequantized(
+                    data=state_dict[prefix + 'weight'],
+                    quantized_stats=quant_state_dict,
+                    requires_grad=False,
+                    device=torch.device('cuda'),
+                    module=self
+                )
+                self.quant_state = self.weight.quant_state
+
+                if prefix + 'bias' in state_dict:
+                    self.bias = torch.nn.Parameter(state_dict[prefix + 'bias'].to(self.dummy))
+                del self.dummy
+            elif hasattr(self, 'dummy'):
+                if prefix + 'weight' in state_dict:
+                    self.weight = ForgeParams4bit(
+                        state_dict[prefix + 'weight'].to(self.dummy),
+                        requires_grad=False,
+                        compress_statistics=True,
+                        quant_type=self.quant_type,
+                        quant_storage=torch.uint8,
+                        module=self,
+                    )
+                    self.quant_state = self.weight.quant_state
+
+                if prefix + 'bias' in state_dict:
+                    self.bias = torch.nn.Parameter(state_dict[prefix + 'bias'].to(self.dummy))
+
+                del self.dummy
+            else:
+                super()._load_from_state_dict(state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs)
+
+    class Linear(ForgeLoader4Bit):
+        def __init__(self, *args, device=None, dtype=None, **kwargs):
+            super().__init__(device=device, dtype=dtype, quant_type='nf4')
+
+        def forward(self, x):
+            self.weight.quant_state = self.quant_state
+            if self.bias is not None and self.bias.dtype != x.dtype:
+                self.bias.data = self.bias.data.to(x.dtype)
+            return functional_linear_4bits(x, self.weight, self.bias)
+
+    # Override Linear after all torch imports are done
+    original_linear = nn.Linear
+    nn.Linear = Linear
+else:
+    print("Warning: BitsAndBytes not available, using standard Linear layers")
 
 # ---------------- Model ----------------
 
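Because the override rebinds `nn.Linear` globally, every module class instantiated after this point builds `ForgeLoader4Bit`-backed layers, and prequantized NF4 checkpoints load through `_load_from_state_dict`. A minimal sketch of the intended flow; the model class and checkpoint filename are placeholders, not from this commit:

from safetensors.torch import load_file

# Hypothetical: any model constructed after the patch picks up the 4-bit Linear.
model = FluxTransformer()                         # placeholder model class
sd = load_file("flux1-dev-bnb-nf4.safetensors")   # placeholder checkpoint name
model.load_state_dict(sd, strict=False)           # 'bitsandbytes' keys route into ForgeParams4bit.from_prequantized
model.to("cuda")                                  # ForgeParams4bit.to() quantizes any remaining fp16 weights

# `original_linear`, saved before the patch, allows restoring the stock layer
# for modules that must stay full precision (e.g. the VAE or text encoders):
nn.Linear = original_linear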
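For reference, the forward path of the patched `Linear` reduces to a single bitsandbytes kernel call. A self-contained toy check of that call, assuming a CUDA device and a working bitsandbytes install; the shapes are illustrative:

import torch
import bitsandbytes as bnb
from bitsandbytes.nn.modules import Params4bit

# Quantize a random fp16 weight to NF4, then run the same matmul that
# functional_linear_4bits() performs in the diff above.
w = Params4bit(torch.randn(64, 32, dtype=torch.float16), requires_grad=False, quant_type="nf4")
w = w.to("cuda")   # Params4bit quantizes on the move to the GPU
x = torch.randn(4, 32, dtype=torch.float16, device="cuda")
y = bnb.matmul_4bit(x, w.t(), bias=None, quant_state=w.quant_state)
print(y.shape)     # torch.Size([4, 64]), out_features recovered from the quant state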