Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -11,8 +11,8 @@ from diffusers.models.attention_processor import AttnProcessor2_0
 from custom_pipeline import FluxWithCFGPipeline

 # --- Torch Optimizations ---
-torch.backends.cuda.matmul.allow_tf32 = True
-torch.backends.cudnn.benchmark = True # Enable cuDNN benchmark for potentially faster convolutions
+# torch.backends.cuda.matmul.allow_tf32 = True
+# torch.backends.cudnn.benchmark = True # Enable cuDNN benchmark for potentially faster convolutions

 # --- Constants ---
 MAX_SEED = np.iinfo(np.int32).max
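
Note on the two flags this hunk comments out: `allow_tf32` permits TF32 tensor-core math in matmuls on Ampere-or-newer GPUs, and `cudnn.benchmark` lets cuDNN autotune convolution algorithms for repeated input shapes. A minimal sketch of how these flags are typically gated on CUDA availability (illustration only, not part of the commit):

import torch

# Illustration only: opt in to faster, slightly lower-precision matmuls and cuDNN autotuning
# when a CUDA device is actually present.
if torch.cuda.is_available():
    torch.backends.cuda.matmul.allow_tf32 = True  # TF32 matmuls on Ampere+ GPUs
    torch.backends.cudnn.benchmark = True         # autotune convolutions; best with static input shapes
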
@@ -27,39 +27,30 @@ ENHANCE_STEPS = 2 # Fixed steps for the enhance button
 # --- Device and Model Setup ---
 dtype = torch.float16
 device = "cuda" if torch.cuda.is_available() else "cpu"
-pipe = None # Initialize pipe to None

-
-
-
-
-pipe.vae = AutoencoderTiny.from_pretrained("madebyollin/taef1", torch_dtype=dtype)
-
-pipe.to(device)
+pipe = FluxWithCFGPipeline.from_pretrained(
+    "black-forest-labs/FLUX.1-schnell", torch_dtype=dtype
+)
+pipe.vae = AutoencoderTiny.from_pretrained("madebyollin/taef1", torch_dtype=dtype)

-
-pipe.unet.set_attn_processor(AttnProcessor2_0())
-pipe.vae.set_attn_processor(AttnProcessor2_0()) # VAE might benefit too
+pipe.to(device)

-
-
-
-pipe.unload_lora_weights() # Unload after fusing
+# Apply optimizations
+pipe.unet.set_attn_processor(AttnProcessor2_0())
+pipe.vae.set_attn_processor(AttnProcessor2_0()) # VAE might benefit too

-
-
-
+pipe.load_lora_weights('hugovntr/flux-schnell-realism', weight_name='schnell-realism_v2.3.safetensors', adapter_name="better")
+pipe.set_adapters(["better"], adapter_weights=[1.0])
+pipe.fuse_lora(adapter_name=["better"], lora_scale=1.0) # Fuse for potential speedup
+pipe.unload_lora_weights() # Unload after fusing

-
-
-
-
-except Exception as e:
-    print(e)
+# --- Compilation (Major Speed Optimization) ---
+pipe.vae.decoder = torch.compile(pipe.vae.decoder, mode="reduce-overhead", fullgraph=True)
+pipe.vae.encoder = torch.compile(pipe.vae.encoder, mode="reduce-overhead", fullgraph=True)


 # --- Inference Function ---
-@spaces.GPU
+@spaces.GPU
 def generate_image(prompt: str, seed: int = 42, width: int = DEFAULT_WIDTH, height: int = DEFAULT_HEIGHT, randomize_seed: bool = False, num_inference_steps: int = DEFAULT_INFERENCE_STEPS, is_enhance: bool = False):
     """Generates an image using the FLUX pipeline with error handling."""

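
The new setup block follows a common diffusers recipe: load the base pipeline in fp16, swap in the tiny VAE, load a LoRA as a named adapter, fuse it into the base weights, drop the adapter bookkeeping, then `torch.compile` the VAE. A minimal sketch of that recipe, assuming the stock `FluxPipeline` rather than the repo's custom `FluxWithCFGPipeline`, and using the standard diffusers `fuse_lora(lora_scale=...)` keyword (the diff itself passes `adapter_name=["better"]`):

import torch
from diffusers import FluxPipeline, AutoencoderTiny

# Sketch only: FLUX.1-schnell in fp16 with the tiny VAE, a fused realism LoRA, and a compiled VAE.
dtype = torch.float16
pipe = FluxPipeline.from_pretrained("black-forest-labs/FLUX.1-schnell", torch_dtype=dtype)
pipe.vae = AutoencoderTiny.from_pretrained("madebyollin/taef1", torch_dtype=dtype)
pipe.to("cuda")

# Load the LoRA as a named adapter, bake it into the base weights, then discard the adapter state.
pipe.load_lora_weights(
    "hugovntr/flux-schnell-realism",
    weight_name="schnell-realism_v2.3.safetensors",
    adapter_name="better",
)
pipe.set_adapters(["better"], adapter_weights=[1.0])
pipe.fuse_lora(lora_scale=1.0)   # standard diffusers keyword; see the diff for the variant used here
pipe.unload_lora_weights()

# torch.compile trades a slow first call for faster steady-state encoding/decoding.
pipe.vae.decoder = torch.compile(pipe.vae.decoder, mode="reduce-overhead", fullgraph=True)
pipe.vae.encoder = torch.compile(pipe.vae.encoder, mode="reduce-overhead", fullgraph=True)

With `mode="reduce-overhead"`, the first generation pays the compilation cost, so running a warmup pass before serving requests is common.
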
@@ -119,20 +110,6 @@ def generate_image(prompt: str, seed: int = 42, width: int = DEFAULT_WIDTH, heig
         raise gr.Error(f"An error occurred during generation: {e}")


-# --- Real-time Generation Wrapper ---
-# This function checks the realtime toggle before calling the main generation function.
-# It's triggered by changes in prompt or sliders when realtime is enabled.
-def handle_realtime_update(realtime_enabled: bool, prompt: str, seed: int, width: int, height: int, randomize_seed: bool, num_inference_steps: int):
-    if realtime_enabled and pipe is not None:
-        # Call generate_image directly. Errors within generate_image will be caught and raised as gr.Error.
-        # We don't set is_enhance=True for realtime updates.
-        return generate_image(prompt, seed, width, height, randomize_seed, num_inference_steps, is_enhance=False)
-    else:
-        # If realtime is disabled or pipe failed, don't update the image, seed, or latency.
-        # Return gr.update() for each output component to indicate no change.
-        return gr.update(), gr.update(), gr.update()
-
-
 # --- Example Prompts ---
 examples = [
     "a tiny astronaut hatching from an egg on the moon",
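
The deleted `handle_realtime_update` wrapper relied on Gradio's no-op return: a handler can return `gr.update()` for each of its outputs to leave those components unchanged. A minimal sketch of that gating pattern with hypothetical names (not the app's code):

import gradio as gr

def fake_generate(prompt: str):
    # Hypothetical stand-in for the real generation call.
    return f"image for: {prompt}", 42, "12 ms"

def maybe_update(enabled: bool, prompt: str):
    if enabled and prompt.strip():
        return fake_generate(prompt)
    # gr.update() with no arguments means "leave this output component as it is".
    return gr.update(), gr.update(), gr.update()
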
@@ -195,9 +172,7 @@ with gr.Blocks() as demo:
         fn=generate_image,
         inputs=[prompt, seed, width, height],
         outputs=[result, seed, latency],
-        show_progress="full"
-        queue=False,
-        concurrency_limit=None,
+        show_progress="full"
     )

     generateBtn.click(
@@ -206,7 +181,6 @@ with gr.Blocks() as demo:
         outputs=[result, seed, latency],
         show_progress="full",
         api_name="RealtimeFlux",
-        queue=False
     )

     def update_ui(realtime_enabled):
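
Because the click listener keeps `api_name="RealtimeFlux"`, the generation event remains exposed as a named endpoint on the Space. A hedged sketch of calling it with `gradio_client`; the Space ID below is a placeholder, and the endpoint's exact argument list should be checked with `view_api()` rather than assumed:

from gradio_client import Client

# Sketch only: the Space ID is a placeholder, not taken from the diff.
client = Client("your-username/your-realtime-flux-space")
client.view_api()  # inspect the real signature of the /RealtimeFlux endpoint first

result = client.predict(
    "a tiny astronaut hatching from an egg on the moon",  # prompt; remaining args depend on the endpoint
    api_name="/RealtimeFlux",
)
print(result)
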
@@ -222,21 +196,14 @@ with gr.Blocks() as demo:
     realtime.change(
         fn=update_ui,
         inputs=[realtime],
-        outputs=[prompt, generateBtn]
-        queue=False,
-        concurrency_limit=None
+        outputs=[prompt, generateBtn]
     )

-    # Removed the intermediate realtime_generation function.
-    # handle_realtime_update checks the realtime toggle internally.
-
     prompt.submit(
         fn=generate_image,
         inputs=[prompt, seed, width, height, randomize_seed, num_inference_steps],
         outputs=[result, seed, latency],
-        show_progress="full"
-        queue=False,
-        concurrency_limit=None
+        show_progress="full"
     )

     for component in [prompt, width, height, num_inference_steps]:
@@ -245,9 +212,7 @@ with gr.Blocks() as demo:
             inputs=[realtime, prompt, seed, width, height, randomize_seed, num_inference_steps],
             outputs=[result, seed, latency],
             show_progress="hidden",
-            trigger_mode="always_last"
-            queue=False,
-            concurrency_limit=None
+            trigger_mode="always_last"
         )

 # Launch the app
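
The remaining UI hunks all make the same adjustment: the listeners keep `show_progress`, `api_name`, and `trigger_mode="always_last"` while dropping the `queue=False` / `concurrency_limit=None` keyword arguments. A minimal sketch of the resulting live-update wiring with placeholder components (not the app's actual layout):

import gradio as gr

# Sketch only: re-run the handler on every edit, but keep only the latest pending trigger.
def echo(text: str) -> str:
    return text.upper()

with gr.Blocks() as demo:
    box = gr.Textbox(label="Prompt")
    out = gr.Textbox(label="Result")
    box.change(
        fn=echo,
        inputs=[box],
        outputs=[out],
        show_progress="hidden",       # don't flash the progress overlay on every keystroke
        trigger_mode="always_last",   # drop intermediate events, run once for the latest value
    )

demo.launch()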