Ryukijano committed on
Commit
f819c94
·
verified ·
1 Parent(s): 9565796

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -9
app.py CHANGED
@@ -8,7 +8,10 @@ from diffusers import DiffusionPipeline, AutoencoderTiny
8
  from diffusers.models.attention_processor import AttnProcessor2_0
9
  from custom_pipeline import FluxWithCFGPipeline
10
 
 
11
  torch.backends.cuda.matmul.allow_tf32 = True
 
 
12
 
13
  # Constants
14
  MAX_SEED = np.iinfo(np.int32).max
@@ -29,6 +32,10 @@ pipe.set_adapters(["better"], adapter_weights=[1.0])
29
  pipe.fuse_lora(adapter_name=["better"], lora_scale=1.0)
30
  pipe.unload_lora_weights()
31
 
 
 
 
 
32
  torch.cuda.empty_cache()
33
 
34
  # Inference function
@@ -40,14 +47,15 @@ def generate_image(prompt, seed=24, width=DEFAULT_WIDTH, height=DEFAULT_HEIGHT,
40
 
41
  start_time = time.time()
42
 
43
- # Only generate the last image in the sequence
44
- img = pipe.generate_images(
45
- prompt=prompt,
46
- width=width,
47
- height=height,
48
- num_inference_steps=num_inference_steps,
49
- generator=generator
50
- )
 
51
  latency = f"Latency: {(time.time()-start_time):.2f} seconds"
52
  return img, seed, latency
53
 
@@ -163,4 +171,4 @@ with gr.Blocks() as demo:
163
  )
164
 
165
  # Launch the app
166
- demo.launch()
 
8
  from diffusers.models.attention_processor import AttnProcessor2_0
9
  from custom_pipeline import FluxWithCFGPipeline
10
 
11
+ # Enable TF32 and set Tensor Core precision
12
  torch.backends.cuda.matmul.allow_tf32 = True
13
+ torch.backends.cudnn.allow_tf32 = True
14
+ torch.set_float32_matmul_precision('high')
15
 
16
  # Constants
17
  MAX_SEED = np.iinfo(np.int32).max
 
32
  pipe.fuse_lora(adapter_name=["better"], lora_scale=1.0)
33
  pipe.unload_lora_weights()
34
 
35
+ # Memory optimizations (optional, uncomment if needed)
36
+ # pipe.enable_model_cpu_offload()
37
+ # pipe.enable_sequential_cpu_offload()
38
+
39
  torch.cuda.empty_cache()
40
 
41
  # Inference function
 
47
 
48
  start_time = time.time()
49
 
50
+ with torch.autocast(device_type="cuda", dtype=torch.float16):
51
+ # Only generate the last image in the sequence
52
+ img = pipe.generate_images(
53
+ prompt=prompt,
54
+ width=width,
55
+ height=height,
56
+ num_inference_steps=num_inference_steps,
57
+ generator=generator
58
+ )
59
  latency = f"Latency: {(time.time()-start_time):.2f} seconds"
60
  return img, seed, latency
61
 
 
171
  )
172
 
173
  # Launch the app
174
+ demo.launch()