Ryukijano committed on
Commit
f819c94
·
verified ·
1 Parent(s): 9565796

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -9
app.py CHANGED
@@ -8,7 +8,10 @@ from diffusers import DiffusionPipeline, AutoencoderTiny
8
  from diffusers.models.attention_processor import AttnProcessor2_0
9
  from custom_pipeline import FluxWithCFGPipeline
10
 
 
11
  torch.backends.cuda.matmul.allow_tf32 = True
 
 
12
 
13
  # Constants
14
  MAX_SEED = np.iinfo(np.int32).max
@@ -29,6 +32,10 @@ pipe.set_adapters(["better"], adapter_weights=[1.0])
29
  pipe.fuse_lora(adapter_name=["better"], lora_scale=1.0)
30
  pipe.unload_lora_weights()
31
 
 
 
 
 
32
  torch.cuda.empty_cache()
33
 
34
  # Inference function
@@ -40,14 +47,15 @@ def generate_image(prompt, seed=24, width=DEFAULT_WIDTH, height=DEFAULT_HEIGHT,
40
 
41
  start_time = time.time()
42
 
43
- # Only generate the last image in the sequence
44
- img = pipe.generate_images(
45
- prompt=prompt,
46
- width=width,
47
- height=height,
48
- num_inference_steps=num_inference_steps,
49
- generator=generator
50
- )
 
51
  latency = f"Latency: {(time.time()-start_time):.2f} seconds"
52
  return img, seed, latency
53
 
@@ -163,4 +171,4 @@ with gr.Blocks() as demo:
163
  )
164
 
165
  # Launch the app
166
- demo.launch()
 
8
  from diffusers.models.attention_processor import AttnProcessor2_0
9
  from custom_pipeline import FluxWithCFGPipeline
10
 
11
+ # Enable TF32 and set Tensor Core precision
12
  torch.backends.cuda.matmul.allow_tf32 = True
13
+ torch.backends.cudnn.allow_tf32 = True
14
+ torch.set_float32_matmul_precision('high')
15
 
16
  # Constants
17
  MAX_SEED = np.iinfo(np.int32).max
 
32
  pipe.fuse_lora(adapter_name=["better"], lora_scale=1.0)
33
  pipe.unload_lora_weights()
34
 
35
+ # Memory optimizations (optional, uncomment if needed)
36
+ # pipe.enable_model_cpu_offload()
37
+ # pipe.enable_sequential_cpu_offload()
38
+
39
  torch.cuda.empty_cache()
40
 
41
  # Inference function
 
47
 
48
  start_time = time.time()
49
 
50
+ with torch.autocast(device_type="cuda", dtype=torch.float16):
51
+ # Only generate the last image in the sequence
52
+ img = pipe.generate_images(
53
+ prompt=prompt,
54
+ width=width,
55
+ height=height,
56
+ num_inference_steps=num_inference_steps,
57
+ generator=generator
58
+ )
59
  latency = f"Latency: {(time.time()-start_time):.2f} seconds"
60
  return img, seed, latency
61
 
 
171
  )
172
 
173
  # Launch the app
174
+ demo.launch()