Commit
·
6304c5b
1
Parent(s):
45b110b
Modifying the app code
Browse files
app.py
CHANGED
@@ -67,6 +67,12 @@ def image_loss(images, loss_type, device, elastic_transformer):
|
|
67 |
else:
|
68 |
return torch.tensor(0.0).to(device)
|
69 |
|
|
|
|
|
|
|
|
|
|
|
|
|
70 |
def generate_images(prompt, concept):
|
71 |
global pipe, device, elastic_transformer
|
72 |
if pipe is None:
|
@@ -74,96 +80,133 @@ def generate_images(prompt, concept):
|
|
74 |
if elastic_transformer is None:
|
75 |
elastic_transformer = init_transformers(device)
|
76 |
|
77 |
-
#
|
78 |
-
height, width = 384, 384
|
79 |
-
guidance_scale = 8
|
80 |
-
num_inference_steps = 45
|
81 |
-
loss_scale = 10.0
|
82 |
-
|
83 |
-
# Create scheduler
|
84 |
-
scheduler = LMSDiscreteScheduler(
|
85 |
-
beta_start=0.00085,
|
86 |
-
beta_end=0.012,
|
87 |
-
beta_schedule="scaled_linear",
|
88 |
-
num_train_timesteps=1000
|
89 |
-
)
|
90 |
-
pipe.scheduler = scheduler # Set the scheduler
|
91 |
-
|
92 |
-
# Create prompt text
|
93 |
prompt_text = f"{prompt} {concept}"
|
|
|
94 |
|
95 |
-
#
|
96 |
-
seeds = {
|
97 |
-
'none': 42,
|
98 |
-
'blue': 123,
|
99 |
-
'elastic': 456,
|
100 |
-
'symmetry': 789,
|
101 |
-
'saturation': 1000
|
102 |
-
}
|
103 |
-
|
104 |
loss_functions = ['none', 'blue', 'elastic', 'symmetry', 'saturation']
|
105 |
-
images = []
|
106 |
progress = gr.Progress()
|
107 |
|
108 |
-
# Generate image for each loss function
|
109 |
for idx, loss_type in enumerate(loss_functions):
|
110 |
progress(idx/len(loss_functions), f"Generating {loss_type} image...")
|
111 |
-
generator = torch.manual_seed(seeds[loss_type])
|
112 |
|
113 |
-
# Generate base image
|
114 |
try:
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
122 |
)
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
149 |
|
150 |
-
# Add tuple of (image, label) to list
|
151 |
-
images.append((image, f"{loss_type.capitalize()} Loss"))
|
152 |
-
print(f"Added {loss_type} image to gallery") # Debug print
|
153 |
except Exception as e:
|
154 |
-
print(f"Error
|
155 |
-
continue
|
156 |
-
|
157 |
-
# Clear GPU memory after each image
|
158 |
-
if torch.cuda.is_available():
|
159 |
-
torch.cuda.empty_cache()
|
160 |
-
gc.collect()
|
161 |
|
162 |
-
#
|
163 |
-
|
164 |
-
|
|
|
|
|
|
|
|
|
165 |
return None
|
166 |
-
return images
|
167 |
|
168 |
def create_interface():
|
169 |
default_prompts = [
|
@@ -187,14 +230,13 @@ def create_interface():
|
|
187 |
gr.Dropdown(choices=concepts, label="Select SD Concept")
|
188 |
],
|
189 |
outputs=gr.Gallery(
|
190 |
-
label="Generated Images
|
191 |
show_label=True,
|
192 |
elem_id="gallery",
|
193 |
columns=5,
|
194 |
rows=1,
|
195 |
-
height=
|
196 |
-
|
197 |
-
), # Simplified Gallery definition
|
198 |
title="Stable Diffusion using Text Inversion",
|
199 |
description="""Generate images using Stable Diffusion with different style concepts. The output shows 5 images side by side:
|
200 |
1. Original Image (No Loss)
|
@@ -204,16 +246,18 @@ def create_interface():
|
|
204 |
5. Saturation Loss - Modifies color saturation
|
205 |
|
206 |
Note: Image generation may take several minutes. Please be patient while the images are being processed.""",
|
207 |
-
|
|
|
|
|
208 |
)
|
209 |
|
210 |
return interface
|
211 |
|
212 |
if __name__ == "__main__":
|
213 |
interface = create_interface()
|
214 |
-
interface.queue(max_size=5) #
|
215 |
interface.launch(
|
216 |
share=True,
|
217 |
server_name="0.0.0.0",
|
218 |
-
|
219 |
)
|
|
|
67 |
else:
|
68 |
return torch.tensor(0.0).to(device)
|
69 |
|
70 |
+
# Update configuration
|
71 |
+
height, width = 512, 512
|
72 |
+
guidance_scale = 8
|
73 |
+
num_inference_steps = 50
|
74 |
+
loss_scale = 200
|
75 |
+
|
76 |
def generate_images(prompt, concept):
|
77 |
global pipe, device, elastic_transformer
|
78 |
if pipe is None:
|
|
|
80 |
if elastic_transformer is None:
|
81 |
elastic_transformer = init_transformers(device)
|
82 |
|
83 |
+
# Create prompt text and initialize results
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
84 |
prompt_text = f"{prompt} {concept}"
|
85 |
+
all_images = [] # Changed from images to all_images
|
86 |
|
87 |
+
# Process each loss type
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
88 |
loss_functions = ['none', 'blue', 'elastic', 'symmetry', 'saturation']
|
|
|
89 |
progress = gr.Progress()
|
90 |
|
|
|
91 |
for idx, loss_type in enumerate(loss_functions):
|
92 |
progress(idx/len(loss_functions), f"Generating {loss_type} image...")
|
|
|
93 |
|
|
|
94 |
try:
|
95 |
+
# Better memory management
|
96 |
+
if torch.cuda.is_available():
|
97 |
+
torch.cuda.empty_cache()
|
98 |
+
gc.collect()
|
99 |
+
torch.cuda.empty_cache()
|
100 |
+
|
101 |
+
# Move inputs to correct device and dtype
|
102 |
+
# Remove incorrect device movement
|
103 |
+
# text_input = text_input.to(device) # Remove this line
|
104 |
+
# uncond_input = uncond_input.to(device) # Remove this line
|
105 |
+
# latents = latents.to(dtype=pipe.vae.dtype, device=device) # Remove this line
|
106 |
+
|
107 |
+
# Initialize scheduler and process text first
|
108 |
+
scheduler = LMSDiscreteScheduler(
|
109 |
+
beta_start=0.00085,
|
110 |
+
beta_end=0.012,
|
111 |
+
beta_schedule="scaled_linear",
|
112 |
+
num_train_timesteps=1000
|
113 |
)
|
114 |
+
scheduler.set_timesteps(num_inference_steps)
|
115 |
+
scheduler.timesteps = scheduler.timesteps.to(device)
|
116 |
+
|
117 |
+
# Process text embeddings
|
118 |
+
text_input = pipe.tokenizer(
|
119 |
+
[prompt_text],
|
120 |
+
padding='max_length',
|
121 |
+
max_length=pipe.tokenizer.model_max_length,
|
122 |
+
truncation=True,
|
123 |
+
return_tensors="pt"
|
124 |
+
)
|
125 |
+
|
126 |
+
with torch.no_grad():
|
127 |
+
text_embeddings = pipe.text_encoder(text_input.input_ids.to(device))[0]
|
128 |
+
|
129 |
+
uncond_input = pipe.tokenizer(
|
130 |
+
[""] * 1,
|
131 |
+
padding="max_length",
|
132 |
+
max_length=text_input.input_ids.shape[-1],
|
133 |
+
return_tensors="pt"
|
134 |
+
)
|
135 |
+
|
136 |
+
with torch.no_grad():
|
137 |
+
uncond_embeddings = pipe.text_encoder(uncond_input.input_ids.to(device))[0]
|
138 |
+
|
139 |
+
text_embeddings = torch.cat([uncond_embeddings, text_embeddings])
|
140 |
+
|
141 |
+
# Generate initial latents with correct dtype
|
142 |
+
generator = torch.manual_seed(idx * 1000)
|
143 |
+
latents = torch.randn(
|
144 |
+
(1, pipe.unet.config.in_channels, height // 8, width // 8),
|
145 |
+
generator=generator,
|
146 |
+
)
|
147 |
+
latents = latents.to(device=device, dtype=pipe.unet.dtype)
|
148 |
+
latents = latents * scheduler.init_noise_sigma
|
149 |
+
|
150 |
+
# Diffusion process
|
151 |
+
for i, t in enumerate(scheduler.timesteps):
|
152 |
+
latent_model_input = torch.cat([latents] * 2)
|
153 |
+
sigma = scheduler.sigmas[i]
|
154 |
+
latent_model_input = scheduler.scale_model_input(latent_model_input, t)
|
155 |
+
|
156 |
+
# Move latent_model_input to correct dtype
|
157 |
+
latent_model_input = latent_model_input.to(dtype=pipe.unet.dtype)
|
158 |
+
|
159 |
+
with torch.no_grad():
|
160 |
+
noise_pred = pipe.unet(
|
161 |
+
latent_model_input,
|
162 |
+
t,
|
163 |
+
encoder_hidden_states=text_embeddings
|
164 |
+
)["sample"]
|
165 |
+
|
166 |
+
noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
|
167 |
+
noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
|
168 |
+
|
169 |
+
# Apply loss every 5 steps if not 'none'
|
170 |
+
if loss_type != 'none' and i % 5 == 0:
|
171 |
+
latents = latents.detach().requires_grad_()
|
172 |
+
latents_x0 = latents - sigma * noise_pred
|
173 |
+
|
174 |
+
# Decode to image space for loss computation
|
175 |
+
with torch.set_grad_enabled(True): # Enable gradients for loss computation
|
176 |
+
denoised_images = pipe.vae.decode((1 / 0.18215) * latents_x0).sample / 2 + 0.5
|
177 |
+
denoised_images = denoised_images.requires_grad_() # Enable gradients for images
|
178 |
+
loss = image_loss(denoised_images, loss_type, device, elastic_transformer)
|
179 |
+
cond_grad = torch.autograd.grad(loss * loss_scale, latents)[0]
|
180 |
+
|
181 |
+
latents = latents.detach() - cond_grad * sigma**2
|
182 |
+
|
183 |
+
latents = scheduler.step(noise_pred, t, latents).prev_sample
|
184 |
+
|
185 |
+
# Proper latent to image conversion
|
186 |
+
latents = (1 / 0.18215) * latents
|
187 |
+
with torch.no_grad():
|
188 |
+
image = pipe.vae.decode(latents).sample
|
189 |
+
|
190 |
+
image = (image / 2 + 0.5).clamp(0, 1)
|
191 |
+
image = image.detach().cpu().permute(0, 2, 3, 1).numpy()
|
192 |
+
image = (image * 255).round().astype("uint8")
|
193 |
+
pil_image = Image.fromarray(image[0])
|
194 |
+
|
195 |
+
# Add image with its label
|
196 |
+
all_images.append((pil_image, f"{loss_type.capitalize()} Loss"))
|
197 |
|
|
|
|
|
|
|
198 |
except Exception as e:
|
199 |
+
print(f"Error generating {loss_type} image: {e}")
|
200 |
+
continue # Continue to next loss type instead of returning None
|
|
|
|
|
|
|
|
|
|
|
201 |
|
202 |
+
# At the end of the function
|
203 |
+
try:
|
204 |
+
if len(all_images) == 0:
|
205 |
+
raise Exception("No images were generated successfully")
|
206 |
+
return [img for img, _ in all_images]
|
207 |
+
except Exception as e:
|
208 |
+
print(f"Error in generate_images: {e}")
|
209 |
return None
|
|
|
210 |
|
211 |
def create_interface():
|
212 |
default_prompts = [
|
|
|
230 |
gr.Dropdown(choices=concepts, label="Select SD Concept")
|
231 |
],
|
232 |
outputs=gr.Gallery(
|
233 |
+
label="Generated Images",
|
234 |
show_label=True,
|
235 |
elem_id="gallery",
|
236 |
columns=5,
|
237 |
rows=1,
|
238 |
+
height="auto"
|
239 |
+
),
|
|
|
240 |
title="Stable Diffusion using Text Inversion",
|
241 |
description="""Generate images using Stable Diffusion with different style concepts. The output shows 5 images side by side:
|
242 |
1. Original Image (No Loss)
|
|
|
246 |
5. Saturation Loss - Modifies color saturation
|
247 |
|
248 |
Note: Image generation may take several minutes. Please be patient while the images are being processed.""",
|
249 |
+
cache_examples=False,
|
250 |
+
max_batch_size=1,
|
251 |
+
flagging_mode="never"
|
252 |
)
|
253 |
|
254 |
return interface
|
255 |
|
256 |
if __name__ == "__main__":
|
257 |
interface = create_interface()
|
258 |
+
interface.queue(max_size=5) # Remove concurrency_count parameter
|
259 |
interface.launch(
|
260 |
share=True,
|
261 |
server_name="0.0.0.0",
|
262 |
+
server_port=7860
|
263 |
)
|