skyBluezz committed on
Commit
1717c15
·
verified ·
1 Parent(s): 8356b41

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +410 -0
app.py ADDED
@@ -0,0 +1,410 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import spaces
2
+ from typing import Tuple, Union, List
3
+ import os
4
+
5
+ import numpy as np
6
+ from PIL import Image
7
+
8
+ import torch
9
+ from diffusers.pipelines.controlnet import StableDiffusionControlNetInpaintPipeline
10
+ from diffusers import ControlNetModel, UniPCMultistepScheduler, AutoPipelineForText2Image
11
+ from transformers import AutoImageProcessor, UperNetForSemanticSegmentation, \
12
+ AutoModelForDepthEstimation, DetrForObjectDetection, DetrImageProcessor
13
+
14
+ from colors import ade_palette
15
+ from utils import map_colors_rgb
16
+ from diffusers import StableDiffusionXLPipeline
17
+ import gradio as gr
18
+ import gc
19
+
20
# Device string and tensor precision used when loading and running the models
# in this file.
device = "cuda"
dtype = torch.float16


# Stylesheet text for the demo page. The "img-display-input" and
# "img-display-output" ids are the elem_id values given to the gr.Image
# components in create_demo.
# NOTE(review): main() builds gr.Blocks() without passing this string, so it
# does not appear to be applied anywhere in this file -- confirm intent.
css = """
#img-display-container {
max-height: 50vh;
}
#img-display-input {
max-height: 40vh;
}
#img-display-output {
max-height: 40vh;
}

"""
36
+
37
+
38
def filter_items(
    colors_list: Union[List, np.ndarray],
    items_list: Union[List, np.ndarray],
    items_to_remove: Union[List, np.ndarray]
) -> Tuple[Union[List, np.ndarray], Union[List, np.ndarray]]:
    """
    Drop unwanted items together with the colors paired with them.

    ``colors_list`` and ``items_list`` are walked in lockstep; a pair is kept
    only when its item is not contained in ``items_to_remove``. Relative order
    of the surviving pairs is preserved.

    Args:
        colors_list: Colors, one per entry of ``items_list``.
        items_list: Items, one per entry of ``colors_list``.
        items_to_remove: Items whose pairs must be discarded.

    Returns:
        A ``(filtered_colors, filtered_items)`` tuple of two new lists.
    """
    kept_pairs = [
        (color, item)
        for color, item in zip(colors_list, items_list)
        if item not in items_to_remove
    ]
    filtered_colors = [color for color, _ in kept_pairs]
    filtered_items = [item for _, item in kept_pairs]
    return filtered_colors, filtered_items
63
+
64
def get_segmentation_pipeline(
) -> Tuple[AutoImageProcessor, UperNetForSemanticSegmentation]:
    """Load the semantic-segmentation processor and model.

    Both objects are created from the same
    "openmmlab/upernet-convnext-small" checkpoint.

    Returns:
        Tuple[AutoImageProcessor, UperNetForSemanticSegmentation]: the image
        processor followed by the segmentation model.
    """
    checkpoint = "openmmlab/upernet-convnext-small"
    processor = AutoImageProcessor.from_pretrained(checkpoint)
    segmentor = UperNetForSemanticSegmentation.from_pretrained(checkpoint)
    return processor, segmentor
77
+
78
+
79
@torch.inference_mode()
@spaces.GPU
def segment_image(
    image: Image,
    image_processor: AutoImageProcessor,
    image_segmentor: UperNetForSemanticSegmentation
) -> Image:
    """
    Run semantic segmentation and render the result as a color-coded image.

    Args:
        image (Image): The input image to be segmented.
        image_processor (AutoImageProcessor): Prepares the image for the model
            and post-processes the model output into a label map.
        image_segmentor (UperNetForSemanticSegmentation): The segmentation
            model that is run on the prepared pixel values.

    Returns:
        Image: An RGB image of the same size as the input in which every pixel
        carries the ``ade_palette()`` color whose index equals its label.
    """
    model_inputs = image_processor(image, return_tensors="pt")
    with torch.no_grad():
        outputs = image_segmentor(model_inputs.pixel_values)

    # PIL's size is (width, height); reversing it gives (height, width).
    target_hw = image.size[::-1]
    seg = image_processor.post_process_semantic_segmentation(
        outputs, target_sizes=[target_hw])[0]

    # Paint one palette color per label onto an initially black canvas.
    rows, cols = seg.shape[0], seg.shape[1]
    color_seg = np.zeros((rows, cols, 3), dtype=np.uint8)
    for label, color in enumerate(np.array(ade_palette())):
        color_seg[seg == label, :] = color

    return Image.fromarray(color_seg.astype(np.uint8)).convert('RGB')
114
+
115
+
116
def get_depth_pipeline():
    """Load the depth-estimation image processor and model.

    Both are created from the "LiheYoung/depth-anything-large-hf" checkpoint
    with ``torch_dtype`` set to the module-level ``dtype``.

    Returns:
        A ``(feature_extractor, depth_estimator)`` tuple.
    """
    checkpoint = "LiheYoung/depth-anything-large-hf"
    feature_extractor = AutoImageProcessor.from_pretrained(
        checkpoint, torch_dtype=dtype)
    depth_estimator = AutoModelForDepthEstimation.from_pretrained(
        checkpoint, torch_dtype=dtype)
    return feature_extractor, depth_estimator
122
+
123
+
124
@torch.inference_mode()
@spaces.GPU
def get_depth_image(
        image: Image,
        feature_extractor: AutoImageProcessor,
        depth_estimator: AutoModelForDepthEstimation
) -> Image:
    """
    Estimate a depth map for ``image`` and return it as a grayscale RGB image.

    The predicted depth is resized to the input resolution, min-max normalised
    to [0, 1], replicated over three channels and scaled to 8-bit.

    Args:
        image (Image): Input PIL image.
        feature_extractor (AutoImageProcessor): Turns the image into model
            inputs; its output is moved to the module-level ``device``.
        depth_estimator (AutoModelForDepthEstimation): Model whose
            ``predicted_depth`` output is used.

    Returns:
        Image: A PIL image with the same width and height as ``image``.
    """
    image_to_depth = feature_extractor(images=image, return_tensors="pt").to(device)
    with torch.no_grad():
        depth_map = depth_estimator(**image_to_depth).predicted_depth

    # Resize the prediction back to the input resolution (float32 for bicubic).
    width, height = image.size
    depth_map = torch.nn.functional.interpolate(
        depth_map.unsqueeze(1).float(),
        size=(height, width),
        mode="bicubic",
        align_corners=False,
    )

    depth_min = torch.amin(depth_map, dim=[1, 2, 3], keepdim=True)
    depth_max = torch.amax(depth_map, dim=[1, 2, 3], keepdim=True)
    # A perfectly flat depth map has max == min; clamp the range so the
    # normalisation yields zeros instead of NaNs from a 0/0 division.
    depth_range = (depth_max - depth_min).clamp_min(
        torch.finfo(depth_map.dtype).eps)
    depth_map = (depth_map - depth_min) / depth_range

    # Replicate the single depth channel into three channels.
    image = torch.cat([depth_map] * 3, dim=1)

    image = image.permute(0, 2, 3, 1).cpu().numpy()[0]
    image = Image.fromarray((image * 255.0).clip(0, 255).astype(np.uint8))
    return image
150
+
151
+ # ---------------------
152
+ # ------- DETR --------
153
+ # ---------------------
154
@torch.inference_mode()
@spaces.GPU
def run_detr(image:Image, confidence_threshold: float = 0.8):
    """
    Detect objects in ``image`` with the module-level DETR model.

    The PIL image is first converted to model inputs by ``detr_processor``
    (the model cannot consume a PIL image directly), then the raw predictions
    are post-processed at the image's own resolution.

    Args:
        image (Image): Input PIL image.
        confidence_threshold (float): Minimum score for a detection to be
            kept by the post-processing step.

    Returns:
        The ``'boxes'`` entry of the post-processed result for this image
        (per the Transformers docs, one row of absolute
        (x_min, y_min, x_max, y_max) coordinates per kept detection).
    """
    inputs = detr_processor(images=image, return_tensors="pt")

    # Match the model's device and precision so the forward pass does not fail
    # on a device/dtype mismatch.
    pixel_values = inputs["pixel_values"].to(
        device=detr_model.device, dtype=detr_model.dtype)
    pixel_mask = inputs.get("pixel_mask")
    if pixel_mask is not None:
        pixel_mask = pixel_mask.to(detr_model.device)

    outputs = detr_model(pixel_values=pixel_values, pixel_mask=pixel_mask)

    width, height = image.size
    postprocessed_outputs = detr_processor.post_process_object_detection(
        outputs,
        target_sizes=[(height, width)],
        threshold=confidence_threshold)

    # Single input image -> single result dict; only the boxes are returned.
    return postprocessed_outputs[0]['boxes']
167
+
168
def resize_dimensions(dimensions, target_size):
    """
    Scale a (width, height) pair so its longer side equals ``target_size``.

    The aspect ratio is kept and the shorter side is truncated to an int.
    When both sides are already smaller than ``target_size`` the input pair
    is returned unchanged.
    """
    width, height = dimensions

    # Nothing to do when the whole image already fits below the target.
    if max(width, height) < target_size:
        return dimensions

    if width > height:
        # Landscape: width becomes the target, height follows the ratio.
        return (target_size, int(target_size * (height / width)))

    # Portrait or square: height becomes the target, width follows the ratio.
    return (int(target_size * (width / height)), target_size)
190
+
191
+
192
def flush():
    """Run Python garbage collection, then empty PyTorch's CUDA cache."""
    # Collect first so that unreachable objects are gone before the CUDA
    # cache is emptied.
    gc.collect()
    torch.cuda.empty_cache()
195
+
196
+
197
class ControlNetDepthDesignModelMulti:
    """
    Generates a furnished-room design from an empty-room photo and a prompt.

    The instance only stores generation settings (seed, negative prompt,
    protected segmentation classes, quality suffix). The models themselves
    (``pipe``, ``guide_pipe``, the segmentation and depth models) are
    module-level globals used by ``generate_design``.
    """

    def __init__(self):
        """ Set the default seed, prompts and protected segmentation classes """
        #os.environ['HF_HUB_OFFLINE'] = "True"

        self.seed = 323*111
        self.neg_prompt = "window, door, low resolution, banner, logo, watermark, text, deformed, blurry, out of focus, surreal, ugly, beginner"
        # Segmentation class names that are filtered out before the inpainting
        # mask is built, so pixels of these classes are left unmasked.
        self.control_items = ["windowpane;window", "door;double;door"]
        # Appended to every user prompt in generate_design.
        self.additional_quality_suffix = "interior design, 4K, high resolution, photorealistic"

    @spaces.GPU
    def generate_design(self, empty_room_image: Image, prompt: str, guidance_scale: float = 10, num_steps: int = 50, strength: float =0.9, img_size: int = 640) -> Image:
        """
        Given an image of an empty room and a prompt
        generate the designed room according to the prompt
        Inputs -
            empty_room_image - An RGB PIL Image of the empty room
            prompt - Text describing the target design elements of the room
            guidance_scale - Passed as guidance_scale to the inpainting pipeline
            num_steps - Inference steps used by both diffusion pipelines
            strength - Passed as strength to the inpainting pipeline
            img_size - Target size handed to resize_dimensions for the working image
        Returns -
            design_image - PIL Image resized back to the size of the empty room image
        """
        print(prompt)
        flush()
        # One generator, seeded per call, is shared by both pipeline calls below.
        self.generator = torch.Generator(device=device).manual_seed(self.seed)

        pos_prompt = prompt + f', {self.additional_quality_suffix}'

        # Work on a resized copy; the original size is restored at the end.
        orig_w, orig_h = empty_room_image.size
        new_width, new_height = resize_dimensions(empty_room_image.size, img_size)
        input_image = empty_room_image.resize((new_width, new_height))
        real_seg = np.array(segment_image(input_image,
                                          seg_image_processor,
                                          image_segmentor))
        # Map every distinct segmentation color to its class name, then drop
        # the classes listed in self.control_items.
        unique_colors = np.unique(real_seg.reshape(-1, real_seg.shape[2]), axis=0)
        unique_colors = [tuple(color) for color in unique_colors]
        segment_items = [map_colors_rgb(i) for i in unique_colors]
        chosen_colors, segment_items = filter_items(
            colors_list=unique_colors,
            items_list=segment_items,
            items_to_remove=self.control_items
        )
        # Mask has the same shape as real_seg; it is 1 wherever the pixel's
        # color is one of the remaining (non-control) colors.
        mask = np.zeros_like(real_seg)
        for color in chosen_colors:
            color_matches = (real_seg == color).all(axis=2)
            mask[color_matches] = 1

        image_np = np.array(input_image)
        image = Image.fromarray(image_np).convert("RGB")
        mask_image = Image.fromarray((mask * 255).astype(np.uint8)).convert("RGB")
        segmentation_cond_image = Image.fromarray(real_seg).convert("RGB")

        image_depth = get_depth_image(image, depth_feature_extractor, depth_estimator)

        # generate image that would be used as IP-adapter
        flush()
        # Round the guide image size down to a multiple of 8.
        new_width_ip = int(new_width / 8) * 8
        new_height_ip = int(new_height / 8) * 8
        ip_image = guide_pipe(pos_prompt,
                              num_inference_steps=num_steps,
                              negative_prompt=self.neg_prompt,
                              height=new_height_ip,
                              width=new_width_ip,
                              generator=[self.generator]).images[0]

        flush()
        # Inpaint with two ControlNet conditions (depth, segmentation), equally
        # weighted, using the guide image as the IP-adapter image.
        generated_image = pipe(
            prompt=pos_prompt,
            negative_prompt=self.neg_prompt,
            num_inference_steps=num_steps,
            strength=strength,
            guidance_scale=guidance_scale,
            generator=[self.generator],
            image=image,
            mask_image=mask_image,
            ip_adapter_image=ip_image,
            control_image=[image_depth, segmentation_cond_image],
            controlnet_conditioning_scale=[0.5, 0.5]
        ).images[0]

        flush()
        # Restore the caller's original resolution.
        design_image = generated_image.resize(
            (orig_w, orig_h), Image.Resampling.LANCZOS
        )

        return design_image
285
+
286
+
287
def create_demo(model):
    """
    Build the demo UI inside the currently open ``gr.Blocks`` context.

    Lays out the input image, prompt and advanced sliders on the left and the
    generated design plus the detected bounding boxes on the right, then wires
    the Submit button to ``model.generate_design`` followed by ``run_detr``.

    Args:
        model: Object exposing ``seed``, ``neg_prompt``,
            ``additional_quality_suffix`` attributes and a
            ``generate_design`` method (see ControlNetDepthDesignModelMulti).
    """
    gr.Markdown("### Stable Design demo")
    with gr.Row():
        with gr.Column():
            input_image = gr.Image(label="Input Image", type='pil', elem_id='img-display-input')
            input_text = gr.Textbox(label='Prompt', placeholder='Please upload your image first', lines=2)
            with gr.Accordion('Advanced options', open=False):
                num_steps = gr.Slider(label='Steps',
                                      minimum=1,
                                      maximum=50,
                                      value=50,
                                      step=1)
                img_size = gr.Slider(label='Image size',
                                     minimum=256,
                                     maximum=768,
                                     value=768,
                                     step=64)
                guidance_scale = gr.Slider(label='Guidance Scale',
                                           minimum=0.1,
                                           maximum=30.0,
                                           value=10.0,
                                           step=0.1)
                seed = gr.Slider(label='Seed',
                                 minimum=-1,
                                 maximum=2147483647,
                                 value=323*111,
                                 step=1,
                                 randomize=True)
                strength = gr.Slider(label='Strength',
                                     minimum=0.1,
                                     maximum=1.0,
                                     value=0.9,
                                     step=0.1)
                a_prompt = gr.Textbox(
                    label='Added Prompt',
                    value="interior design, 4K, high resolution, photorealistic")
                n_prompt = gr.Textbox(
                    label='Negative Prompt',
                    value="window, door, low resolution, banner, logo, watermark, text, deformed, blurry, out of focus, surreal, ugly, beginner")
            submit = gr.Button("Submit")

        with gr.Column():
            design_image = gr.Image(label="Output Mask", elem_id='img-display-output')
            # Receives the second value returned by on_submit.
            detected_boxes = gr.JSON(label="Detected boxes")

    def on_submit(image, text, num_steps, guidance_scale, seed, strength, a_prompt, n_prompt,
                  img_size, detr_confidence_threshold=0.8):
        """Generate the design, run DETR on it, return (image, boxes)."""
        # The per-request settings are stored on the shared model object.
        model.seed = seed
        model.neg_prompt = n_prompt
        model.additional_quality_suffix = a_prompt

        with torch.no_grad():
            out_img = model.generate_design(image, text, guidance_scale=guidance_scale, num_steps=num_steps, strength=strength, img_size=img_size)
        # -----------------
        # --  run detr   --
        # -----------------
        bboxes = run_detr(out_img, detr_confidence_threshold)
        # Convert the box tensor to nested lists so it can be shown as JSON.
        return out_img, bboxes.tolist()

    # Components are not subscriptable; pass one output component per value
    # returned by on_submit.
    submit.click(
        on_submit,
        inputs=[input_image, input_text, num_steps, guidance_scale, seed, strength, a_prompt, n_prompt, img_size],
        outputs=[design_image, detected_boxes])

    examples = gr.Examples(
        examples=[
            ["imgs/bedroom_1.jpg", "An elegantly appointed bedroom in the Art Deco style, featuring a grand king-size bed with geometric bedding, a luxurious velvet armchair, and a mirrored nightstand that reflects the room's opulence. Art Deco-inspired artwork adds a touch of glamour"],
            ["imgs/bedroom_2.jpg", "A bedroom that exudes French country charm with a soft upholstered bed, walls adorned with floral wallpaper, and a vintage wooden wardrobe. A crystal chandelier casts a warm, inviting glow over the space"],
            ["imgs/dinning_room_1.jpg", "A cozy dining room that captures the essence of rustic charm with a solid wooden farmhouse table at its core, surrounded by an eclectic mix of mismatched chairs. An antique sideboard serves as a statement piece, and the ambiance is warmly lit by a series of quaint Edison bulbs dangling from the ceiling"],
            ["imgs/dinning_room_3.jpg", "A dining room that epitomizes contemporary elegance, anchored by a sleek, minimalist dining table paired with stylish modern chairs. Artistic lighting fixtures create a focal point above, while the surrounding minimalist decor ensures the space feels open, airy, and utterly modern"],
            ["imgs/image_1.jpg", "A glamorous master bedroom in Hollywood Regency style, boasting a plush tufted headboard, mirrored furniture reflecting elegance, luxurious fabrics in rich textures, and opulent gold accents for a touch of luxury."],
            ["imgs/image_2.jpg", "A vibrant living room with a tropical theme, complete with comfortable rattan furniture, large leafy plants bringing the outdoors in, bright cushions adding pops of color, and bamboo blinds for natural light control."],
            ["imgs/living_room_1.jpg", "A stylish living room embracing mid-century modern aesthetics, featuring a vintage teak coffee table at its center, complemented by a classic sunburst clock on the wall and a cozy shag rug underfoot, creating a warm and inviting atmosphere"],
        ],
        inputs=[input_image, input_text], cache_examples=False)
350
+
351
+
352
# Two ControlNets (depth + segmentation) condition the inpainting pipeline.
controlnet_depth = ControlNetModel.from_pretrained(
    "controlnet_depth", torch_dtype=dtype, use_safetensors=True)
controlnet_seg = ControlNetModel.from_pretrained(
    "own_controlnet", torch_dtype=dtype, use_safetensors=True)

pipe = StableDiffusionControlNetInpaintPipeline.from_pretrained(
    "SG161222/Realistic_Vision_V5.1_noVAE",
    #"models/runwayml--stable-diffusion-inpainting",
    controlnet=[controlnet_depth, controlnet_seg],
    safety_checker=None,
    torch_dtype=dtype
)

pipe.load_ip_adapter("h94/IP-Adapter", subfolder="models",
                     weight_name="ip-adapter_sd15.bin")
pipe.set_ip_adapter_scale(0.4)
pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
pipe = pipe.to(device)

# Text-to-image pipeline that produces the IP-adapter guide image.
guide_pipe = StableDiffusionXLPipeline.from_pretrained(
    "segmind/SSD-1B",
    torch_dtype=dtype, use_safetensors=True, variant="fp16")
guide_pipe = guide_pipe.to(device)

seg_image_processor, image_segmentor = get_segmentation_pipeline()
depth_feature_extractor, depth_estimator = get_depth_pipeline()
depth_estimator = depth_estimator.to(device)

# ---------------------------------------
# Load Detr Model
# ---------------------------------------
# NOTE(review): "facebook-detr-50" is not a "<org>/<name>" hub id; presumably
# a local directory like "controlnet_depth" above -- confirm it exists.
# `torch_dtype` is used (instead of `dtype`) to match every other
# from_pretrained call in this file.
detr_model = DetrForObjectDetection.from_pretrained("facebook-detr-50",
                                                    torch_dtype=dtype,
                                                    use_safetensors=True)
# Keep the half-precision model on the same device as the other GPU models.
detr_model = detr_model.to(device)
detr_processor = DetrImageProcessor.from_pretrained("facebook-detr-50",
                                                    torch_dtype=dtype,
                                                    use_safetensors=True)
388
+
389
def main():
    """Build the Gradio page around a fresh model wrapper and launch it."""
    model = ControlNetDepthDesignModelMulti()
    print('Models uploaded successfully')

    title = "# StableDesign"
    # Inline style fixed: "margin-top=0px" is not a valid CSS declaration.
    description = """
<p style='font-size: 14px; margin-bottom: 10px;'><a href='https://www.linkedin.com/in/mykola-lavreniuk/'>Mykola Lavreniuk</a>, <a href='https://www.linkedin.com/in/bartosz-ludwiczuk-a677a760/'>Bartosz Ludwiczuk</a></p>
<p style='font-size: 16px; margin-bottom: 0px; margin-top: 0px;'>Official demo for <strong>StableDesign:</strong> 2nd place solution for the Generative Interior Design 2024 <a href='https://www.aicrowd.com/challenges/generative-interior-design-challenge-2024/leaderboards?challenge_round_id=1314'>competition</a>. StableDesign is a deep learning model designed to harness the power of AI, providing innovative and creative tools for designers. Using our algorithms, images of empty rooms can be transformed into fully furnished spaces based on text descriptions. Please refer to our <a href='https://github.com/Lavreniuk/generative-interior-design'>GitHub</a> for more details.</p>
"""
    with gr.Blocks() as demo:
        gr.Markdown(title)
        gr.Markdown(description)

        # Adds the input/output widgets and the Submit handler to this page.
        create_demo(model)
        gr.HTML('''<br><br><br><center>You can duplicate this Space to skip the queue:<a href="https://huggingface.co/spaces/MykolaL/StableDesign?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a><br>
<p><img src="https://visitor-badge.glitch.me/badge?page_id=MykolaL/StableDesign" alt="visitors"></p></center>''')

    demo.queue().launch(share=False)


if __name__ == '__main__':
    main()