yeq6x committed
Commit f459619 · 1 Parent(s): 8c5f93c
Files changed (1)
  app.py +72 -145
app.py CHANGED
@@ -1,136 +1,32 @@
  import open3d_zerogpu_fix
- import spaces
- from diffusers import ControlNetModel
- from diffusers import StableDiffusionXLControlNetPipeline
- from diffusers import EulerAncestralDiscreteScheduler
- from PIL import Image
- import torch
  import numpy as np
- import cv2
  import gradio as gr
- from torchvision import transforms
- from controlnet_aux import OpenposeDetector
- import random
  import open3d as o3d
- from collections import Counter
  import trimesh

- ratios_map = {
-     0.5:{"width":704,"height":1408},
-     0.57:{"width":768,"height":1344},
-     0.68:{"width":832,"height":1216},
-     0.72:{"width":832,"height":1152},
-     0.78:{"width":896,"height":1152},
-     0.82:{"width":896,"height":1088},
-     0.88:{"width":960,"height":1088},
-     0.94:{"width":960,"height":1024},
-     1.00:{"width":1024,"height":1024},
-     1.13:{"width":1088,"height":960},
-     1.21:{"width":1088,"height":896},
-     1.29:{"width":1152,"height":896},
-     1.38:{"width":1152,"height":832},
-     1.46:{"width":1216,"height":832},
-     1.67:{"width":1280,"height":768},
-     1.75:{"width":1344,"height":768},
-     2.00:{"width":1408,"height":704}
- }
- ratios = np.array(list(ratios_map.keys()))
-
-
- openpose = OpenposeDetector.from_pretrained('lllyasviel/ControlNet')
-
- controlnet = ControlNetModel.from_pretrained(
-     "yeq6x/Image2PositionColor_v3",
-     torch_dtype=torch.float16
- ).to('cuda')
-
- pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
-     "yeq6x/animagine_position_map",
-     controlnet=controlnet,
-     torch_dtype=torch.float16,
-     low_cpu_mem_usage=True,
-     offload_state_dict=True,
- ).to('cuda').to(torch.float16)
-
- pipe.scheduler = EulerAncestralDiscreteScheduler(
-     beta_start=0.00085,
-     beta_end=0.012,
-     beta_schedule="scaled_linear",
-     num_train_timesteps=1000,
-     steps_offset=1
- )
- # pipe.enable_freeu(b1=1.1, b2=1.1, s1=0.5, s2=0.7)
- # pipe.enable_xformers_memory_efficient_attention()
- pipe.force_zeros_for_empty_prompt = False
-
- def get_size(init_image):
-     w,h=init_image.size
-     curr_ratio = w/h
-     ind = np.argmin(np.abs(curr_ratio-ratios))
-     ratio = ratios[ind]
-     chosen_ratio = ratios_map[ratio]
-     w,h = chosen_ratio['width'], chosen_ratio['height']
-     return w,h
-
- def resize_image(image):
-     image = image.convert('RGB')
-     w,h = get_size(image)
-     resized_image = image.resize((w, h))
-     return resized_image
-
- def resize_image_old(image):
-     image = image.convert('RGB')
-     current_size = image.size
-     if current_size[0] > current_size[1]:
-         center_cropped_image = transforms.functional.center_crop(image, (current_size[1], current_size[1]))
-     else:
-         center_cropped_image = transforms.functional.center_crop(image, (current_size[0], current_size[0]))
-     resized_image = transforms.functional.resize(center_cropped_image, (1024, 1024))
-     return resized_image
-

- @spaces.GPU
- def generate_(prompt, negative_prompt, pose_image, input_image, controlnet_conditioning_scale):
-     generator = torch.Generator()
-     generator.manual_seed(random.randint(0, 2147483647))
-     images = pipe(
-         prompt, negative_prompt=negative_prompt, image=pose_image, num_inference_steps=20, controlnet_conditioning_scale=float(controlnet_conditioning_scale),
-         generator=generator, height=input_image.size[1], width=input_image.size[0],
-     ).images
-     return images
-
- @spaces.GPU
- def process(input_image, prompt, negative_prompt, controlnet_conditioning_scale):
-
-     # resize input_image to 1024x1024
-     input_image = resize_image(input_image)
-
-     pose_image = openpose(input_image, include_body=True, include_hand=True, include_face=True)
-
-     images = generate_(prompt, negative_prompt, pose_image, input_image, controlnet_conditioning_scale)
-
-     return [pose_image,images[0]]
-
- @spaces.GPU
- def predict_image(cond_image, prompt, negative_prompt, controlnet_conditioning_scale):
-     print("predict position map")
-     global pipe
-     generator = torch.Generator()
-     generator.manual_seed(random.randint(0, 2147483647))
-     image = pipe(
-         prompt,
-         negative_prompt=negative_prompt,
-         image = cond_image,
-         width=1024,
-         height=1024,
-         guidance_scale=8,
-         num_inference_steps=20,
-         generator=generator,
-         guess_mode = True,
-         controlnet_conditioning_scale = controlnet_conditioning_scale
-     ).images[0]
-
-     return image

  def convert_pil_to_opencv(pil_image):
      return np.array(pil_image)
@@ -243,25 +139,56 @@ def outpaint_image(image):
      image = paste_image(resized_img)

      return image
-
- block = gr.Blocks().queue()
-
- with block:
-     gr.Markdown("## BRIA 2.3 ControlNet Pose")
-     with gr.Row():
-         with gr.Column():
-             input_image = gr.Image(sources=None, type="pil") # None for upload, ctrl+v and webcam
-             prompt = gr.Textbox(label="Prompt")
-             negative_prompt = gr.Textbox(label="Negative prompt", value="Logo,Watermark,Text,Ugly,Morbid,Extra fingers,Poorly drawn hands,Mutation,Blurry,Extra limbs,Gross proportions,Missing arms,Mutated hands,Long neck,Duplicate,Mutilated,Mutilated hands,Poorly drawn face,Deformed,Bad anatomy,Cloned face,Malformed limbs,Missing legs,Too many fingers")
-             controlnet_conditioning_scale = gr.Slider(label="ControlNet conditioning scale", minimum=0.1, maximum=2.0, value=1.0, step=0.05)
-             run_button = gr.Button(value="Run")
-
-         with gr.Column():
-             with gr.Row():
-                 pose_image_output = gr.Image(label="Pose Image", type="pil", interactive=False)
-                 generated_image_output = gr.Image(label="Generated Image", type="pil", interactive=False)
-
-     run_button.click(fn=process, inputs=[input_image, prompt, negative_prompt, controlnet_conditioning_scale], outputs=[pose_image_output, generated_image_output])
-
-
- block.launch(debug = True)
  import open3d_zerogpu_fix
  import numpy as np
+ from PIL import Image
  import gradio as gr
  import open3d as o3d
  import trimesh
+ from diffusers import ControlNetModel, StableDiffusionXLControlNetPipeline, EulerAncestralDiscreteScheduler
+ import torch
+ from collections import Counter
+ import random
+
+ import spaces
+
+ pipe = None
+ device = None
+ torch_dtype = None
+
+ def load_model():
+     global pipe, device, torch_dtype
+     device = "cuda" if torch.cuda.is_available() else "cpu"
+     torch_dtype = torch.float16 if device == "cuda" else torch.float32
+
+     pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
+         "yeq6x/animagine_position_map",
+         controlnet=ControlNetModel.from_pretrained("yeq6x/Image2PositionColor_v3"),
+     ).to(device)
+     pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)
+
+     return pipe

  def convert_pil_to_opencv(pil_image):
      return np.array(pil_image)
      image = paste_image(resized_img)

      return image
+
+ @spaces.GPU
+ def predict_image(cond_image, prompt, negative_prompt):
+     print("predict position map")
+     global pipe
+     generator = torch.Generator()
+     generator.manual_seed(random.randint(0, 2147483647))
+     image = pipe(
+         prompt,
+         prompt,
+         cond_image,
+         negative_prompt=negative_prompt,
+         width=1024,
+         height=1024,
+         guidance_scale=8,
+         num_inference_steps=20,
+         generator=generator,
+         guess_mode = True,
+         controlnet_conditioning_scale = 0.6,
+     ).images[0]
+
+     return image
+
+ load_model()
+
+ # Gradio application
+ with gr.Blocks() as demo:
+     gr.Markdown("## Position Map Visualizer")
+
+     with gr.Row():
+         with gr.Column():
+             with gr.Row():
+                 img1 = gr.Image(type="pil", label="color Image", height=300)
+                 img2 = gr.Image(type="pil", label="map Image", height=300)
+             prompt = gr.Textbox("position map, 1girl, white background", label="Prompt")
+             negative_prompt = gr.Textbox("lowres, bad anatomy, bad hands, bad feet, text, error, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality, normal quality, jpeg artifacts, signature, watermark, username, blurry", label="Negative Prompt")
+             predict_map_btn = gr.Button("Predict Position Map")
+             visualize_3d_btn = gr.Button("Generate 3D Point Cloud")
+         with gr.Column():
+             reconstruction_output = gr.Model3D(label="3D Viewer", height=600)
+             gr.Examples(
+                 examples=[
+                     ["resources/source/000006.png", "resources/target/000006.png"],
+                     ["resources/source/006420.png", "resources/target/006420.png"],
+                 ],
+                 inputs=[img1, img2]
+             )
+
+     img1.input(outpaint_image, inputs=img1, outputs=img1)
+     predict_map_btn.click(predict_image, inputs=[img1, prompt, negative_prompt], outputs=img2)
+     visualize_3d_btn.click(visualize_3d, inputs=[img2, img1], outputs=reconstruction_output)
+
+ demo.launch()
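
For context: the new UI hands the predicted position map (img2) and the color image (img1) to visualize_3d, which is defined elsewhere in app.py and is not touched by this commit. Below is a minimal sketch of that kind of conversion, not the Space's actual code: it assumes the map's RGB channels encode normalized XYZ coordinates, that per-point colors come from the color image, and that the installed Gradio's Model3D component accepts a .ply file path. The helper name and defaults are illustrative only.

# Sketch only; not part of this commit. Shows one plausible position-map -> point-cloud conversion.
import numpy as np
import open3d as o3d
from PIL import Image

def position_map_to_pointcloud(map_image: Image.Image, color_image: Image.Image,
                               out_path: str = "pointcloud.ply") -> str:
    # Assumption: each pixel of the position map stores XYZ in its RGB channels, scaled to [0, 1].
    xyz = np.asarray(map_image.convert("RGB"), dtype=np.float32).reshape(-1, 3) / 255.0
    # Use the matching color image (resized to the map's resolution) for per-point colors.
    rgb = np.asarray(color_image.convert("RGB").resize(map_image.size),
                     dtype=np.float32).reshape(-1, 3) / 255.0
    pcd = o3d.geometry.PointCloud()
    pcd.points = o3d.utility.Vector3dVector(xyz)
    pcd.colors = o3d.utility.Vector3dVector(rgb)
    o3d.io.write_point_cloud(out_path, pcd)  # the saved .ply path can be returned to gr.Model3D
    return out_path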