yeq6x committed
Commit 723c997 · 1 Parent(s): c43cbe2
Files changed (1)
  1. app.py +152 -148
app.py CHANGED
@@ -8,6 +8,7 @@ from diffusers import ControlNetModel, StableDiffusionXLControlNetPipeline, EulerAncestralDiscreteScheduler
 import torch
 from collections import Counter
 import random
+from controlnet_aux import OpenposeDetector
 
 ratios_map = {
     0.5:{"width":704,"height":1408},
@@ -30,6 +31,7 @@ ratios_map = {
 }
 ratios = np.array(list(ratios_map.keys()))
 
+openpose = OpenposeDetector.from_pretrained('lllyasviel/ControlNet')
 controlnet = ControlNetModel.from_pretrained(
     "yeq6x/Image2PositionColor_v3",
     torch_dtype=torch.float16
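
Note: the newly added Openpose preprocessor can be smoke-tested on its own before it is wired into process() below. A minimal sketch, assuming controlnet_aux is installed; 'sample.png' is a placeholder path, not a file in this repo:

# Standalone check of the Openpose preprocessor introduced in this commit.
from PIL import Image
from controlnet_aux import OpenposeDetector

openpose = OpenposeDetector.from_pretrained('lllyasviel/ControlNet')
img = Image.open('sample.png').convert('RGB')  # placeholder input
# Same flags process() uses below: body, hand, and face keypoints.
pose = openpose(img, include_body=True, include_hand=True, include_face=True)
pose.save('pose.png')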
@@ -43,13 +45,14 @@ pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
     offload_state_dict=True,
 ).to('cuda').to(torch.float16)
 
-pipe.scheduler = EulerAncestralDiscreteScheduler(
-    beta_start=0.00085,
-    beta_end=0.012,
-    beta_schedule="scaled_linear",
-    num_train_timesteps=1000,
-    steps_offset=1
-)
+# pipe.scheduler = EulerAncestralDiscreteScheduler(
+#     beta_start=0.00085,
+#     beta_end=0.012,
+#     beta_schedule="scaled_linear",
+#     num_train_timesteps=1000,
+#     steps_offset=1
+# )
+pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)
 pipe.force_zeros_for_empty_prompt = False
 
 def get_size(init_image):
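
Note: the hand-built scheduler is commented out in favor of EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config), which inherits the scheduler settings shipped with the checkpoint instead of pinning them by hand. A minimal sketch of the idiom, assuming pipe is the pipeline constructed above:

# from_config copies checkpoint-specific fields (beta schedule, timestep
# spacing, prediction type, ...) that a hardcoded constructor would miss.
from diffusers import EulerAncestralDiscreteScheduler

print(pipe.scheduler.config)  # settings bundled with the checkpoint
pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)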
@@ -78,147 +81,148 @@ def generate_(prompt, negative_prompt, pose_image, input_image, num_steps, controlnet_conditioning_scale, seed):
 
 @spaces.GPU
 def process(input_image, prompt, negative_prompt, num_steps, controlnet_conditioning_scale, seed):
-
     # resize input_image to 1024x1024
     input_image = resize_image(input_image)
 
-    images = generate_(prompt, negative_prompt, input_image, input_image, num_steps, controlnet_conditioning_scale, seed)
-
-    return [input_image,images[0]]
-
-@spaces.GPU
-def predict_image(cond_image, prompt, negative_prompt, controlnet_conditioning_scale):
-    print("predict position map")
-    global pipe
-    generator = torch.Generator()
-    generator.manual_seed(random.randint(0, 2147483647))
-    image = pipe(
-        prompt,
-        negative_prompt=negative_prompt,
-        image = cond_image,
-        width=1024,
-        height=1024,
-        guidance_scale=8,
-        num_inference_steps=20,
-        generator=generator,
-        guess_mode = True,
-        controlnet_conditioning_scale = controlnet_conditioning_scale
-    ).images[0]
-
-    return image
-
-
-def convert_pil_to_opencv(pil_image):
-    return np.array(pil_image)
-
-def inv_func(y,
-             c = -712.380100,
-             a = 137.375240,
-             b = 192.435866):
-    return (np.exp((y - c) / a) - np.exp(-c/a)) / 964.8468371292845
-
-def create_point_cloud(img1, img2):
-    if img1.shape != img2.shape:
-        raise ValueError("Both images must have the same dimensions.")
-
-    h, w, _ = img1.shape
-    points = []
-    colors = []
-    for y in range(h):
-        for x in range(w):
-            # take the RGB at pixel (x, y) as XYZ
-            r, g, b = img1[y, x]
-            r = inv_func(r) * 0.9
-            g = inv_func(g) / 1.7 * 0.6
-            b = inv_func(b)
-            r *= 150
-            g *= 150
-            b *= 150
-            points.append([g, b, r])  # X, Y, Z
-            # take the color of image 2 at the corresponding pixel
-            colors.append(img2[y, x] / 255.0)  # scale colors to 0-1
-
-    return np.array(points), np.array(colors)
-
-def point_cloud_to_glb(points, colors):
-    # build the point cloud with Open3D
-    pc = o3d.geometry.PointCloud()
-    pc.points = o3d.utility.Vector3dVector(points)
-    pc.colors = o3d.utility.Vector3dVector(colors)
-
-    # save temporarily in PLY format
-    temp_ply_file = "temp_output.ply"
-    o3d.io.write_point_cloud(temp_ply_file, pc)
-
-    # convert the PLY to GLB
-    mesh = trimesh.load(temp_ply_file)
-    glb_file = "output.glb"
-    mesh.export(glb_file)
-
-    return glb_file
-
-def visualize_3d(image1, image2):
-    print("Processing...")
-    # convert the PIL images to OpenCV format
-    img1 = convert_pil_to_opencv(image1)
-    img2 = convert_pil_to_opencv(image2)
-
-    # generate the point cloud
-    points, colors = create_point_cloud(img1, img2)
-
-    # convert to GLB format
-    glb_file = point_cloud_to_glb(points, colors)
-
-    return glb_file
-
-def scale_image(original_image):
-    aspect_ratio = original_image.width / original_image.height
-
-    if original_image.width > original_image.height:
-        new_width = 1024
-        new_height = round(new_width / aspect_ratio)
-    else:
-        new_height = 1024
-        new_width = round(new_height * aspect_ratio)
-
-    resized_original = original_image.resize((new_width, new_height), Image.LANCZOS)
-
-    return resized_original
-
-def get_edge_mode_color(img, edge_width=10):
-    # grab the 10-pixel regions along the border
-    left = img.crop((0, 0, edge_width, img.height))  # left edge
-    right = img.crop((img.width - edge_width, 0, img.width, img.height))  # right edge
-    top = img.crop((0, 0, img.width, edge_width))  # top edge
-    bottom = img.crop((0, img.height - edge_width, img.width, img.height))  # bottom edge
-
-    # collect and concatenate the pixel data from each region
-    colors = list(left.getdata()) + list(right.getdata()) + list(top.getdata()) + list(bottom.getdata())
-
-    # compute the mode (the most frequently occurring color)
-    mode_color = Counter(colors).most_common(1)[0][0]
-
-    return mode_color
-
-def paste_image(resized_img):
-    # use the mode color of the outer 10 px as the background color
-    mode_color = get_edge_mode_color(resized_img, edge_width=10)
-    mode_background = Image.new("RGBA", (1024, 1024), mode_color)
-    mode_background = mode_background.convert('RGB')
-
-    x = (1024 - resized_img.width) // 2
-    y = (1024 - resized_img.height) // 2
-    mode_background.paste(resized_img, (x, y))
-
-    return mode_background
-
-def outpaint_image(image):
-    if type(image) == type(None):
-        return None
-    resized_img = scale_image(image)
-    image = paste_image(resized_img)
-
-    return image
+    pose_image = openpose(input_image, include_body=True, include_hand=True, include_face=True)
+
+    images = generate_(prompt, negative_prompt, pose_image, input_image, num_steps, controlnet_conditioning_scale, seed)
+
+    return [pose_image,images[0]]
+
+# @spaces.GPU
+# def predict_image(cond_image, prompt, negative_prompt, controlnet_conditioning_scale):
+#     print("predict position map")
+#     global pipe
+#     generator = torch.Generator()
+#     generator.manual_seed(random.randint(0, 2147483647))
+#     image = pipe(
+#         prompt,
+#         negative_prompt=negative_prompt,
+#         image = cond_image,
+#         width=1024,
+#         height=1024,
+#         guidance_scale=8,
+#         num_inference_steps=20,
+#         generator=generator,
+#         guess_mode = True,
+#         controlnet_conditioning_scale = controlnet_conditioning_scale
+#     ).images[0]
+
+#     return image
+
+
+# def convert_pil_to_opencv(pil_image):
+#     return np.array(pil_image)
+
+# def inv_func(y,
+#              c = -712.380100,
+#              a = 137.375240,
+#              b = 192.435866):
+#     return (np.exp((y - c) / a) - np.exp(-c/a)) / 964.8468371292845
+
+# def create_point_cloud(img1, img2):
+#     if img1.shape != img2.shape:
+#         raise ValueError("Both images must have the same dimensions.")
+
+#     h, w, _ = img1.shape
+#     points = []
+#     colors = []
+#     for y in range(h):
+#         for x in range(w):
+#             # take the RGB at pixel (x, y) as XYZ
+#             r, g, b = img1[y, x]
+#             r = inv_func(r) * 0.9
+#             g = inv_func(g) / 1.7 * 0.6
+#             b = inv_func(b)
+#             r *= 150
+#             g *= 150
+#             b *= 150
+#             points.append([g, b, r])  # X, Y, Z
+#             # take the color of image 2 at the corresponding pixel
+#             colors.append(img2[y, x] / 255.0)  # scale colors to 0-1
+
+#     return np.array(points), np.array(colors)
+
+# def point_cloud_to_glb(points, colors):
+#     # build the point cloud with Open3D
+#     pc = o3d.geometry.PointCloud()
+#     pc.points = o3d.utility.Vector3dVector(points)
+#     pc.colors = o3d.utility.Vector3dVector(colors)
+
+#     # save temporarily in PLY format
+#     temp_ply_file = "temp_output.ply"
+#     o3d.io.write_point_cloud(temp_ply_file, pc)
+
+#     # convert the PLY to GLB
+#     mesh = trimesh.load(temp_ply_file)
+#     glb_file = "output.glb"
+#     mesh.export(glb_file)
+
+#     return glb_file
+
+# def visualize_3d(image1, image2):
+#     print("Processing...")
+#     # convert the PIL images to OpenCV format
+#     img1 = convert_pil_to_opencv(image1)
+#     img2 = convert_pil_to_opencv(image2)
+
+#     # generate the point cloud
+#     points, colors = create_point_cloud(img1, img2)
+
+#     # convert to GLB format
+#     glb_file = point_cloud_to_glb(points, colors)
+
+#     return glb_file
+
+# def scale_image(original_image):
+#     aspect_ratio = original_image.width / original_image.height
+
+#     if original_image.width > original_image.height:
+#         new_width = 1024
+#         new_height = round(new_width / aspect_ratio)
+#     else:
+#         new_height = 1024
+#         new_width = round(new_height * aspect_ratio)
+
+#     resized_original = original_image.resize((new_width, new_height), Image.LANCZOS)
+
+#     return resized_original
+
+# def get_edge_mode_color(img, edge_width=10):
+#     # grab the 10-pixel regions along the border
+#     left = img.crop((0, 0, edge_width, img.height))  # left edge
+#     right = img.crop((img.width - edge_width, 0, img.width, img.height))  # right edge
+#     top = img.crop((0, 0, img.width, edge_width))  # top edge
+#     bottom = img.crop((0, img.height - edge_width, img.width, img.height))  # bottom edge
+
+#     # collect and concatenate the pixel data from each region
+#     colors = list(left.getdata()) + list(right.getdata()) + list(top.getdata()) + list(bottom.getdata())
+
+#     # compute the mode (the most frequently occurring color)
+#     mode_color = Counter(colors).most_common(1)[0][0]
+
+#     return mode_color
+
+# def paste_image(resized_img):
+#     # use the mode color of the outer 10 px as the background color
+#     mode_color = get_edge_mode_color(resized_img, edge_width=10)
+#     mode_background = Image.new("RGBA", (1024, 1024), mode_color)
+#     mode_background = mode_background.convert('RGB')
+
+#     x = (1024 - resized_img.width) // 2
+#     y = (1024 - resized_img.height) // 2
+#     mode_background.paste(resized_img, (x, y))
+
+#     return mode_background
+
+# def outpaint_image(image):
+#     if type(image) == type(None):
+#         return None
+#     resized_img = scale_image(image)
+#     image = paste_image(resized_img)
+
+#     return image
 
 block = gr.Blocks().queue()
 
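
Note: after this hunk, process() conditions generation on an Openpose skeleton instead of passing the raw input image to the ControlNet twice, and the position-map/point-cloud path is commented out rather than deleted. A hypothetical direct invocation of the new path outside Gradio, assuming the globals above plus the unchanged resize_image() and generate_() helpers from app.py; the file names and prompts are placeholders:

from PIL import Image

img = resize_image(Image.open('input.png').convert('RGB'))  # placeholder path
pose = openpose(img, include_body=True, include_hand=True, include_face=True)
images = generate_('1girl, white background',   # placeholder prompt
                   'lowres, bad anatomy',       # placeholder negative prompt
                   pose, img, num_steps=20,
                   controlnet_conditioning_scale=0.6, seed=42)
images[0].save('out.png')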
@@ -241,30 +245,30 @@ with block:
     ips = [input_image, prompt, negative_prompt, num_steps, controlnet_conditioning_scale, seed]
     run_button.click(fn=process, inputs=ips, outputs=[pose_image_output, generated_image_output])
 
-    gr.Markdown("## Position Map Visualizer")
-
-    with gr.Row():
-        with gr.Column():
-            with gr.Row():
-                img1 = gr.Image(type="pil", label="color Image", height=300)
-                img2 = gr.Image(type="pil", label="map Image", height=300)
-            prompt = gr.Textbox("position map, 1girl, white background", label="Prompt")
-            negative_prompt = gr.Textbox("lowres, bad anatomy, bad hands, bad feet, text, error, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality, normal quality, jpeg artifacts, signature, watermark, username, blurry", label="Negative Prompt")
-            controlnet_conditioning_scale = gr.Slider(label="ControlNet conditioning scale", minimum=0.1, maximum=2.0, value=0.6, step=0.05)
-            predict_map_btn = gr.Button("Predict Position Map")
-            visualize_3d_btn = gr.Button("Generate 3D Point Cloud")
-        with gr.Column():
-            reconstruction_output = gr.Model3D(label="3D Viewer", height=600)
-            gr.Examples(
-                examples=[
-                    ["resources/source/000006.png", "resources/target/000006.png"],
-                    ["resources/source/006420.png", "resources/target/006420.png"],
-                ],
-                inputs=[img1, img2]
-            )
-
-    img1.input(outpaint_image, inputs=img1, outputs=img1)
-    predict_map_btn.click(predict_image, inputs=[img1, prompt, negative_prompt, controlnet_conditioning_scale], outputs=img2)
-    visualize_3d_btn.click(visualize_3d, inputs=[img2, img1], outputs=reconstruction_output)
+    # gr.Markdown("## Position Map Visualizer")
+
+    # with gr.Row():
+    #     with gr.Column():
+    #         with gr.Row():
+    #             img1 = gr.Image(type="pil", label="color Image", height=300)
+    #             img2 = gr.Image(type="pil", label="map Image", height=300)
+    #         prompt = gr.Textbox("position map, 1girl, white background", label="Prompt")
+    #         negative_prompt = gr.Textbox("lowres, bad anatomy, bad hands, bad feet, text, error, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality, normal quality, jpeg artifacts, signature, watermark, username, blurry", label="Negative Prompt")
+    #         controlnet_conditioning_scale = gr.Slider(label="ControlNet conditioning scale", minimum=0.1, maximum=2.0, value=0.6, step=0.05)
+    #         predict_map_btn = gr.Button("Predict Position Map")
+    #         visualize_3d_btn = gr.Button("Generate 3D Point Cloud")
+    #     with gr.Column():
+    #         reconstruction_output = gr.Model3D(label="3D Viewer", height=600)
+    #         gr.Examples(
+    #             examples=[
+    #                 ["resources/source/000006.png", "resources/target/000006.png"],
+    #                 ["resources/source/006420.png", "resources/target/006420.png"],
+    #             ],
+    #             inputs=[img1, img2]
+    #         )
+
+    # img1.input(outpaint_image, inputs=img1, outputs=img1)
+    # predict_map_btn.click(predict_image, inputs=[img1, prompt, negative_prompt, controlnet_conditioning_scale], outputs=img2)
+    # visualize_3d_btn.click(visualize_3d, inputs=[img2, img1], outputs=reconstruction_output)
 
 block.launch(debug = True)