tori29umai committed on
Commit
25db6c9
1 Parent(s): 4f9226f
Files changed (3) hide show
  1. app.py +34 -7
  2. utils/dl_utils.py +2 -2
  3. utils/image_utils.py +25 -6
app.py CHANGED
@@ -7,7 +7,7 @@ import os
7
  import time
8
 
9
  from utils.dl_utils import dl_cn_model, dl_cn_config, dl_tagger_model, dl_lora_model
10
- from utils.image_utils import resize_image_aspect_ratio, base_generation
11
 
12
  from utils.prompt_utils import execute_prompt, remove_color, remove_duplicates
13
  from utils.tagger import modelLoad, analysis
@@ -22,8 +22,8 @@ os.makedirs(cn_dir, exist_ok=True)
22
  os.makedirs(tagger_dir, exist_ok=True)
23
  os.makedirs(lora_dir, exist_ok=True)
24
 
25
- dl_cn_model(cn_dir)
26
- dl_cn_config(cn_dir)
27
  dl_tagger_model(tagger_dir)
28
  dl_lora_model(lora_dir)
29
 
@@ -31,7 +31,11 @@ def load_model(lora_dir, cn_dir):
31
  device = "cuda" if torch.cuda.is_available() else "cpu"
32
  dtype = torch.float16
33
  vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)
34
- controlnet = ControlNetModel.from_pretrained(cn_dir, torch_dtype=dtype, use_safetensors=True)
 
 
 
 
35
  pipe = StableDiffusionXLControlNetImg2ImgPipeline.from_pretrained(
36
  "cagliostrolab/animagine-xl-3.1", controlnet=controlnet, vae=vae, torch_dtype=torch.float16
37
  )
@@ -43,12 +47,13 @@ def load_model(lora_dir, cn_dir):
43
 
44
 
45
  @spaces.GPU
46
- def predict(input_image_path, prompt, negative_prompt, controlnet_scale):
47
  pipe = load_model(lora_dir, cn_dir)
48
  input_image_pil = Image.open(input_image_path)
49
  base_size = input_image_pil.size
50
  resize_image = resize_image_aspect_ratio(input_image_pil)
51
  white_base_pil = base_generation(resize_image.size, (255, 255, 255, 255)).convert("RGB")
 
52
  generator = torch.manual_seed(0)
53
  last_time = time.time()
54
  prompt = "masterpiece, best quality, monochrome, lineart, white background, " + prompt
@@ -60,7 +65,7 @@ def predict(input_image_path, prompt, negative_prompt, controlnet_scale):
60
 
61
  output_image = pipe(
62
  image=white_base_pil,
63
- control_image=resize_image,
64
  strength=1.0,
65
  prompt=prompt,
66
  negative_prompt = negative_prompt,
@@ -81,6 +86,8 @@ class Img2Img:
81
  self.post_filter = True
82
  self.tagger_model = None
83
  self.input_image_path = None
 
 
84
 
85
  def process_prompt_analysis(self, input_image_path):
86
  if self.tagger_model is None:
@@ -91,6 +98,10 @@ class Img2Img:
91
  tags_list = remove_color(tags)
92
  return tags_list
93
 
 
 
 
 
94
 
95
  def layout(self):
96
  css = """
@@ -104,6 +115,13 @@ class Img2Img:
104
  with gr.Row():
105
  with gr.Column():
106
  self.input_image_path = gr.Image(label="input_image", type='filepath')
 
 
 
 
 
 
 
107
  self.prompt = gr.Textbox(label="prompt", lines=3)
108
  self.negative_prompt = gr.Textbox(label="negative_prompt", lines=3, value="lowres, error, extra digit, fewer digits, cropped, worst quality,low quality, normal quality, jpeg artifacts, blurry")
109
 
@@ -115,6 +133,12 @@ class Img2Img:
115
  with gr.Column():
116
  self.output_image = gr.Image(type="pil", label="output_image")
117
 
 
 
 
 
 
 
118
 
119
  prompt_analysis_button.click(
120
  self.process_prompt_analysis,
@@ -123,9 +147,12 @@ class Img2Img:
123
  )
124
 
125
 
 
 
 
126
  generate_button.click(
127
  fn=predict,
128
- inputs=[self.input_image_path, self.prompt, self.negative_prompt, self.controlnet_scale],
129
  outputs=self.output_image
130
  )
131
  return demo
 
7
  import time
8
 
9
  from utils.dl_utils import dl_cn_model, dl_cn_config, dl_tagger_model, dl_lora_model
10
+ from utils.image_utils import resize_image_aspect_ratio, base_generation, canny_process
11
 
12
  from utils.prompt_utils import execute_prompt, remove_color, remove_duplicates
13
  from utils.tagger import modelLoad, analysis
 
22
  os.makedirs(tagger_dir, exist_ok=True)
23
  os.makedirs(lora_dir, exist_ok=True)
24
 
25
+ # dl_cn_model(cn_dir)
26
+ # dl_cn_config(cn_dir)
27
  dl_tagger_model(tagger_dir)
28
  dl_lora_model(lora_dir)
29
 
 
31
  device = "cuda" if torch.cuda.is_available() else "cpu"
32
  dtype = torch.float16
33
  vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)
34
+ # controlnet = ControlNetModel.from_pretrained(cn_dir, torch_dtype=dtype, use_safetensors=True)
35
+ controlnet = ControlNetModel.from_pretrained(
36
+ "diffusers/controlnet-canny-sdxl-1.0",
37
+ torch_dtype=torch.float16
38
+ )
39
  pipe = StableDiffusionXLControlNetImg2ImgPipeline.from_pretrained(
40
  "cagliostrolab/animagine-xl-3.1", controlnet=controlnet, vae=vae, torch_dtype=torch.float16
41
  )
 
47
 
48
 
49
  @spaces.GPU
50
+ def predict(input_image_path, canny_image, prompt, negative_prompt, controlnet_scale):
51
  pipe = load_model(lora_dir, cn_dir)
52
  input_image_pil = Image.open(input_image_path)
53
  base_size = input_image_pil.size
54
  resize_image = resize_image_aspect_ratio(input_image_pil)
55
  white_base_pil = base_generation(resize_image.size, (255, 255, 255, 255)).convert("RGB")
56
+ canny_image = canny_image.resize(resize_image.size, Image.LANCZOS)
57
  generator = torch.manual_seed(0)
58
  last_time = time.time()
59
  prompt = "masterpiece, best quality, monochrome, lineart, white background, " + prompt
 
65
 
66
  output_image = pipe(
67
  image=white_base_pil,
68
+ control_image=canny_image,
69
  strength=1.0,
70
  prompt=prompt,
71
  negative_prompt = negative_prompt,
 
86
  self.post_filter = True
87
  self.tagger_model = None
88
  self.input_image_path = None
89
+ self.canny_image = None
90
+
91
 
92
  def process_prompt_analysis(self, input_image_path):
93
  if self.tagger_model is None:
 
98
  tags_list = remove_color(tags)
99
  return tags_list
100
 
101
+ def _make_canny(self, img_path, canny_threshold1, canny_threshold2):
102
+ threshold1 = int(canny_threshold1)
103
+ threshold2 = int(canny_threshold2)
104
+ return canny_process(img_path, threshold1, threshold2)
105
 
106
  def layout(self):
107
  css = """
 
115
  with gr.Row():
116
  with gr.Column():
117
  self.input_image_path = gr.Image(label="input_image", type='filepath')
118
+ self.canny_image = gr.Image(label="canny_image", type='pil')
119
+ with gr.Row():
120
+ canny_threshold1 = gr.Slider(minimum=0, value=20, maximum=253, show_label=False)
121
+ gr.HTML(value="<span>/</span>", show_label=False)
122
+ canny_threshold2 = gr.Slider(minimum=0, value=120, maximum=254, show_label=False)
123
+ canny_generate_button = gr.Button("canny_generate", interactive=False)
124
+
125
  self.prompt = gr.Textbox(label="prompt", lines=3)
126
  self.negative_prompt = gr.Textbox(label="negative_prompt", lines=3, value="lowres, error, extra digit, fewer digits, cropped, worst quality,low quality, normal quality, jpeg artifacts, blurry")
127
 
 
133
  with gr.Column():
134
  self.output_image = gr.Image(type="pil", label="output_image")
135
 
136
# Clicking the canny button runs edge detection on the uploaded image.
# The handler must be _make_canny (image path + two thresholds); the prompt
# analysis callback only accepts a single image path. The image component is
# stored as self.input_image_path - there is no self.input_image attribute.
canny_generate_button.click(
    fn=self._make_canny,
    inputs=[self.input_image_path, canny_threshold1, canny_threshold2],
    outputs=self.canny_image,
)
141
+
142
 
143
  prompt_analysis_button.click(
144
  self.process_prompt_analysis,
 
147
  )
148
 
149
 
150
+
151
+
152
+
153
  generate_button.click(
154
  fn=predict,
155
+ inputs=[self.input_image_path, self.canny_image, self.prompt, self.negative_prompt, self.controlnet_scale],
156
  outputs=self.output_image
157
  )
158
  return demo
utils/dl_utils.py CHANGED
@@ -11,7 +11,7 @@ import cv2
11
  def dl_cn_model(model_dir):
12
  folder = model_dir
13
  file_name = 'diffusion_pytorch_model.safetensors'
14
- url = "https://huggingface.co/2vXpSwA7/iroiro-lora/resolve/main/test_controlnet2/CN-anytest_v4-marged.safetensors"
15
  file_path = os.path.join(folder, file_name)
16
  if not os.path.exists(file_path):
17
  response = requests.get(url, allow_redirects=True)
@@ -57,7 +57,7 @@ def dl_tagger_model(model_dir):
57
 
58
 
59
  def dl_lora_model(model_dir):
60
- file_name = 'sdxl_BWLine.safetensors'
61
  file_path = os.path.join(model_dir, file_name)
62
  if not os.path.exists(file_path):
63
  url = "https://huggingface.co/tori29umai/lineart/resolve/main/sdxl_BWLine.safetensors"
 
11
  def dl_cn_model(model_dir):
12
  folder = model_dir
13
  file_name = 'diffusion_pytorch_model.safetensors'
14
+ url = " https://huggingface.co/2vXpSwA7/iroiro-lora/resolve/main/test_controlnet2/CN-anytest_v3-50000_fp16.safetensors"
15
  file_path = os.path.join(folder, file_name)
16
  if not os.path.exists(file_path):
17
  response = requests.get(url, allow_redirects=True)
 
57
 
58
 
59
  def dl_lora_model(model_dir):
60
+ file_name = 'sdxl_BW_Line.safetensors'
61
  file_path = os.path.join(model_dir, file_name)
62
  if not os.path.exists(file_path):
63
  url = "https://huggingface.co/tori29umai/lineart/resolve/main/sdxl_BWLine.safetensors"
utils/image_utils.py CHANGED
@@ -1,13 +1,32 @@
1
- import os
2
-
3
- import requests
4
- from tqdm import tqdm
5
- import shutil
6
-
7
  from PIL import Image, ImageOps
8
  import numpy as np
9
  import cv2
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  def resize_image_aspect_ratio(image):
12
  # 元の画像サイズを取得
13
  original_width, original_height = image.size
 
 
 
 
 
 
 
1
  from PIL import Image, ImageOps
2
  import numpy as np
3
  import cv2
4
 
5
def canny_process(image_path, threshold1, threshold2):
    """Run Canny edge detection on an image file and return the edge map.

    Transparent regions are flattened onto a white background before the
    image is converted to grayscale and passed to ``cv2.Canny``.

    Args:
        image_path: Path (or file object) accepted by ``PIL.Image.open``.
        threshold1: First threshold passed to ``cv2.Canny``.
        threshold2: Second threshold passed to ``cv2.Canny``.

    Returns:
        A PIL image built from the edge array produced by ``cv2.Canny``.
    """
    # Open the image and convert it to RGBA so transparency is preserved.
    # The context manager releases the underlying file handle as soon as the
    # pixel data has been copied by convert().
    with Image.open(image_path) as source:
        img = source.convert("RGBA")

    canvas_image = Image.new('RGBA', img.size, (255, 255, 255, 255))

    # Paste the image onto the canvas using its own alpha channel as the
    # mask, so transparent areas end up white.
    canvas_image.paste(img, (0, 0), img)

    # Drop the alpha channel; the formerly transparent areas are now white.
    image_pil = canvas_image.convert("RGB")
    image_np = np.array(image_pil)

    # Grayscale conversion.
    gray = cv2.cvtColor(image_np, cv2.COLOR_RGB2GRAY)
    # Canny edge detection.
    edges = cv2.Canny(gray, threshold1, threshold2)

    return Image.fromarray(edges)
28
+
29
+
30
  def resize_image_aspect_ratio(image):
31
  # 元の画像サイズを取得
32
  original_width, original_height = image.size