Photo2ShojoManga

Running on Zero

App Files Files Community

tori29umai committed on Jun 19, 2024

Commit

25db6c9

1 Parent(s): 4f9226f

app.py

Browse files

Files changed (3) hide show

app.py +34 -7
utils/dl_utils.py +2 -2
utils/image_utils.py +25 -6

app.py CHANGED Viewed

@@ -7,7 +7,7 @@ import os
 import time
 from utils.dl_utils import dl_cn_model, dl_cn_config, dl_tagger_model, dl_lora_model
-from utils.image_utils import resize_image_aspect_ratio, base_generation
 from utils.prompt_utils import execute_prompt, remove_color, remove_duplicates
 from utils.tagger import modelLoad, analysis
@@ -22,8 +22,8 @@ os.makedirs(cn_dir, exist_ok=True)
 os.makedirs(tagger_dir, exist_ok=True)
 os.makedirs(lora_dir, exist_ok=True)
-dl_cn_model(cn_dir)
-dl_cn_config(cn_dir)
 dl_tagger_model(tagger_dir)
 dl_lora_model(lora_dir)
@@ -31,7 +31,11 @@ def load_model(lora_dir, cn_dir):
     device = "cuda" if torch.cuda.is_available() else "cpu"
     dtype = torch.float16
     vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)
-    controlnet = ControlNetModel.from_pretrained(cn_dir, torch_dtype=dtype, use_safetensors=True)
     pipe = StableDiffusionXLControlNetImg2ImgPipeline.from_pretrained(
         "cagliostrolab/animagine-xl-3.1", controlnet=controlnet, vae=vae, torch_dtype=torch.float16
     )
@@ -43,12 +47,13 @@ def load_model(lora_dir, cn_dir):
 @spaces.GPU
-def predict(input_image_path, prompt, negative_prompt, controlnet_scale):
     pipe = load_model(lora_dir, cn_dir)
     input_image_pil = Image.open(input_image_path)
     base_size = input_image_pil.size
     resize_image = resize_image_aspect_ratio(input_image_pil)
     white_base_pil = base_generation(resize_image.size, (255, 255, 255, 255)).convert("RGB")
     generator = torch.manual_seed(0)
     last_time = time.time()
     prompt = "masterpiece, best quality, monochrome, lineart, white background, " + prompt
@@ -60,7 +65,7 @@ def predict(input_image_path, prompt, negative_prompt, controlnet_scale):
     output_image = pipe(
         image=white_base_pil,
-        control_image=resize_image,
         strength=1.0,
         prompt=prompt,
         negative_prompt = negative_prompt,
@@ -81,6 +86,8 @@ class Img2Img:
         self.post_filter = True
         self.tagger_model = None
         self.input_image_path = None
     def process_prompt_analysis(self, input_image_path):
         if self.tagger_model is None:
@@ -91,6 +98,10 @@ class Img2Img:
             tags_list = remove_color(tags)
         return tags_list
     def layout(self):
         css = """
@@ -104,6 +115,13 @@ class Img2Img:
             with gr.Row():
                 with gr.Column():
                     self.input_image_path = gr.Image(label="input_image", type='filepath')
                     self.prompt = gr.Textbox(label="prompt", lines=3)
                     self.negative_prompt = gr.Textbox(label="negative_prompt", lines=3, value="lowres, error, extra digit, fewer digits, cropped, worst quality,low quality, normal quality, jpeg artifacts, blurry")
@@ -115,6 +133,12 @@ class Img2Img:
                 with gr.Column():
                     self.output_image = gr.Image(type="pil", label="output_image")
             prompt_analysis_button.click(
                         self.process_prompt_analysis,
@@ -123,9 +147,12 @@ class Img2Img:
             )
             generate_button.click(
                 fn=predict,
-                inputs=[self.input_image_path, self.prompt, self.negative_prompt, self.controlnet_scale],
                 outputs=self.output_image
             )
         return demo

 import time
 from utils.dl_utils import dl_cn_model, dl_cn_config, dl_tagger_model, dl_lora_model
+from utils.image_utils import resize_image_aspect_ratio, base_generation, canny_process
 from utils.prompt_utils import execute_prompt, remove_color, remove_duplicates
 from utils.tagger import modelLoad, analysis
 os.makedirs(tagger_dir, exist_ok=True)
 os.makedirs(lora_dir, exist_ok=True)
+# dl_cn_model(cn_dir)
+# dl_cn_config(cn_dir)
 dl_tagger_model(tagger_dir)
 dl_lora_model(lora_dir)
     device = "cuda" if torch.cuda.is_available() else "cpu"
     dtype = torch.float16
     vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)
+    # controlnet = ControlNetModel.from_pretrained(cn_dir, torch_dtype=dtype, use_safetensors=True)
+    controlnet = ControlNetModel.from_pretrained(
+        "diffusers/controlnet-canny-sdxl-1.0",
+        torch_dtype=torch.float16
+    )
     pipe = StableDiffusionXLControlNetImg2ImgPipeline.from_pretrained(
         "cagliostrolab/animagine-xl-3.1", controlnet=controlnet, vae=vae, torch_dtype=torch.float16
     )
 @spaces.GPU
+def predict(input_image_path, canny_image, prompt, negative_prompt, controlnet_scale):
     pipe = load_model(lora_dir, cn_dir)
     input_image_pil = Image.open(input_image_path)
     base_size = input_image_pil.size
     resize_image = resize_image_aspect_ratio(input_image_pil)
     white_base_pil = base_generation(resize_image.size, (255, 255, 255, 255)).convert("RGB")
+    canny_image = canny_image.resize(resize_image.size, Image.LANCZOS)
     generator = torch.manual_seed(0)
     last_time = time.time()
     prompt = "masterpiece, best quality, monochrome, lineart, white background, " + prompt
     output_image = pipe(
         image=white_base_pil,
+        control_image=canny_image,
         strength=1.0,
         prompt=prompt,
         negative_prompt = negative_prompt,
         self.post_filter = True
         self.tagger_model = None
         self.input_image_path = None
+        self.canny_image = None
     def process_prompt_analysis(self, input_image_path):
         if self.tagger_model is None:
             tags_list = remove_color(tags)
         return tags_list
+    def _make_canny(self, img_path, canny_threshold1, canny_threshold2):  # Build a Canny edge image from the image file at img_path.
+        threshold1 = int(canny_threshold1)  # thresholds are coerced to int before being handed to canny_process
+        threshold2 = int(canny_threshold2)
+        return canny_process(img_path, threshold1, threshold2)  # canny_process is imported from utils.image_utils
     def layout(self):
         css = """
             with gr.Row():
                 with gr.Column():
                     self.input_image_path = gr.Image(label="input_image", type='filepath')
+                    self.canny_image = gr.Image(label="canny_image", type='pil')
+                    with gr.Row():
+                        canny_threshold1 = gr.Slider(minimum=0, value=20, maximum=253, show_label=False)
+                        gr.HTML(value="<span>/</span>", show_label=False)
+                        canny_threshold2 = gr.Slider(minimum=0, value=120, maximum=254, show_label=False)
+                        canny_generate_button = gr.Button("canny_generate", interactive=False)
                     self.prompt = gr.Textbox(label="prompt", lines=3)
                     self.negative_prompt = gr.Textbox(label="negative_prompt", lines=3, value="lowres, error, extra digit, fewer digits, cropped, worst quality,low quality, normal quality, jpeg artifacts, blurry")
                 with gr.Column():
                     self.output_image = gr.Image(type="pil", label="output_image")
+            canny_generate_button.click(
+                        self.process_prompt_analysis,
+                        inputs=[self.input_image, canny_threshold1, canny_threshold2],
+                        outputs=self.canny_image
+            )
             prompt_analysis_button.click(
                         self.process_prompt_analysis,
             )
             generate_button.click(
                 fn=predict,
+                inputs=[self.input_image_path, self.canny_image, self.prompt, self.negative_prompt, self.controlnet_scale],
                 outputs=self.output_image
             )
         return demo

utils/dl_utils.py CHANGED Viewed

@@ -11,7 +11,7 @@ import cv2
 def dl_cn_model(model_dir):
     folder = model_dir
     file_name = 'diffusion_pytorch_model.safetensors'
-    url = "https://huggingface.co/2vXpSwA7/iroiro-lora/resolve/main/test_controlnet2/CN-anytest_v4-marged.safetensors"
     file_path = os.path.join(folder, file_name)
     if not os.path.exists(file_path):
         response = requests.get(url, allow_redirects=True)
@@ -57,7 +57,7 @@ def dl_tagger_model(model_dir):
 def dl_lora_model(model_dir):
-    file_name = 'sdxl_BWLine.safetensors'
     file_path = os.path.join(model_dir, file_name)
     if not os.path.exists(file_path):
         url = "https://huggingface.co/tori29umai/lineart/resolve/main/sdxl_BWLine.safetensors"

 def dl_cn_model(model_dir):
     folder = model_dir
     file_name = 'diffusion_pytorch_model.safetensors'
+    url = "  https://huggingface.co/2vXpSwA7/iroiro-lora/resolve/main/test_controlnet2/CN-anytest_v3-50000_fp16.safetensors"
     file_path = os.path.join(folder, file_name)
     if not os.path.exists(file_path):
         response = requests.get(url, allow_redirects=True)
 def dl_lora_model(model_dir):
+    file_name = 'sdxl_BW_Line.safetensors'
     file_path = os.path.join(model_dir, file_name)
     if not os.path.exists(file_path):
         url = "https://huggingface.co/tori29umai/lineart/resolve/main/sdxl_BWLine.safetensors"

utils/image_utils.py CHANGED Viewed

@@ -1,13 +1,32 @@
-import os
-import requests
-from tqdm import tqdm
-import shutil
 from PIL import Image, ImageOps
 import numpy as np
 import cv2
 def resize_image_aspect_ratio(image):
     # 元の画像サイズを取得
     original_width, original_height = image.size

 from PIL import Image, ImageOps
 import numpy as np
 import cv2
+def canny_process(image_path, threshold1, threshold2):  # Return a Canny edge map (PIL image) for the image file at image_path.
+    # Open the image and convert it to RGBA so transparency information is kept
+    img = Image.open(image_path)
+    img = img.convert("RGBA")
+    canvas_image = Image.new('RGBA', img.size, (255, 255, 255, 255))  # opaque white canvas, same size as the input
+    # Paste the image onto the canvas so that transparent areas become white
+    canvas_image.paste(img, (0, 0), img)  # img is also passed as the paste mask
+    # Convert from RGBA to RGB, leaving the transparent areas white
+    image_pil = canvas_image.convert("RGB")
+    image_np = np.array(image_pil)
+    # Convert to grayscale
+    gray = cv2.cvtColor(image_np, cv2.COLOR_RGB2GRAY)
+    # Canny edge detection
+    edges = cv2.Canny(gray, threshold1, threshold2)  # threshold1/threshold2 are forwarded unchanged to cv2.Canny
+    canny = Image.fromarray(edges)
+    return canny
 def resize_image_aspect_ratio(image):
     # 元の画像サイズを取得
     original_width, original_height = image.size