Spaces:

ura23
/

wd-tagger

Running

App Files Community

ura23 committed on Jan 25

Commit

ce8d28d

verified ·

1 Parent(s): ac2c511

Update app.py

Browse files

Files changed (1) hide show

app.py +55 -117

app.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import argparse
 import os
 import gradio as gr
 import huggingface_hub
@@ -7,10 +8,12 @@ import numpy as np
 import onnxruntime as rt
 import pandas as pd
 from PIL import Image
-TITLE = "WaifuDiffusion Tagger"
 DESCRIPTION = """
-Demo for the WaifuDiffusion tagger models
 """
 HF_TOKEN = os.environ.get("HF_TOKEN", "")
@@ -22,34 +25,17 @@ VIT_MODEL_DSV3_REPO = "SmilingWolf/wd-vit-tagger-v3"
 VIT_LARGE_MODEL_DSV3_REPO = "SmilingWolf/wd-vit-large-tagger-v3"
 EVA02_LARGE_MODEL_DSV3_REPO = "SmilingWolf/wd-eva02-large-tagger-v3"
-# Dataset v2 series of models:
-MOAT_MODEL_DSV2_REPO = "SmilingWolf/wd-v1-4-moat-tagger-v2"
-SWIN_MODEL_DSV2_REPO = "SmilingWolf/wd-v1-4-swinv2-tagger-v2"
-CONV_MODEL_DSV2_REPO = "SmilingWolf/wd-v1-4-convnext-tagger-v2"
-CONV2_MODEL_DSV2_REPO = "SmilingWolf/wd-v1-4-convnextv2-tagger-v2"
-VIT_MODEL_DSV2_REPO = "SmilingWolf/wd-v1-4-vit-tagger-v2"
-# IdolSankaku series of models:
-EVA02_LARGE_MODEL_IS_DSV1_REPO = "deepghs/idolsankaku-eva02-large-tagger-v1"
-SWINV2_MODEL_IS_DSV1_REPO = "deepghs/idolsankaku-swinv2-tagger-v1"
-# Files to download from the repos
 MODEL_FILENAME = "model.onnx"
 LABEL_FILENAME = "selected_tags.csv"
-def parse_args() -> argparse.Namespace:
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--score-slider-step", type=float, default=0.05)
-    parser.add_argument("--score-general-threshold", type=float, default=0.3)
-    parser.add_argument("--score-character-threshold", type=float, default=1.0)
-    return parser.parse_args()
-def load_labels(dataframe) -> list[str]:
-    tag_names = dataframe["name"].tolist()
-    general_indexes = list(np.where(dataframe["category"] == 0)[0])
-    character_indexes = list(np.where(dataframe["category"] == 4)[0])
-    return tag_names, general_indexes, character_indexes
 class Predictor:
     def __init__(self):
         self.model_target_size = None
@@ -66,7 +52,7 @@ class Predictor:
         csv_path, model_path = self.download_model(model_repo)
         tags_df = pd.read_csv(csv_path)
-        self.tag_names, self.general_indexes, self.character_indexes = load_labels(tags_df)
         model = rt.InferenceSession(model_path)
         _, height, width, _ = model.get_inputs()[0].shape
@@ -74,21 +60,19 @@ class Predictor:
         self.last_loaded_repo = model_repo
         self.model = model
     def prepare_image(self, image):
-        # Create a white canvas with the same size as the input image
         canvas = Image.new("RGBA", image.size, (255, 255, 255))
-        # Ensure the input image has an alpha channel for compositing
         if image.mode != "RGBA":
             image = image.convert("RGBA")
-        # Composite the input image onto the canvas
         canvas.alpha_composite(image)
-        # Convert to RGB (alpha channel is no longer needed)
         image = canvas.convert("RGB")
-        # Resize the image to a square of size (model_target_size x model_target_size)
         max_dim = max(image.size)
         padded_image = Image.new("RGB", (max_dim, max_dim), (255, 255, 255))
         pad_left = (max_dim - image.width) // 2
@@ -96,10 +80,7 @@ class Predictor:
         padded_image.paste(image, (pad_left, pad_top))
         padded_image = padded_image.resize((self.model_target_size, self.model_target_size), Image.BICUBIC)
-        # Convert the image to a NumPy array
-        image_array = np.asarray(padded_image, dtype=np.float32)[:, :, ::-1]
-        return np.expand_dims(image_array, axis=0)
     def predict(self, images, model_repo, general_thresh, character_thresh):
         self.load_model(model_repo)
@@ -118,101 +99,58 @@ class Predictor:
         return results
 def main():
-    args = parse_args()
     predictor = Predictor()
-    model_repos = [
-        SWINV2_MODEL_DSV3_REPO,
-        CONV_MODEL_DSV3_REPO,
-        VIT_MODEL_DSV3_REPO,
-        VIT_LARGE_MODEL_DSV3_REPO,
-        EVA02_LARGE_MODEL_DSV3_REPO,
-        # ---
-        MOAT_MODEL_DSV2_REPO,
-        SWIN_MODEL_DSV2_REPO,
-        CONV_MODEL_DSV2_REPO,
-        CONV2_MODEL_DSV2_REPO,
-        VIT_MODEL_DSV2_REPO,
-        # ---
-        SWINV2_MODEL_IS_DSV1_REPO,
-        EVA02_LARGE_MODEL_IS_DSV1_REPO,
-    ]
-    predefined_tags = ["loli", "oppai_loli", "minigirl", "babydoll", "monochrome", "greyscale", "speech_bubble", "english_text", "copyright_name", "twitter_username", "artist_name", "watermark", "censored", "bar_censor", "blank_censor", "blur_censor", "light_censor", "mosaic_censoring"]  # Default tags to filter out
     with gr.Blocks(title=TITLE) as demo:
         gr.Markdown(f"<h1 style='text-align: center;'>{TITLE}</h1>")
         gr.Markdown(DESCRIPTION)
         with gr.Row():
             with gr.Column():
-                image_files = gr.File(
-                    file_types=["image"], label="Upload Images", file_count="multiple",
-                )
-                # Wrap the model selection and sliders in an Accordion
-                with gr.Accordion("Advanced Settings", open=False):  # Collapsible by default
-                    model_repo = gr.Dropdown(
-                        model_repos,
-                        value=VIT_MODEL_DSV3_REPO,
-                        label="Select Model",
-                    )
-                    general_thresh = gr.Slider(
-                        0, 1, step=args.score_slider_step, value=args.score_general_threshold, label="General Tags Threshold"
-                    )
-                    character_thresh = gr.Slider(
-                        0, 1, step=args.score_slider_step, value=args.score_character_threshold, label="Character Tags Threshold"
-                    )
-                    filter_tags = gr.Textbox(
-                        value=", ".join(predefined_tags),
-                        label="Filter Tags (comma-separated)",
-                        placeholder="Add tags to filter out (e.g., winter, red, from above)",
-                        lines=3
-                    )
-                submit = gr.Button(
-                    value="Process Images", variant="primary"
-                )
             with gr.Column():
-                output = gr.Textbox(label="Output", lines=10)
-        def process_images(files, model_repo, general_thresh, character_thresh, filter_tags):
             images = [Image.open(file.name) for file in files]
-            results = predictor.predict(images, model_repo, general_thresh, character_thresh)
-            # Parse filter tags
-            filter_set = set(tag.strip().lower() for tag in filter_tags.split(","))
-            # Generate formatted output
-            prompts = []
-            for i, (general_tags, character_tags) in enumerate(results):
-                # Replace underscores with spaces for both character and general tags
-                character_part = ", ".join(
-                    tag.replace('_', ' ') for tag in character_tags if tag.lower() not in filter_set
-                )
-                general_part = ", ".join(
-                    tag.replace('_', ' ') for tag in general_tags if tag.lower() not in filter_set
-                )
-                # Construct the prompt based on the presence of character_part
-                if character_part:
-                    prompts.append(f"{character_part}, {general_part}")
-                else:
-                    prompts.append(general_part)
-            # Join all prompts with blank lines
-            return "\n\n".join(prompts)
         submit.click(
             process_images,
-            inputs=[image_files, model_repo, general_thresh, character_thresh, filter_tags],
-            outputs=output
         )
-    demo.queue(max_size=10)
     demo.launch()
 if __name__ == "__main__":
-    main()

 import argparse
 import os
+from pathlib import Path
 import gradio as gr
 import huggingface_hub
 import onnxruntime as rt
 import pandas as pd
 from PIL import Image
+from tagger.common import Heatmap, ImageLabels, LabelData, load_labels_hf, preprocess_image
+from tagger.model import load_model_and_transform, process_heatmap
+TITLE = "WaifuDiffusion Tagger with Heatmap"
 DESCRIPTION = """
+Demo for the WaifuDiffusion tagger models with heatmap and grid visualization.
 """
 HF_TOKEN = os.environ.get("HF_TOKEN", "")
 VIT_LARGE_MODEL_DSV3_REPO = "SmilingWolf/wd-vit-large-tagger-v3"
 EVA02_LARGE_MODEL_DSV3_REPO = "SmilingWolf/wd-eva02-large-tagger-v3"
+MODEL_REPOS = [
+    SWINV2_MODEL_DSV3_REPO,
+    CONV_MODEL_DSV3_REPO,
+    VIT_MODEL_DSV3_REPO,
+    VIT_LARGE_MODEL_DSV3_REPO,
+    EVA02_LARGE_MODEL_DSV3_REPO,
+]
 MODEL_FILENAME = "model.onnx"
 LABEL_FILENAME = "selected_tags.csv"
 class Predictor:
     def __init__(self):
         self.model_target_size = None
         csv_path, model_path = self.download_model(model_repo)
         tags_df = pd.read_csv(csv_path)
+        self.tag_names, self.general_indexes, self.character_indexes = self.load_labels(tags_df)
         model = rt.InferenceSession(model_path)
         _, height, width, _ = model.get_inputs()[0].shape
         self.last_loaded_repo = model_repo
         self.model = model
+    def load_labels(self, dataframe):
+        tag_names = dataframe["name"].tolist()
+        general_indexes = list(np.where(dataframe["category"] == 0)[0])
+        character_indexes = list(np.where(dataframe["category"] == 4)[0])
+        return tag_names, general_indexes, character_indexes
     def prepare_image(self, image):
         canvas = Image.new("RGBA", image.size, (255, 255, 255))
         if image.mode != "RGBA":
             image = image.convert("RGBA")
         canvas.alpha_composite(image)
         image = canvas.convert("RGB")
         max_dim = max(image.size)
         padded_image = Image.new("RGB", (max_dim, max_dim), (255, 255, 255))
         pad_left = (max_dim - image.width) // 2
         padded_image.paste(image, (pad_left, pad_top))
         padded_image = padded_image.resize((self.model_target_size, self.model_target_size), Image.BICUBIC)
+        return np.expand_dims(np.asarray(padded_image, dtype=np.float32)[:, :, ::-1], axis=0)
     def predict(self, images, model_repo, general_thresh, character_thresh):
         self.load_model(model_repo)
         return results
+    def generate_heatmap_and_grid(self, image, model_repo, threshold):
+        model, transform = load_model_and_transform(model_repo)
+        labels = load_labels_hf(model_repo)
+        image = preprocess_image(image, (448, 448))
+        image = transform(image).unsqueeze(0)
+        heatmaps, heatmap_grid, _ = process_heatmap(model, image, labels, threshold)
+        return [(x.image, x.label) for x in heatmaps], heatmap_grid
 def main():
     predictor = Predictor()
     with gr.Blocks(title=TITLE) as demo:
         gr.Markdown(f"<h1 style='text-align: center;'>{TITLE}</h1>")
         gr.Markdown(DESCRIPTION)
         with gr.Row():
             with gr.Column():
+                image_files = gr.File(file_types=["image"], label="Upload Images", file_count="multiple")
+                model_repo = gr.Dropdown(MODEL_REPOS, value=VIT_MODEL_DSV3_REPO, label="Select Model")
+                threshold = gr.Slider(0, 1, step=0.01, value=0.35, label="Heatmap Threshold")
+                general_thresh = gr.Slider(0, 1, step=0.05, value=0.3, label="General Tags Threshold")
+                character_thresh = gr.Slider(0, 1, step=0.05, value=1.0, label="Character Tags Threshold")
+                submit = gr.Button(value="Process Images", variant="primary")
             with gr.Column():
+                with gr.Tab(label="Tags"):
+                    output_tags = gr.Textbox(label="Output Tags", lines=10)
+                with gr.Tab(label="Heatmaps"):
+                    heatmap_gallery = gr.Gallery(label="Heatmap Gallery")
+                with gr.Tab(label="Grid"):
+                    heatmap_grid = gr.Image(label="Heatmap Grid")
+        def process_images(files, model_repo, general_thresh, character_thresh, threshold):
             images = [Image.open(file.name) for file in files]
+            tag_results = predictor.predict(images, model_repo, general_thresh, character_thresh)
+            heatmap_results, grid_result = predictor.generate_heatmap_and_grid(images[0], model_repo, threshold)
+            tag_output = []
+            for general_tags, character_tags in tag_results:
+                general_str = ", ".join(general_tags)
+                character_str = ", ".join(character_tags)
+                tag_output.append(f"Characters: {character_str}\nGeneral: {general_str}")
+            return "\n\n".join(tag_output), heatmap_results, grid_result
         submit.click(
             process_images,
+            inputs=[image_files, model_repo, general_thresh, character_thresh, threshold],
+            outputs=[output_tags, heatmap_gallery, heatmap_grid]
         )
     demo.launch()
 if __name__ == "__main__":
+    main()