Spaces:

ura23
/

wd-tagger

Running

App Files Files Community

ura23 committed on Jan 29

Commit

6e17304

verified ·

1 Parent(s): fe45257

Update app.py

Browse files

Files changed (1) hide show

app.py +50 -200

app.py CHANGED Viewed

@@ -17,21 +17,12 @@ HF_TOKEN = os.environ.get("HF_TOKEN", "")
 # Dataset v3 series of models:
 SWINV2_MODEL_DSV3_REPO = "SmilingWolf/wd-swinv2-tagger-v3"
-CONV_MODEL_DSV3_REPO = "SmilingWolf/wd-convnext-tagger-v3"
-VIT_MODEL_DSV3_REPO = "SmilingWolf/wd-vit-tagger-v3"
-VIT_LARGE_MODEL_DSV3_REPO = "SmilingWolf/wd-vit-large-tagger-v3"
-EVA02_LARGE_MODEL_DSV3_REPO = "SmilingWolf/wd-eva02-large-tagger-v3"
 # Dataset v2 series of models:
 MOAT_MODEL_DSV2_REPO = "SmilingWolf/wd-v1-4-moat-tagger-v2"
-SWIN_MODEL_DSV2_REPO = "SmilingWolf/wd-v1-4-swinv2-tagger-v2"
-CONV_MODEL_DSV2_REPO = "SmilingWolf/wd-v1-4-convnext-tagger-v2"
-CONV2_MODEL_DSV2_REPO = "SmilingWolf/wd-v1-4-convnextv2-tagger-v2"
-VIT_MODEL_DSV2_REPO = "SmilingWolf/wd-v1-4-vit-tagger-v2"
 # IdolSankaku series of models:
 EVA02_LARGE_MODEL_IS_DSV1_REPO = "deepghs/idolsankaku-eva02-large-tagger-v1"
-SWINV2_MODEL_IS_DSV1_REPO = "deepghs/idolsankaku-swinv2-tagger-v1"
 # Files to download from the repos
 MODEL_FILENAME = "model.onnx"
@@ -50,6 +41,25 @@ def load_labels(dataframe) -> list[str]:
     character_indexes = list(np.where(dataframe["category"] == 4)[0])
     return tag_names, general_indexes, character_indexes
 class Predictor:
     def __init__(self):
         self.model_target_size = None
@@ -75,20 +85,10 @@ class Predictor:
         self.model = model
     def prepare_image(self, image):
-        # Create a white canvas with the same size as the input image
-        canvas = Image.new("RGBA", image.size, (255, 255, 255))
-        # Ensure the input image has an alpha channel for compositing
         if image.mode != "RGBA":
             image = image.convert("RGBA")
-        # Composite the input image onto the canvas
-        canvas.alpha_composite(image)
-        # Convert to RGB (alpha channel is no longer needed)
-        image = canvas.convert("RGB")
-        # Resize the image to a square of size (model_target_size x model_target_size)
         max_dim = max(image.size)
         padded_image = Image.new("RGB", (max_dim, max_dim), (255, 255, 255))
         pad_left = (max_dim - image.width) // 2
@@ -96,15 +96,12 @@ class Predictor:
         padded_image.paste(image, (pad_left, pad_top))
         padded_image = padded_image.resize((self.model_target_size, self.model_target_size), Image.BICUBIC)
-        # Convert the image to a NumPy array
         image_array = np.asarray(padded_image, dtype=np.float32)[:, :, ::-1]
         return np.expand_dims(image_array, axis=0)
     def predict(self, images, model_repo, general_thresh, character_thresh):
         self.load_model(model_repo)
         results = []
         for image in images:
             image = self.prepare_image(image)
             input_name = self.model.get_inputs()[0].name
@@ -115,186 +112,39 @@ class Predictor:
             general_res = [x[0] for i, x in enumerate(labels) if i in self.general_indexes and x[1] > general_thresh]
             character_res = [x[0] for i, x in enumerate(labels) if i in self.character_indexes and x[1] > character_thresh]
             results.append((general_res, character_res))
         return results
-def main():
-    args = parse_args()
-    predictor = Predictor()
-    model_repos = [
-        SWINV2_MODEL_DSV3_REPO,
-        CONV_MODEL_DSV3_REPO,
-        VIT_MODEL_DSV3_REPO,
-        VIT_LARGE_MODEL_DSV3_REPO,
-        EVA02_LARGE_MODEL_DSV3_REPO,
-        # ---
-        MOAT_MODEL_DSV2_REPO,
-        SWIN_MODEL_DSV2_REPO,
-        CONV_MODEL_DSV2_REPO,
-        CONV2_MODEL_DSV2_REPO,
-        VIT_MODEL_DSV2_REPO,
-        # ---
-        SWINV2_MODEL_IS_DSV1_REPO,
-        EVA02_LARGE_MODEL_IS_DSV1_REPO,
-    ]
-    predefined_tags = ["loli",
-                       "oppai_loli",
-                       "onee-shota",
-                       "incest",
-                       "furry",
-                       "furry_female",
-                       "shota",
-                       "male_focus",
-                       "signature",
-                       "lolita_hairband",
-                       "otoko_no_ko",
-                       "minigirl",
-                       "patreon_username",
-                       "babydoll",
-                       "monochrome",
-                       "happy_birthday",
-                       "happy_new_year",
-                       "dated",
-                       "thought_bubble",
-                       "greyscale",
-                       "speech_bubble",
-                       "english_text",
-                       "copyright_name",
-                       "twitter_username",
-                       "patreon username",
-                       "patreon logo",
-                       "cover",
-                       "content_rating"
-                       "cover_page",
-                       "doujin_cover",
-                       "sex",
-                       "artist_name",
-                       "watermark",
-                       "censored",
-                       "bar_censor",
-                       "blank_censor",
-                       "blur_censor",
-                       "light_censor",
-                       "mosaic_censoring"]
-    with gr.Blocks(title=TITLE) as demo:
-        gr.Markdown(f"<h1 style='text-align: center;'>{TITLE}</h1>")
-        gr.Markdown(DESCRIPTION)
-        with gr.Row():
-            with gr.Column():
-                image_files = gr.File(
-                    file_types=["image"], label="Upload Images", file_count="multiple",
-                )
-                # Wrap the model selection and sliders in an Accordion
-                with gr.Accordion("Advanced Settings", open=False):  # Collapsible by default
-                    model_repo = gr.Dropdown(
-                        model_repos,
-                        value=VIT_MODEL_DSV3_REPO,
-                        label="Select Model",
-                    )
-                    general_thresh = gr.Slider(
-                        0, 1, step=args.score_slider_step, value=args.score_general_threshold, label="General Tags Threshold"
-                    )
-                    character_thresh = gr.Slider(
-                        0, 1, step=args.score_slider_step, value=args.score_character_threshold, label="Character Tags Threshold"
-                    )
-                    filter_tags = gr.Textbox(
-                        value=", ".join(predefined_tags),
-                        label="Filter Tags (comma-separated)",
-                        placeholder="Add tags to filter out (e.g., winter, red, from above)",
-                        lines=3
-                    )
-                submit = gr.Button(
-                    value="Process Images", variant="primary"
-                )
-            with gr.Column():
-                output = gr.Textbox(label="Output", lines=10)
-        def parse_replacement_rules(rules_text):
-            """Parse user-defined tag replacement rules into a dictionary."""
-            rules = {}
-            for line in rules_text.strip().split("\n"):
-                if "->" in line:
-                    old_tags, new_tags = map(str.strip, line.split("->"))
-                    old_tags_list = tuple(map(str.strip, old_tags.lower().split(",")))
-                    new_tags_list = [tag.strip() for tag in new_tags.split(",")]
-                    rules[old_tags_list] = new_tags_list
-            return rules
-        def apply_replacements(tags, replacement_rules):
-            """Apply replacement rules to a set of tags."""
-            tags_set = set(tags)
-            for old_tags, new_tags in replacement_rules.items():
-                if set(old_tags).issubset(tags_set):  # If all old tags exist in the set
-                    tags_set.difference_update(old_tags)  # Remove old tags
-                    tags_set.update(new_tags)  # Add new ones
-            return list(tags_set)
-        def process_images(files, model_repo, general_thresh, character_thresh, filter_tags, replacement_rules_text):
-            images = [Image.open(file.name) for file in files]
-            results = predictor.predict(images, model_repo, general_thresh, character_thresh)
-            # Parse filter tags
-            filter_set = set(tag.strip().lower() for tag in filter_tags.split(","))
-            # Parse user-defined replacements
-            replacement_rules = parse_replacement_rules(replacement_rules_text)
-            # Generate formatted output
-            prompts = []
-            for general_tags, character_tags in results:
-                # Apply replacements
-                general_tags = apply_replacements(general_tags, replacement_rules)
-                character_tags = apply_replacements(character_tags, replacement_rules)
-                # Remove filtered tags and format
-                general_tags = [tag.replace('_', ' ') for tag in general_tags if tag.lower() not in filter_set]
-                character_tags = [tag.replace('_', ' ') for tag in character_tags if tag.lower() not in filter_set]
-                # Construct final prompt
-                if character_tags:
-                    prompts.append(f"{', '.join(character_tags)}, {', '.join(general_tags)}")
-                else:
-                    prompts.append(", ".join(general_tags))
-            return "\n\n".join(prompts)
-        # Modify UI to include replacement rules input
-        with gr.Blocks(title=TITLE) as demo:
-            gr.Markdown(f"<h1 style='text-align: center;'>{TITLE}</h1>")
-            gr.Markdown(DESCRIPTION)
-            with gr.Row():
-                with gr.Column():
-                    image_files = gr.File(file_types=["image"], label="Upload Images", file_count="multiple")
-                    with gr.Accordion("Advanced Settings", open=False):
-                        model_repo = gr.Dropdown(model_repos, value=VIT_MODEL_DSV3_REPO, label="Select Model")
-                        general_thresh = gr.Slider(0, 1, step=args.score_slider_step, value=args.score_general_threshold, label="General Tags Threshold")
-                        character_thresh = gr.Slider(0, 1, step=args.score_slider_step, value=args.score_character_threshold, label="Character Tags Threshold")
-                        filter_tags = gr.Textbox(value=", ".join(predefined_tags), label="Filter Tags (comma-separated)", lines=3)
-                    submit = gr.Button(value="Process Images", variant="primary")
-                with gr.Column():
-                    output = gr.Textbox(label="Output", lines=10)
-            # Separate input for tag replacements
-            with gr.Accordion("Tag Replacements", open=False):
-                replacement_rules_text = gr.Textbox(label="Enter replacement rules (one per line)", placeholder="e.g.,\n1boy -> 1girl\nwinter, indoors, living room -> summer, outdoors", lines=5)
-            submit.click(process_images, inputs=[image_files, model_repo, general_thresh, character_thresh, filter_tags, replacement_rules_text], outputs=output)
-    demo.queue(max_size=10)
     demo.launch()
-if __name__ == "__main__":
-    main()

 # Dataset v3 series of models:
 SWINV2_MODEL_DSV3_REPO = "SmilingWolf/wd-swinv2-tagger-v3"
 # Dataset v2 series of models:
 MOAT_MODEL_DSV2_REPO = "SmilingWolf/wd-v1-4-moat-tagger-v2"
 # IdolSankaku series of models:
 EVA02_LARGE_MODEL_IS_DSV1_REPO = "deepghs/idolsankaku-eva02-large-tagger-v1"
 # Files to download from the repos
 MODEL_FILENAME = "model.onnx"
     character_indexes = list(np.where(dataframe["category"] == 4)[0])
     return tag_names, general_indexes, character_indexes
def parse_replacements(replacement_text):
    """Parse user-written tag replacement rules into a lookup table.

    Each rule sits on its own line in the form ``old1, old2 -> new1, new2``.
    Tags on the left are lower-cased; tags on the right keep their case.

    Args:
        replacement_text: Multi-line rule text; ``None`` or ``""`` means
            "no rules".

    Returns:
        dict mapping a tuple of source tags to the list of tags that replace
        them. A rule whose right-hand side is empty maps to ``[]``, i.e. it
        only removes the source tags.
    """
    replacements = {}
    for line in (replacement_text or "").splitlines():
        parts = line.split("->")
        # Lines without exactly one arrow are not rules; ignore them.
        if len(parts) != 2:
            continue
        # Drop blank tokens (stray or trailing commas) so they never become tags.
        old_tags = tuple(
            tag.strip().lower() for tag in parts[0].split(",") if tag.strip()
        )
        new_tags = [tag.strip() for tag in parts[1].split(",") if tag.strip()]
        # An empty source side would match every tag list unconditionally.
        if not old_tags:
            continue
        replacements[old_tags] = new_tags
    return replacements
def apply_replacements(tags, replacements):
    """Apply replacement rules to a tag list, keeping a stable order.

    A rule fires only when every one of its source tags is present. Its
    source tags are then removed and its replacement tags appended.
    Duplicates are collapsed. Surviving tags keep their original relative
    order, and a replacement tag that is already present stays where it is.

    Args:
        tags: Iterable of tag strings.
        replacements: Mapping of source-tag tuples to replacement-tag lists.

    Returns:
        list of tags after all rules have been applied in mapping order.
    """
    # A dict gives set-like membership and removal while remembering insertion
    # order, so the result does not depend on string hash randomisation.
    ordered = dict.fromkeys(tags)
    for old_tags, new_tags in replacements.items():
        if all(tag in ordered for tag in old_tags):
            for tag in old_tags:
                ordered.pop(tag, None)
            ordered.update(dict.fromkeys(new_tags))
    return list(ordered)
 class Predictor:
     def __init__(self):
         self.model_target_size = None
         self.model = model
     def prepare_image(self, image):
         """Turn a PIL image into the array handed to the model.

         The image is flattened onto a white background, centred on a white
         square canvas, resized to ``self.model_target_size`` and returned as
         a float32 array with the channel axis reversed and a leading axis of
         length 1 added.
         """
         if image.mode != "RGBA":
             image = image.convert("RGBA")
         # Composite onto white before dropping alpha: a bare convert("RGB")
         # discards the alpha channel, so transparent areas keep whatever
         # colour is stored under them instead of becoming white.
         canvas = Image.new("RGBA", image.size, (255, 255, 255, 255))
         canvas.alpha_composite(image)
         image = canvas.convert("RGB")
         max_dim = max(image.size)
         padded_image = Image.new("RGB", (max_dim, max_dim), (255, 255, 255))
         pad_left = (max_dim - image.width) // 2
         # NOTE(review): this line is elided in the diff view; reconstructed as
         # the vertical counterpart of pad_left — confirm against app.py.
         pad_top = (max_dim - image.height) // 2
         padded_image.paste(image, (pad_left, pad_top))
         padded_image = padded_image.resize((self.model_target_size, self.model_target_size), Image.BICUBIC)
         # Reverse the last axis so the channels are in the opposite order to
         # PIL's RGB layout.
         image_array = np.asarray(padded_image, dtype=np.float32)[:, :, ::-1]
         return np.expand_dims(image_array, axis=0)
     def predict(self, images, model_repo, general_thresh, character_thresh):
         self.load_model(model_repo)
         results = []
         for image in images:
             image = self.prepare_image(image)
             input_name = self.model.get_inputs()[0].name
             general_res = [x[0] for i, x in enumerate(labels) if i in self.general_indexes and x[1] > general_thresh]
             character_res = [x[0] for i, x in enumerate(labels) if i in self.character_indexes and x[1] > character_thresh]
             results.append((general_res, character_res))
         return results
def process_images(files, model_repo, general_thresh, character_thresh, filter_tags, replacement_text):
    """Tag every uploaded image and return one prompt per image.

    Args:
        files: Uploaded file objects exposing a ``.name`` path; may be
            ``None`` or empty when nothing was uploaded.
        model_repo: Model repository id forwarded to ``predictor.predict``.
        general_thresh: Score threshold for general tags.
        character_thresh: Score threshold for character tags.
        filter_tags: Comma-separated tags to drop, matched case-insensitively
            against the raw tag names; may be ``None``.
        replacement_text: Replacement rules, one ``old -> new`` per line;
            may be ``None``.

    Returns:
        str with one comma-separated prompt per image (character tags first),
        prompts separated by a blank line. Empty string when no files given.
    """
    # Nothing uploaded: skip model loading instead of failing on None.
    if not files:
        return ""

    from contextlib import ExitStack

    filter_set = {
        tag.strip().lower() for tag in (filter_tags or "").split(",") if tag.strip()
    }
    replacements = parse_replacements(replacement_text or "")

    def clean(tags):
        # Filter on the raw tag name, then show underscores as spaces, then
        # apply the user's replacement rules.
        kept = [tag.replace("_", " ") for tag in tags if tag.lower() not in filter_set]
        return apply_replacements(kept, replacements)

    # Image.open keeps the underlying file open; close every handle once
    # prediction is done, even if it raises.
    with ExitStack() as stack:
        images = [stack.enter_context(Image.open(file.name)) for file in files]
        results = predictor.predict(images, model_repo, general_thresh, character_thresh)

    prompts = [
        ", ".join(clean(character_tags) + clean(general_tags))
        for general_tags, character_tags in results
    ]
    return "\n\n".join(prompts)
# Single shared predictor; process_images reads this module-level name.
predictor = Predictor()

with gr.Blocks(title=TITLE) as demo:
    gr.Markdown(f"<h1 style='text-align: center;'>{TITLE}</h1>")
    gr.Markdown(DESCRIPTION)
    with gr.Row():
        with gr.Column():
            image_files = gr.File(file_types=["image"], label="Upload Images", file_count="multiple")
            # process_images takes six arguments, so every one of them needs
            # an input component; wiring only two makes the click handler
            # fail with missing positional arguments.
            with gr.Accordion("Advanced Settings", open=False):
                model_repo = gr.Dropdown(
                    [
                        SWINV2_MODEL_DSV3_REPO,
                        MOAT_MODEL_DSV2_REPO,
                        EVA02_LARGE_MODEL_IS_DSV1_REPO,
                    ],
                    value=SWINV2_MODEL_DSV3_REPO,
                    label="Select Model",
                )
                # NOTE(review): step and default values used to come from the
                # parsed CLI args, which are no longer in scope here; these
                # literals are assumed defaults — confirm.
                general_thresh = gr.Slider(0, 1, step=0.05, value=0.35, label="General Tags Threshold")
                character_thresh = gr.Slider(0, 1, step=0.05, value=0.85, label="Character Tags Threshold")
                filter_tags = gr.Textbox(
                    value="",
                    label="Filter Tags (comma-separated)",
                    placeholder="Add tags to filter out (e.g., winter, red, from above)",
                    lines=3,
                )
            replacement_text = gr.Textbox(label="Tag Replacements", placeholder="e.g., 1boy -> 1girl\nwinter, indoors -> summer, outdoors", lines=5)
            submit = gr.Button("Process Images", variant="primary")
        with gr.Column():
            output = gr.Textbox(label="Output", lines=10)
    # Input order must match the positional parameters of process_images.
    submit.click(
        process_images,
        inputs=[image_files, model_repo, general_thresh, character_thresh, filter_tags, replacement_text],
        outputs=output,
    )
     demo.launch()