Update app.py

app.py CHANGED
@@ -1,5 +1,6 @@
 import argparse
 import os
+
 import gradio as gr
 import huggingface_hub
 import numpy as np
@@ -8,90 +9,40 @@ import pandas as pd
 from PIL import Image
 
 TITLE = "WaifuDiffusion Tagger"
-DESCRIPTION = "
+DESCRIPTION = """
+Demo for the WaifuDiffusion tagger models
+"""
 
 HF_TOKEN = os.environ.get("HF_TOKEN", "")
 
+# Dataset v3 series of models:
+SWINV2_MODEL_DSV3_REPO = "SmilingWolf/wd-swinv2-tagger-v3"
+CONV_MODEL_DSV3_REPO = "SmilingWolf/wd-convnext-tagger-v3"
 VIT_MODEL_DSV3_REPO = "SmilingWolf/wd-vit-tagger-v3"
+VIT_LARGE_MODEL_DSV3_REPO = "SmilingWolf/wd-vit-large-tagger-v3"
+EVA02_LARGE_MODEL_DSV3_REPO = "SmilingWolf/wd-eva02-large-tagger-v3"
+
+# Dataset v2 series of models:
+MOAT_MODEL_DSV2_REPO = "SmilingWolf/wd-v1-4-moat-tagger-v2"
+SWIN_MODEL_DSV2_REPO = "SmilingWolf/wd-v1-4-swinv2-tagger-v2"
+CONV_MODEL_DSV2_REPO = "SmilingWolf/wd-v1-4-convnext-tagger-v2"
+CONV2_MODEL_DSV2_REPO = "SmilingWolf/wd-v1-4-convnextv2-tagger-v2"
+VIT_MODEL_DSV2_REPO = "SmilingWolf/wd-v1-4-vit-tagger-v2"
+
+# IdolSankaku series of models:
+EVA02_LARGE_MODEL_IS_DSV1_REPO = "deepghs/idolsankaku-eva02-large-tagger-v1"
+SWINV2_MODEL_IS_DSV1_REPO = "deepghs/idolsankaku-swinv2-tagger-v1"
+
 MODEL_FILENAME = "model.onnx"
 LABEL_FILENAME = "selected_tags.csv"
 
-
-    "loli", "oppai_loli", "onee-shota", "incest", "furry", "furry_female", "shota",
-    "male_focus", "signature", "otoko_no_ko", "minigirl", "patreon_username", "babydoll",
-    "monochrome", "happy_birthday", "happy_new_year", "thought_bubble", "greyscale",
-    "speech_bubble", "english_text", "copyright_name", "twitter_username",
-    "patreon username", "patreon logo", "cover", "content_rating", "cover_page",
-    "doujin_cover", "sex", "artist_name", "watermark", "censored", "bar_censor",
-    "blank_censor", "blur_censor", "light_censor", "mosaic_censoring"
-]
-
-def parse_args():
+def parse_args() -> argparse.Namespace:
     parser = argparse.ArgumentParser()
     parser.add_argument("--score-slider-step", type=float, default=0.05)
     parser.add_argument("--score-general-threshold", type=float, default=0.3)
     parser.add_argument("--score-character-threshold", type=float, default=1.0)
     return parser.parse_args()
 
-def load_labels(dataframe):
-    tag_names = dataframe["name"].tolist()
-    general_indexes = list(np.where(dataframe["category"] == 0)[0])
-    character_indexes = list(np.where(dataframe["category"] == 4)[0])
-    return tag_names, general_indexes, character_indexes
-
-class Predictor:
-    def __init__(self):
-        self.model_target_size = None
-        self.last_loaded_repo = None
-
-    def download_model(self, model_repo):
-        csv_path = huggingface_hub.hf_hub_download(model_repo, LABEL_FILENAME, use_auth_token=HF_TOKEN)
-        model_path = huggingface_hub.hf_hub_download(model_repo, MODEL_FILENAME, use_auth_token=HF_TOKEN)
-        return csv_path, model_path
-
-    def load_model(self, model_repo):
-        if model_repo == self.last_loaded_repo:
-            return
-
-        csv_path, model_path = self.download_model(model_repo)
-        tags_df = pd.read_csv(csv_path)
-        self.tag_names, self.general_indexes, self.character_indexes = load_labels(tags_df)
-
-        model = rt.InferenceSession(model_path)
-        _, height, width, _ = model.get_inputs()[0].shape
-        self.model_target_size = height
-        self.last_loaded_repo = model_repo
-        self.model = model
-
-    def prepare_image(self, image):
-        max_dim = max(image.size)
-        padded_image = Image.new("RGB", (max_dim, max_dim), (255, 255, 255))
-        pad_left = (max_dim - image.width) // 2
-        pad_top = (max_dim - image.height) // 2
-        padded_image.paste(image, (pad_left, pad_top))
-        padded_image = padded_image.resize((self.model_target_size, self.model_target_size), Image.BICUBIC)
-
-        return np.expand_dims(np.asarray(padded_image, dtype=np.float32)[:, :, ::-1], axis=0)
-
-    def predict(self, images, model_repo, general_thresh, character_thresh):
-        self.load_model(model_repo)
-        results = []
-
-        for image in images:
-            image = self.prepare_image(image)
-            input_name = self.model.get_inputs()[0].name
-            label_name = self.model.get_outputs()[0].name
-            preds = self.model.run([label_name], {input_name: image})[0]
-
-            labels = list(zip(self.tag_names, preds[0].astype(float)))
-            general_res = [x[0] for i, x in enumerate(labels) if i in self.general_indexes and x[1] > general_thresh]
-            character_res = [x[0] for i, x in enumerate(labels) if i in self.character_indexes and x[1] > character_thresh]
-            results.append((general_res, character_res))
-
-        return results
-
-predictor = Predictor()
-
 def parse_replacement_rules(rules_text):
     rules = {}
     for line in rules_text.strip().split("\n"):
@@ -113,23 +64,43 @@ def parse_fallback_rules(fallback_text):
 
 def apply_replacements(tags, replacement_rules):
     tags_set = set(tags)
-
     for old_tags, new_tags in replacement_rules.items():
         if set(old_tags).issubset(tags_set):
             tags_set.difference_update(old_tags)
             tags_set.update(new_tags)
-
     return list(tags_set)
 
 def apply_fallbacks(tags, fallback_rules):
     tags_set = set(tags)
-
     for expected_tags, fallback_tag in fallback_rules.items():
         if not any(tag in tags_set for tag in expected_tags):
             tags_set.add(fallback_tag)
-
     return list(tags_set)
 
+class Predictor:
+    def __init__(self):
+        self.model_target_size = None
+        self.last_loaded_repo = None
+
+    def download_model(self, model_repo):
+        csv_path = huggingface_hub.hf_hub_download(model_repo, LABEL_FILENAME, use_auth_token=HF_TOKEN)
+        model_path = huggingface_hub.hf_hub_download(model_repo, MODEL_FILENAME, use_auth_token=HF_TOKEN)
+        return csv_path, model_path
+
+    def load_model(self, model_repo):
+        if model_repo == self.last_loaded_repo:
+            return
+
+        csv_path, model_path = self.download_model(model_repo)
+        tags_df = pd.read_csv(csv_path)
+        self.tag_names, self.general_indexes, self.character_indexes = tags_df["name"].tolist(), list(np.where(tags_df["category"] == 0)[0]), list(np.where(tags_df["category"] == 4)[0])
+
+        model = rt.InferenceSession(model_path)
+        _, height, width, _ = model.get_inputs()[0].shape
+        self.model_target_size = height
+        self.last_loaded_repo = model_repo
+        self.model = model
+
 def process_images(files, model_repo, general_thresh, character_thresh, filter_tags, replacement_rules_text, fallback_rules_text):
     images = [Image.open(file.name) for file in files]
     results = predictor.predict(images, model_repo, general_thresh, character_thresh)
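Note on the hunk above: the commit moves Predictor below the tag helpers, but the added block stops at load_model, while process_images still calls predictor.predict (which in turn relies on prepare_image). Unless those methods are re-added somewhere outside the displayed hunks, they would need to travel with the move for the app to run. For reference, here they are as they appeared in the removed version of the class, with descriptive comments added:

    def prepare_image(self, image):
        # Pad to a white square so the aspect ratio is preserved, then
        # resize to the model's expected input resolution.
        max_dim = max(image.size)
        padded_image = Image.new("RGB", (max_dim, max_dim), (255, 255, 255))
        pad_left = (max_dim - image.width) // 2
        pad_top = (max_dim - image.height) // 2
        padded_image.paste(image, (pad_left, pad_top))
        padded_image = padded_image.resize((self.model_target_size, self.model_target_size), Image.BICUBIC)

        # float32, RGB -> BGR channel flip, plus a leading batch dimension.
        return np.expand_dims(np.asarray(padded_image, dtype=np.float32)[:, :, ::-1], axis=0)

    def predict(self, images, model_repo, general_thresh, character_thresh):
        self.load_model(model_repo)
        results = []

        for image in images:
            image = self.prepare_image(image)
            input_name = self.model.get_inputs()[0].name
            label_name = self.model.get_outputs()[0].name
            preds = self.model.run([label_name], {input_name: image})[0]

            # Pair each tag name with its score, then threshold per category.
            labels = list(zip(self.tag_names, preds[0].astype(float)))
            general_res = [x[0] for i, x in enumerate(labels) if i in self.general_indexes and x[1] > general_thresh]
            character_res = [x[0] for i, x in enumerate(labels) if i in self.character_indexes and x[1] > character_thresh]
            results.append((general_res, character_res))

        return results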
@@ -142,34 +113,46 @@ def process_images(files, model_repo, general_thresh, character_thresh, filter_t
     for general_tags, character_tags in results:
         general_tags = apply_replacements(general_tags, replacement_rules)
         character_tags = apply_replacements(character_tags, replacement_rules)
-
         general_tags = apply_fallbacks(general_tags, fallback_rules)
         character_tags = apply_fallbacks(character_tags, fallback_rules)
 
         general_tags = [tag.replace('_', ' ') for tag in general_tags if tag.lower() not in filter_set]
         character_tags = [tag.replace('_', ' ') for tag in character_tags if tag.lower() not in filter_set]
 
-        if character_tags:
-            prompts.append(f"{', '.join(character_tags)}, {', '.join(general_tags)}")
-        else:
-            prompts.append(", ".join(general_tags))
+        prompts.append(f"{', '.join(character_tags)}, {', '.join(general_tags)}" if character_tags else ", ".join(general_tags))
 
     return "\n\n".join(prompts)
 
 args = parse_args()
+predictor = Predictor()
+
+model_repos = [SWINV2_MODEL_DSV3_REPO, CONV_MODEL_DSV3_REPO, VIT_MODEL_DSV3_REPO, VIT_LARGE_MODEL_DSV3_REPO, EVA02_LARGE_MODEL_DSV3_REPO, MOAT_MODEL_DSV2_REPO, SWIN_MODEL_DSV2_REPO, CONV_MODEL_DSV2_REPO, CONV2_MODEL_DSV2_REPO, VIT_MODEL_DSV2_REPO, SWINV2_MODEL_IS_DSV1_REPO, EVA02_LARGE_MODEL_IS_DSV1_REPO]
 
 with gr.Blocks(title=TITLE) as demo:
     gr.Markdown(f"<h1 style='text-align: center;'>{TITLE}</h1>")
     gr.Markdown(DESCRIPTION)
 
-    with gr.
-
-
-
-
-
-
-    submit.click(process_images, inputs=[
+    with gr.Row():
+        with gr.Column():
+            image_files = gr.File(file_types=["image"], label="Upload Images", file_count="multiple")
+
+            with gr.Accordion("Advanced Settings", open=False):
+                model_repo = gr.Dropdown(model_repos, value=VIT_MODEL_DSV3_REPO, label="Select Model")
+                general_thresh = gr.Slider(0, 1, step=args.score_slider_step, value=args.score_general_threshold, label="General Tags Threshold")
+                character_thresh = gr.Slider(0, 1, step=args.score_slider_step, value=args.score_character_threshold, label="Character Tags Threshold")
+
+            submit = gr.Button(value="Process Images", variant="primary")
+
+        with gr.Column():
+            output = gr.Textbox(label="Output", lines=10)
+
+            with gr.Accordion("Tag Replacements", open=False):
+                replacement_rules_text = gr.Textbox(label="Replacement Rules", lines=5)
+
+            with gr.Accordion("Fallback Rules", open=False):
+                fallback_rules_text = gr.Textbox(label="Fallback Rules", lines=5)
 
+    submit.click(process_images, inputs=[image_files, model_repo, general_thresh, character_thresh, replacement_rules_text, fallback_rules_text], outputs=output)
 
+demo.queue()
 demo.launch()
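A note on the rule helpers the callback consumes: apply_replacements treats each dictionary key as a group of tags that must all be present before they are swapped for the value tags, and apply_fallbacks adds its value tag when none of the key's tags appear. The bodies of parse_replacement_rules and parse_fallback_rules fall outside the displayed hunks, so the exact textbox syntax is not visible here; the sketch below only shows the shapes the apply functions expect, and the literal tags are illustrative, not from the commit:

# Hypothetical rule structures matching what apply_replacements /
# apply_fallbacks iterate over; keys must be hashable, hence tuples.
replacement_rules = {
    ("long_hair", "blonde_hair"): ["blonde_long_hair"],  # all keys present -> swapped for values
}
fallback_rules = {
    ("1girl", "1boy"): "solo",  # none of the keys present -> fallback added
}

tags = apply_replacements(["long_hair", "blonde_hair", "smile"], replacement_rules)
# -> ["smile", "blonde_long_hair"] (order not guaranteed; sets are used internally)
tags = apply_fallbacks(tags, fallback_rules)
# -> adds "solo", since neither "1girl" nor "1boy" is present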
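One wiring issue worth flagging in the final hunk: process_images takes seven parameters (files, model_repo, general_thresh, character_thresh, filter_tags, replacement_rules_text, fallback_rules_text), but the new submit.click passes only six inputs, skipping filter_tags, so the callback would fail with a missing-argument error at runtime. A minimal sketch of one way to close the gap, assuming a comma-separated textbox; the component name and placement are suggestions, not part of the commit:

# Hypothetical component, e.g. inside the left gr.Column():
filter_tags = gr.Textbox(label="Filter Tags (comma-separated)", lines=2)

submit.click(
    process_images,
    inputs=[image_files, model_repo, general_thresh, character_thresh,
            filter_tags, replacement_rules_text, fallback_rules_text],
    outputs=output,
)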