"""Gradio app that scores images with several aesthetic models.

Each image is evaluated by the ShadowLilac aesthetic classifier, the Waifu
Scorer (CLIP embedding + MLP head), three CafeAI classifiers and the
anime-aesthetic ONNX model. Batch results are sorted by an average score,
copied into ``output/hq_folder`` / ``output/lq_folder`` and can be exported
to CSV.
"""
import base64
import html
import io
import os
import shutil
import time

import clip
import gradio as gr
import numpy as np
import onnxruntime as rt
import pandas as pd
import torch
from huggingface_hub import hf_hub_download
from PIL import Image
from transformers import pipeline


# Utility class for Waifu Scorer
class MLP(torch.nn.Module):
    """Fully connected regression head mapping an embedding to one score."""

    def __init__(self, input_size, xcol='emb', ycol='avg_rating', batch_norm=True):
        """Build the layer stack.

        Args:
            input_size: Width of the input embedding.
            xcol: Stored on the instance; not used by this module itself.
            ycol: Stored on the instance; not used by this module itself.
            batch_norm: When False every BatchNorm1d is replaced by Identity.
        """
        super().__init__()
        self.input_size = input_size
        self.xcol = xcol
        self.ycol = ycol
        self.layers = torch.nn.Sequential(
            torch.nn.Linear(self.input_size, 2048),
            torch.nn.ReLU(),
            torch.nn.BatchNorm1d(2048) if batch_norm else torch.nn.Identity(),
            torch.nn.Dropout(0.3),
            torch.nn.Linear(2048, 512),
            torch.nn.ReLU(),
            torch.nn.BatchNorm1d(512) if batch_norm else torch.nn.Identity(),
            torch.nn.Dropout(0.3),
            torch.nn.Linear(512, 256),
            torch.nn.ReLU(),
            torch.nn.BatchNorm1d(256) if batch_norm else torch.nn.Identity(),
            torch.nn.Dropout(0.2),
            torch.nn.Linear(256, 128),
            torch.nn.ReLU(),
            torch.nn.BatchNorm1d(128) if batch_norm else torch.nn.Identity(),
            torch.nn.Dropout(0.1),
            torch.nn.Linear(128, 32),
            torch.nn.ReLU(),
            torch.nn.Linear(32, 1),
        )

    def forward(self, x):
        """Return the raw prediction for a batch of embeddings."""
        return self.layers(x)


class WaifuScorer:
    """Scores an image by feeding its CLIP ViT-L/14 embedding to an MLP head."""

    def __init__(self, device='cuda' if torch.cuda.is_available() else 'cpu'):
        """Download the MLP weights and load both the MLP and CLIP on ``device``."""
        self.device = device
        model_path = hf_hub_download(
            "Eugeoter/waifu-scorer-v4-beta", "model.pth", cache_dir="models"
        )
        self.mlp = self._load_model(model_path, input_size=768, device=device)
        self.model2, self.preprocess = clip.load("ViT-L/14", device=device)
        # nn.Module has no ``dtype`` attribute; take it from the weights.
        self.dtype = next(self.mlp.parameters()).dtype
        self.mlp.eval()

    def _load_model(self, model_path, input_size=768, device='cuda'):
        """Instantiate the MLP and load its state dict from ``model_path``."""
        model = MLP(input_size=input_size)
        # NOTE(review): torch.load unpickles the checkpoint; only use
        # checkpoints from a trusted source.
        s = torch.load(model_path, map_location=device)
        model.load_state_dict(s)
        model.to(device)
        return model

    def _normalized(self, a, order=2, dim=-1):
        """Return ``a`` divided by its norm along ``dim`` (zero norms become 1)."""
        l2 = a.norm(order, dim, keepdim=True)
        l2[l2 == 0] = 1
        return a / l2

    @torch.no_grad()
    def _encode_images(self, images):
        """Return normalized float CLIP embeddings (on CPU) for PIL image(s)."""
        if isinstance(images, Image.Image):
            images = [images]
        image_tensors = [self.preprocess(img).unsqueeze(0) for img in images]
        image_batch = torch.cat(image_tensors).to(self.device)
        image_features = self.model2.encode_image(image_batch)
        im_emb_arr = self._normalized(image_features).cpu().float()
        return im_emb_arr

    @torch.no_grad()
    def score(self, image):
        """Return the score for one image, clamped to the range 0-10.

        Args:
            image: A PIL image or a numpy array accepted by ``Image.fromarray``.
        """
        if isinstance(image, np.ndarray):
            image = Image.fromarray(image)
        # The MLP is in eval mode, so BatchNorm uses its running statistics
        # and a single-sample batch is fine; no need to duplicate the image.
        embeddings = self._encode_images([image]).to(device=self.device, dtype=self.dtype)
        predictions = self.mlp(embeddings)
        scores = predictions.clamp(0, 10).cpu().numpy().reshape(-1).tolist()
        return scores[0]


class AnimeAestheticPredictor:
    """Wrapper around the ``skytnt/anime-aesthetic`` ONNX model (CPU only)."""

    def __init__(self):
        """Download the ONNX model and open a CPU inference session."""
        model_path = hf_hub_download(
            repo_id="skytnt/anime-aesthetic", filename="model.onnx", cache_dir="models"
        )
        self.model = rt.InferenceSession(model_path, providers=['CPUExecutionProvider'])

    def predict(self, img):
        """Return the model's scalar prediction for one image.

        The image is scaled so its longer side is 768 px, centred on a black
        768x768 canvas, and fed to the model as a 1x3x768x768 float32 tensor
        with values in 0-1.

        Args:
            img: A PIL image, or a numpy image array with values in 0-255.
        """
        if isinstance(img, np.ndarray):
            if img.dtype != np.uint8:
                img = np.clip(img, 0, 255).astype(np.uint8)
            img = Image.fromarray(img)
        # Force three channels so the array always fits the RGB canvas.
        img = img.convert("RGB")

        s = 768
        src_w, src_h = img.size
        if src_h > src_w:
            h, w = s, int(s * src_w / src_h)
        else:
            h, w = int(s * src_h / src_w), s
        # Extreme aspect ratios could otherwise round a side down to zero.
        h, w = max(h, 1), max(w, 1)
        ph, pw = s - h, s - w

        # PIL replaces the original cv2.resize call: cv2 was never imported.
        resized = np.asarray(img.resize((w, h), Image.BILINEAR), dtype=np.float32) / 255

        img_input = np.zeros([s, s, 3], dtype=np.float32)
        img_input[ph // 2:ph // 2 + h, pw // 2:pw // 2 + w] = resized
        img_input = np.transpose(img_input, (2, 0, 1))
        img_input = img_input[np.newaxis, :]
        pred = self.model.run(None, {"img": img_input})[0].item()
        return pred


class ImageEvaluator:
    """Loads every scoring model and evaluates single images or batches."""

    def __init__(self):
        """Load the models and create the working and output directories."""
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.setup_models()
        self.results_df = None
        self.temp_dir = "temp_images"
        os.makedirs(self.temp_dir, exist_ok=True)
        # Always ensure both folders exist, even if "output" is already there.
        os.makedirs("output/hq_folder", exist_ok=True)
        os.makedirs("output/lq_folder", exist_ok=True)

    def setup_models(self):
        """Instantiate all models; a WaifuScorer failure is logged, not fatal."""
        print("Setting up models (this may take a few minutes)...")

        # ShadowLilac's aesthetic model
        self.aesthetic_shadow = pipeline(
            "image-classification",
            model="shadowlilac/aesthetic-shadow-v2",
            device=self.device,
        )

        # WaifuScorer model
        try:
            self.waifu_scorer = WaifuScorer(device=self.device)
        except Exception as e:
            print(f"Error loading WaifuScorer: {e}")
            self.waifu_scorer = None

        # CafeAI models
        self.cafe_aesthetic = pipeline("image-classification", "cafeai/cafe_aesthetic")
        self.cafe_style = pipeline("image-classification", "cafeai/cafe_style")
        self.cafe_waifu = pipeline("image-classification", "cafeai/cafe_waifu")

        # Anime Aesthetic model
        self.anime_aesthetic = AnimeAestheticPredictor()

        print("All models loaded successfully!")

    def evaluate_image(self, image_path):
        """Evaluate a single image with all models.

        Args:
            image_path: Path to an image file, or an already opened PIL image.

        Returns:
            Dict with keys ``shadow_hq``, ``waifu_score`` (only when the Waifu
            Scorer loaded), ``cafe_aesthetic``, ``cafe_top_style``,
            ``cafe_top_style_score``, ``cafe_top_waifu``,
            ``cafe_top_waifu_score``, ``anime_aesthetic`` and
            ``average_score``. Numeric values are rounded to two decimals.
        """
        if isinstance(image_path, str):
            with Image.open(image_path) as opened:
                image = opened.convert('RGB')
        else:
            # Images handed in directly (e.g. from gr.Image) may be RGBA or L.
            image = image_path.convert('RGB')

        results = {}

        # ShadowLilac evaluation
        shadow_result = self.aesthetic_shadow(images=[image])
        results["shadow_hq"] = round(
            next((p["score"] for p in shadow_result[0] if p["label"] == "hq"), 0), 2
        )

        # WaifuScorer evaluation
        if self.waifu_scorer:
            try:
                results["waifu_score"] = round(self.waifu_scorer.score(image), 2)
            except Exception as e:
                results["waifu_score"] = 0
                print(f"Error with WaifuScorer: {e}")

        # CafeAI evaluations
        cafe_aesthetic_result = self.cafe_aesthetic(image, top_k=2)
        results["cafe_aesthetic"] = round(
            next(
                (item["score"] for item in cafe_aesthetic_result if item["label"] == "aesthetic"),
                0,
            ),
            2,
        )

        # Get top style
        cafe_style_result = self.cafe_style(image, top_k=5)
        results["cafe_top_style"] = cafe_style_result[0]["label"]
        results["cafe_top_style_score"] = round(cafe_style_result[0]["score"], 2)

        # Get top waifu style if applicable
        cafe_waifu_result = self.cafe_waifu(image, top_k=5)
        results["cafe_top_waifu"] = cafe_waifu_result[0]["label"]
        results["cafe_top_waifu_score"] = round(cafe_waifu_result[0]["score"], 2)

        # Anime aesthetic evaluation
        try:
            results["anime_aesthetic"] = round(self.anime_aesthetic.predict(image), 2)
        except Exception as e:
            results["anime_aesthetic"] = 0
            print(f"Error with Anime Aesthetic: {e}")

        # Calculate average score. The 0-1 classifier scores are scaled to 0-10.
        # NOTE(review): anime_aesthetic is averaged unscaled because the UI
        # labels it 0-10 -- confirm the model's real output range.
        scores = [results["shadow_hq"] * 10]
        if self.waifu_scorer:
            scores.append(results["waifu_score"])
        scores.append(results["cafe_aesthetic"] * 10)
        scores.append(results["anime_aesthetic"])
        results["average_score"] = round(sum(scores) / len(scores), 2)

        return results

    def process_images(self, files, threshold=0.5, progress=None):
        """Process multiple images and return the results dataframe.

        Each file is copied into the temp directory, evaluated, and copied to
        ``output/hq_folder`` or ``output/lq_folder`` depending on whether its
        ``shadow_hq`` score reaches ``threshold``.

        Args:
            files: Iterable of image file paths.
            threshold: Minimum ``shadow_hq`` score for an image to count as HQ.
            progress: Optional callable taking ``(fraction, description)``.

        Returns:
            DataFrame sorted by ``average_score`` descending (empty when no
            files were given). Also stored in ``self.results_df``.
        """
        files = list(files or [])
        results = []
        total_files = len(files)

        # Clean temp directory (plain files only; os.remove cannot delete dirs)
        for f in os.listdir(self.temp_dir):
            stale = os.path.join(self.temp_dir, f)
            if os.path.isfile(stale):
                os.remove(stale)

        # Process each file and save a copy to temp directory
        for i, file in enumerate(files):
            filename = os.path.basename(file)
            if progress is not None:
                progress(i / total_files, f"Processing {i+1}/{total_files}: {filename}")

            # Copy file to temp directory with clean name
            temp_path = os.path.join(self.temp_dir, filename)
            shutil.copy(file, temp_path)

            # Evaluate the image
            results_dict = self.evaluate_image(temp_path)
            results_dict["filename"] = filename
            results_dict["path"] = temp_path
            results_dict["is_hq"] = results_dict["shadow_hq"] >= threshold

            # Copy to output directory based on HQ threshold
            destination = "output/hq_folder" if results_dict["is_hq"] else "output/lq_folder"
            shutil.copy(temp_path, os.path.join(destination, filename))

            results.append(results_dict)

        # Create dataframe and sort by average score
        self.results_df = pd.DataFrame(results)
        if not self.results_df.empty:
            self.results_df = self.results_df.sort_values(by="average_score", ascending=False)

        if progress is not None:
            progress(1.0, "Processing complete!")

        return self.results_df

    @staticmethod
    def _thumbnail_data_uri(path, size=100):
        """Return a base64 JPEG data URI thumbnail for ``path``, or '' on error."""
        try:
            with Image.open(path) as img:
                thumb = img.convert("RGB")
            thumb.thumbnail((size, size))
            buffer = io.BytesIO()
            thumb.save(buffer, format="JPEG")
        except OSError:
            return ""
        encoded = base64.b64encode(buffer.getvalue()).decode("ascii")
        return f"data:image/jpeg;base64,{encoded}"

    def get_results_html(self):
        """Generate an HTML table with results and image previews.

        Rows are tinted green for HQ images and red for LQ images. The Waifu
        column is only emitted when the dataframe has a ``waifu_score`` column.
        """
        if self.results_df is None or self.results_df.empty:
            return "<p>No results available. Please process images first.</p>"

        has_waifu = "waifu_score" in self.results_df.columns

        headers = ["Image", "Filename", "Average", "Shadow HQ"]
        if has_waifu:
            headers.append("Waifu")
        headers += ["Cafe", "Anime", "Style"]

        parts = [
            "<h2>Results (Sorted by Average Score)</h2>",
            "<table style='width:100%; border-collapse:collapse;'>",
            "<tr>",
        ]
        parts.extend(f"<th>{title}</th>" for title in headers)
        parts.append("</tr>")

        for _, row in self.results_df.iterrows():
            # Determine row color based on HQ status
            row_color = "#e8f5e9" if row["is_hq"] else "#ffebee"
            # File names and labels are escaped: they come from uploaded files.
            filename = html.escape(str(row["filename"]))
            style = html.escape(str(row["cafe_top_style"]))
            thumb = self._thumbnail_data_uri(row["path"])

            parts.append(f"<tr style='background-color:{row_color};'>")
            parts.append(f"<td><img src='{thumb}' alt='{filename}' style='max-width:100px;'></td>")
            parts.append(f"<td>{filename}</td>")
            parts.append(f"<td>{row['average_score']}</td>")
            parts.append(f"<td>{row['shadow_hq']}</td>")
            if has_waifu:
                parts.append(f"<td>{row['waifu_score']}</td>")
            parts.append(f"<td>{row['cafe_aesthetic']}</td>")
            parts.append(f"<td>{row['anime_aesthetic']}</td>")
            parts.append(f"<td>{style} ({row['cafe_top_style_score']})</td>")
            parts.append("</tr>")

        parts.append("</table>")
        return "".join(parts)

    def export_results_csv(self, output_path="results.csv"):
        """Write the results dataframe to ``output_path`` and return a status message."""
        if self.results_df is not None:
            self.results_df.to_csv(output_path, index=False)
            return f"Results exported to {output_path}"
        return "No results to export"


# Create Gradio interface
def create_interface():
    """Build and return the Gradio Blocks app (loads all models up front)."""
    evaluator = ImageEvaluator()

    with gr.Blocks(title="Comprehensive Image Evaluation Tool", theme=gr.themes.Soft()) as app:
        gr.Markdown("""
        # 🖼️ Comprehensive Image Evaluation Tool

        Upload images to evaluate their aesthetic quality using multiple models:

        - **ShadowLilac** - General aesthetic quality (0-1)
        - **WaifuScorer** - Anime-style quality score (0-10)
        - **CafeAI** - Style classification and aesthetic assessment
        - **Anime Aesthetic** - Specialized for anime/manga art (0-10)

        The tool will provide an average score and classify images as high or low quality based on your threshold.
        """)

        with gr.Row():
            with gr.Column(scale=1):
                # gr.File(file_count="multiple") is accepted by every Gradio
                # version, unlike passing file_count to the gr.Files template.
                input_files = gr.File(
                    label="Upload Images", file_types=["image"], file_count="multiple"
                )
                # Slider bounds are named minimum/maximum, not min/max.
                threshold = gr.Slider(
                    label="HQ Threshold (ShadowLilac score)",
                    minimum=0,
                    maximum=1,
                    value=0.5,
                    step=0.01,
                )
                process_btn = gr.Button("Process Images", variant="primary")
                export_btn = gr.Button("Export Results to CSV")
                export_msg = gr.Textbox(label="Export Status")

            with gr.Column(scale=2):
                results_html = gr.HTML(label="Results")

        with gr.Row():
            gr.Markdown("""
            ### Single Image Evaluation

            Upload a single image to get detailed evaluation metrics.
            """)

        with gr.Row():
            with gr.Column(scale=1):
                single_img = gr.Image(label="Upload Single Image", type="pil")
                single_eval_btn = gr.Button("Evaluate")

            with gr.Column(scale=2):
                shadow_score = gr.Number(label="ShadowLilac HQ Score (0-1)")
                waifu_score = gr.Number(label="Waifu Score (0-10)")
                cafe_aesthetic = gr.Number(label="Cafe Aesthetic Score (0-1)")
                anime_aesthetic = gr.Number(label="Anime Aesthetic Score (0-10)")
                average_score = gr.Number(label="Average Score (0-10)")
                style_label = gr.Label(label="Top Style Categories (Cafe)")

        def process_images_callback(files, threshold, progress=gr.Progress()):
            """Score the uploaded files and return the results table HTML."""
            # Depending on the Gradio version, uploads arrive as path strings
            # or as temp-file objects exposing the path via ``.name``.
            file_paths = [f if isinstance(f, str) else f.name for f in (files or [])]
            evaluator.process_images(file_paths, threshold, progress)
            return evaluator.get_results_html()

        def export_callback():
            """Export the latest results to a timestamped CSV file."""
            timestamp = time.strftime("%Y%m%d-%H%M%S")
            filename = f"results_{timestamp}.csv"
            return evaluator.export_results_csv(filename)

        def evaluate_single(image):
            """Return the individual scores and top style labels for one image."""
            if image is None:
                # gr.Label expects a dict (or None), not a list.
                return 0, 0, 0, 0, 0, {}

            results = evaluator.evaluate_image(image)

            # Prepare style labels
            style_data = {
                results["cafe_top_style"]: results["cafe_top_style_score"],
                results["cafe_top_waifu"]: results["cafe_top_waifu_score"],
            }

            return (
                results["shadow_hq"],
                results.get("waifu_score", 0),
                results["cafe_aesthetic"],
                results["anime_aesthetic"],
                results["average_score"],
                style_data,
            )

        # Set up event handlers
        process_btn.click(
            process_images_callback,
            inputs=[input_files, threshold],
            outputs=[results_html],
        )

        export_btn.click(
            export_callback,
            inputs=[],
            outputs=[export_msg],
        )

        single_eval_btn.click(
            evaluate_single,
            inputs=[single_img],
            outputs=[
                shadow_score,
                waifu_score,
                cafe_aesthetic,
                anime_aesthetic,
                average_score,
                style_label,
            ],
        )

    return app


if __name__ == "__main__":
    app = create_interface()
    app.launch()