import gradio as gr import torch import numpy as np import os import shutil from PIL import Image from transformers import pipeline import clip from huggingface_hub import hf_hub_download import onnxruntime as rt import pandas as pd import time # Utility class for Waifu Scorer class MLP(torch.nn.Module): def __init__(self, input_size, xcol='emb', ycol='avg_rating', batch_norm=True): super().__init__() self.input_size = input_size self.xcol = xcol self.ycol = ycol self.layers = torch.nn.Sequential( torch.nn.Linear(self.input_size, 2048), torch.nn.ReLU(), torch.nn.BatchNorm1d(2048) if batch_norm else torch.nn.Identity(), torch.nn.Dropout(0.3), torch.nn.Linear(2048, 512), torch.nn.ReLU(), torch.nn.BatchNorm1d(512) if batch_norm else torch.nn.Identity(), torch.nn.Dropout(0.3), torch.nn.Linear(512, 256), torch.nn.ReLU(), torch.nn.BatchNorm1d(256) if batch_norm else torch.nn.Identity(), torch.nn.Dropout(0.2), torch.nn.Linear(256, 128), torch.nn.ReLU(), torch.nn.BatchNorm1d(128) if batch_norm else torch.nn.Identity(), torch.nn.Dropout(0.1), torch.nn.Linear(128, 32), torch.nn.ReLU(), torch.nn.Linear(32, 1) ) def forward(self, x): return self.layers(x) class WaifuScorer: def __init__(self, device='cuda' if torch.cuda.is_available() else 'cpu'): self.device = device model_path = hf_hub_download("Eugeoter/waifu-scorer-v4-beta", "model.pth", cache_dir="models") self.mlp = self._load_model(model_path, input_size=768, device=device) self.model2, self.preprocess = clip.load("ViT-L/14", device=device) self.dtype = self.mlp.dtype self.mlp.eval() def _load_model(self, model_path, input_size=768, device='cuda'): model = MLP(input_size=input_size) s = torch.load(model_path, map_location=device) model.load_state_dict(s) model.to(device) return model def _normalized(self, a, order=2, dim=-1): l2 = a.norm(order, dim, keepdim=True) l2[l2 == 0] = 1 return a / l2 @torch.no_grad() def _encode_images(self, images): if isinstance(images, Image.Image): images = [images] image_tensors = [self.preprocess(img).unsqueeze(0) for img in images] image_batch = torch.cat(image_tensors).to(self.device) image_features = self.model2.encode_image(image_batch) im_emb_arr = self._normalized(image_features).cpu().float() return im_emb_arr @torch.no_grad() def score(self, image): if isinstance(image, np.ndarray): image = Image.fromarray(image) images = [image, image] # batch norm needs at least 2 images images = self._encode_images(images).to(device=self.device, dtype=self.dtype) predictions = self.mlp(images) scores = predictions.clamp(0, 10).cpu().numpy().reshape(-1).tolist() return scores[0] # Return first score only class AnimeAestheticPredictor: def __init__(self): model_path = hf_hub_download(repo_id="skytnt/anime-aesthetic", filename="model.onnx", cache_dir="models") self.model = rt.InferenceSession(model_path, providers=['CPUExecutionProvider']) def predict(self, img): if isinstance(img, Image.Image): img = np.array(img) img = img.astype(np.float32) / 255 s = 768 h, w = img.shape[:-1] h, w = (s, int(s * w / h)) if h > w else (int(s * h / w), s) ph, pw = s - h, s - w img_input = np.zeros([s, s, 3], dtype=np.float32) img_input[ph // 2:ph // 2 + h, pw // 2:pw // 2 + w] = cv2.resize(img, (w, h)) img_input = np.transpose(img_input, (2, 0, 1)) img_input = img_input[np.newaxis, :] pred = self.model.run(None, {"img": img_input})[0].item() return pred class ImageEvaluator: def __init__(self): self.device = 'cuda' if torch.cuda.is_available() else 'cpu' self.setup_models() self.results_df = None self.temp_dir = "temp_images" if not os.path.exists(self.temp_dir): os.makedirs(self.temp_dir) if not os.path.exists("output"): os.makedirs("output/hq_folder", exist_ok=True) os.makedirs("output/lq_folder", exist_ok=True) def setup_models(self): # Initialize all models print("Setting up models (this may take a few minutes)...") # ShadowLilac's aesthetic model self.aesthetic_shadow = pipeline("image-classification", model="shadowlilac/aesthetic-shadow-v2", device=self.device) # WaifuScorer model try: self.waifu_scorer = WaifuScorer(device=self.device) except Exception as e: print(f"Error loading WaifuScorer: {e}") self.waifu_scorer = None # CafeAI models self.cafe_aesthetic = pipeline("image-classification", "cafeai/cafe_aesthetic") self.cafe_style = pipeline("image-classification", "cafeai/cafe_style") self.cafe_waifu = pipeline("image-classification", "cafeai/cafe_waifu") # Anime Aesthetic model self.anime_aesthetic = AnimeAestheticPredictor() print("All models loaded successfully!") def evaluate_image(self, image_path): """Evaluate a single image with all models""" if isinstance(image_path, str): image = Image.open(image_path).convert('RGB') else: image = image_path results = {} # ShadowLilac evaluation shadow_result = self.aesthetic_shadow(images=[image]) results["shadow_hq"] = round([p for p in shadow_result[0] if p['label'] == 'hq'][0]['score'], 2) # WaifuScorer evaluation if self.waifu_scorer: try: results["waifu_score"] = round(self.waifu_scorer.score(image), 2) except Exception as e: results["waifu_score"] = 0 print(f"Error with WaifuScorer: {e}") # CafeAI evaluations cafe_aesthetic_result = self.cafe_aesthetic(image, top_k=2) results["cafe_aesthetic"] = round(next((item["score"] for item in cafe_aesthetic_result if item["label"] == "aesthetic"), 0), 2) # Get top style cafe_style_result = self.cafe_style(image, top_k=5) results["cafe_top_style"] = cafe_style_result[0]["label"] results["cafe_top_style_score"] = round(cafe_style_result[0]["score"], 2) # Get top waifu style if applicable cafe_waifu_result = self.cafe_waifu(image, top_k=5) results["cafe_top_waifu"] = cafe_waifu_result[0]["label"] results["cafe_top_waifu_score"] = round(cafe_waifu_result[0]["score"], 2) # Anime aesthetic evaluation try: results["anime_aesthetic"] = round(self.anime_aesthetic.predict(image), 2) except Exception as e: results["anime_aesthetic"] = 0 print(f"Error with Anime Aesthetic: {e}") # Calculate average score scores = [results["shadow_hq"] * 10] # Scale to 0-10 if self.waifu_scorer: scores.append(results["waifu_score"]) scores.append(results["cafe_aesthetic"] * 10) # Scale to 0-10 scores.append(results["anime_aesthetic"]) results["average_score"] = round(sum(scores) / len(scores), 2) return results def process_images(self, files, threshold=0.5, progress=None): """Process multiple images and return results dataframe""" results = [] total_files = len(files) # Clean temp directory for f in os.listdir(self.temp_dir): os.remove(os.path.join(self.temp_dir, f)) # Process each file and save a copy to temp directory for i, file in enumerate(files): if progress is not None: progress(i / total_files, f"Processing {i+1}/{total_files}: {os.path.basename(file)}") # Copy file to temp directory with clean name filename = os.path.basename(file) temp_path = os.path.join(self.temp_dir, filename) shutil.copy(file, temp_path) # Evaluate the image results_dict = self.evaluate_image(temp_path) results_dict["filename"] = filename results_dict["path"] = temp_path results_dict["is_hq"] = results_dict["shadow_hq"] >= threshold # Copy to output directory based on HQ threshold destination = "output/hq_folder" if results_dict["is_hq"] else "output/lq_folder" shutil.copy(temp_path, os.path.join(destination, filename)) results.append(results_dict) # Create dataframe and sort by average score self.results_df = pd.DataFrame(results) self.results_df = self.results_df.sort_values(by="average_score", ascending=False) if progress is not None: progress(1.0, "Processing complete!") return self.results_df def get_results_html(self): """Generate HTML with results and image previews""" if self.results_df is None: return "
No results available. Please process images first.
" html = "Image | " html += "Filename | " html += "Average | " html += "Shadow HQ | " if "waifu_score" in self.results_df.columns: html += "Waifu | " html += "Cafe | " html += "Anime | " html += "Style | " html += "
---|---|---|---|---|---|---|---|
{row['filename']} | " # Average score html += f"{row['average_score']} | " # Shadow HQ score html += f"{row['shadow_hq']} | " # Waifu score if "waifu_score" in self.results_df.columns: html += f"{row['waifu_score']} | " # Cafe aesthetic html += f"{row['cafe_aesthetic']} | " # Anime aesthetic html += f"{row['anime_aesthetic']} | " # Top style html += f"{row['cafe_top_style']} ({row['cafe_top_style_score']}) | " html += "