import gradio as gr
import torch
import os
import numpy as np
import cv2
import onnxruntime as rt
from PIL import Image
from transformers import pipeline
from huggingface_hub import hf_hub_download
import pandas as pd
import tempfile
import shutil
import base64
from io import BytesIO

# Import necessary function from aesthetic_predictor_v2_5
from aesthetic_predictor_v2_5 import convert_v2_5_from_siglip


class MLP(torch.nn.Module):
    def __init__(self, input_size, xcol='emb', ycol='avg_rating', batch_norm=True):
        super().__init__()
        self.input_size = input_size
        self.xcol = xcol
        self.ycol = ycol
        self.layers = torch.nn.Sequential(
            torch.nn.Linear(self.input_size, 2048),
            torch.nn.ReLU(),
            torch.nn.BatchNorm1d(2048) if batch_norm else torch.nn.Identity(),
            torch.nn.Dropout(0.3),
            torch.nn.Linear(2048, 512),
            torch.nn.ReLU(),
            torch.nn.BatchNorm1d(512) if batch_norm else torch.nn.Identity(),
            torch.nn.Dropout(0.3),
            torch.nn.Linear(512, 256),
            torch.nn.ReLU(),
            torch.nn.BatchNorm1d(256) if batch_norm else torch.nn.Identity(),
            torch.nn.Dropout(0.2),
            torch.nn.Linear(256, 128),
            torch.nn.ReLU(),
            torch.nn.BatchNorm1d(128) if batch_norm else torch.nn.Identity(),
            torch.nn.Dropout(0.1),
            torch.nn.Linear(128, 32),
            torch.nn.ReLU(),
            torch.nn.Linear(32, 1)
        )

    def forward(self, x):
        return self.layers(x)


class WaifuScorer(object):
    def __init__(self, model_path=None, device='cuda', cache_dir=None, verbose=False):
        self.verbose = verbose
        try:
            import clip

            if model_path is None:
                model_path = "Eugeoter/waifu-scorer-v3/model.pth"
                if self.verbose:
                    print(f"model path not set, switch to default: `{model_path}`")

            if not os.path.isfile(model_path):
                split = model_path.split("/")
                username, repo_id, model_name = split[-3], split[-2], split[-1]
                model_path = hf_hub_download(f"{username}/{repo_id}", model_name, cache_dir=cache_dir)

            print(f"Loading WaifuScorer model from `{model_path}`")
            self.mlp = MLP(input_size=768)
            if model_path.endswith(".safetensors"):
                from safetensors.torch import load_file
                state_dict = load_file(model_path)
            else:
                state_dict = torch.load(model_path, map_location=device)
            self.mlp.load_state_dict(state_dict)
            self.mlp.to(device)

            self.model2, self.preprocess = clip.load("ViT-L/14", device=device)
            self.device = device
            self.dtype = torch.float32
            self.mlp.eval()
            self.available = True
        except Exception as e:
            print(f"Unable to initialize WaifuScorer: {e}")
            self.available = False

    @torch.no_grad()
    def __call__(self, images):
        if not self.available:
            return [None] * (1 if not isinstance(images, list) else len(images))
        if isinstance(images, Image.Image):
            images = [images]
        n = len(images)
        if n == 1:
            images = images * 2

        image_tensors = [self.preprocess(img).unsqueeze(0) for img in images]
        image_batch = torch.cat(image_tensors).to(self.device)
        image_features = self.model2.encode_image(image_batch)

        l2 = image_features.norm(2, dim=-1, keepdim=True)
        l2[l2 == 0] = 1
        im_emb_arr = (image_features / l2).to(device=self.device, dtype=self.dtype)

        predictions = self.mlp(im_emb_arr)
        scores = predictions.clamp(0, 10).cpu().numpy().reshape(-1).tolist()
        return scores[:n]


def load_aesthetic_predictor_v2_5():
    class AestheticPredictorV2_5_Impl:  # Renamed class to avoid confusion
        def __init__(self):
            print("Loading Aesthetic Predictor V2.5...")
            self.model, self.preprocessor = convert_v2_5_from_siglip(
                low_cpu_mem_usage=True,
                trust_remote_code=True,
            )
            if torch.cuda.is_available():
                self.model = self.model.to(torch.bfloat16).cuda()

        def inference(self, image: Image.Image) -> float:
            # preprocess image
            pixel_values = self.preprocessor(
                images=image.convert("RGB"),
                return_tensors="pt"
            ).pixel_values
            if torch.cuda.is_available():
                pixel_values = pixel_values.to(torch.bfloat16).cuda()
            # predict aesthetic score
            with torch.inference_mode():
                score = self.model(pixel_values).logits.squeeze().float().cpu().numpy()
            return score

    return AestheticPredictorV2_5_Impl()  # Return an instance of the implementation class


def load_anime_aesthetic_model():
    model_path = hf_hub_download(repo_id="skytnt/anime-aesthetic", filename="model.onnx")
    model = rt.InferenceSession(model_path, providers=['CPUExecutionProvider'])
    return model


def predict_anime_aesthetic(img, model):
    img = np.array(img).astype(np.float32) / 255
    s = 768
    h, w = img.shape[:-1]
    h, w = (s, int(s * w / h)) if h > w else (int(s * h / w), s)
    ph, pw = s - h, s - w
    img_input = np.zeros([s, s, 3], dtype=np.float32)
    img_input[ph // 2:ph // 2 + h, pw // 2:pw // 2 + w] = cv2.resize(img, (w, h))
    img_input = np.transpose(img_input, (2, 0, 1))
    img_input = img_input[np.newaxis, :]
    pred = model.run(None, {"img": img_input})[0].item()
    return pred


class ImageEvaluationTool:
    def __init__(self):
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        print(f"Using device: {self.device}")
        print("Loading models... This may take some time.")

        print("Loading Aesthetic Shadow model...")
        self.aesthetic_shadow = pipeline("image-classification", model="NeoChen1024/aesthetic-shadow-v2-backup", device=self.device)
        print("Loading Waifu Scorer model...")
        self.waifu_scorer = WaifuScorer(device=self.device, verbose=True)
        print("Loading Aesthetic Predictor V2.5...")
        self.aesthetic_predictor_v2_5 = load_aesthetic_predictor_v2_5()
        print("Loading Anime Aesthetic model...")
        self.anime_aesthetic = load_anime_aesthetic_model()
        print("All models loaded successfully!")

        self.temp_dir = tempfile.mkdtemp()

    def evaluate_image(self, image):
        results = {}
        if not isinstance(image, Image.Image):
            image = Image.fromarray(image)

        try:
            shadow_result = self.aesthetic_shadow(images=[image])[0]
            hq_score = [p for p in shadow_result if p['label'] == 'hq'][0]['score']
            # Scale aesthetic_shadow to 0-10 and clamp
            aesthetic_shadow_score = np.clip(hq_score * 10.0, 0.0, 10.0)
            results['aesthetic_shadow'] = aesthetic_shadow_score
        except Exception as e:
            print(f"Error in Aesthetic Shadow: {e}")
            results['aesthetic_shadow'] = None

        try:
            waifu_score = self.waifu_scorer([image])[0]
            # Clamp waifu_score
            waifu_score_clamped = np.clip(waifu_score, 0.0, 10.0)
            results['waifu_scorer'] = waifu_score_clamped
        except Exception as e:
            print(f"Error in Waifu Scorer: {e}")
            results['waifu_scorer'] = None

        try:
            v2_5_score = self.aesthetic_predictor_v2_5.inference(image)
            # Clamp v2.5 score
            v2_5_score_clamped = np.clip(v2_5_score, 0.0, 10.0)
            results['aesthetic_predictor_v2_5'] = float(np.round(v2_5_score_clamped, 4))  # Keep 4 decimal places after clamping
        except Exception as e:
            print(f"Error in Aesthetic Predictor V2.5: {e}")
            results['aesthetic_predictor_v2_5'] = None

        try:
            img_array = np.array(image)
            anime_score = predict_anime_aesthetic(img_array, self.anime_aesthetic)
            # Scale Anime Score to 0-10 and clamp
            anime_score_scaled = np.clip(anime_score * 10.0, 0.0, 10.0)
            results['anime_aesthetic'] = anime_score_scaled
        except Exception as e:
            print(f"Error in Anime Aesthetic: {e}")
            results['anime_aesthetic'] = None

        # Calculate Final Score (simple average of available scores)
        valid_scores = [v for v in results.values() if v is not None]
        if valid_scores:
            final_score = np.mean(valid_scores)
            results['final_score'] = np.clip(final_score, 0.0, 10.0)  # Clamp final score too
        else:
            results['final_score'] = None

        return results
    def image_to_base64(self, image):
        buffered = BytesIO()
        image.save(buffered, format="JPEG")
        return base64.b64encode(buffered.getvalue()).decode('utf-8')

    def process_single_image(self, file_path):
        try:
            img = Image.open(file_path).convert("RGB")
            eval_results = self.evaluate_image(img)
            thumbnail = img.copy()
            thumbnail.thumbnail((200, 200))
            img_base64 = self.image_to_base64(thumbnail)
            result = {
                'file_name': os.path.basename(file_path),
                'img_data': img_base64,
                **eval_results
            }
            return result
        except Exception as e:
            print(f"Error processing {file_path}: {e}")
            return None

    def process_images_evaluation(self, image_files):  # Renamed and now for evaluation only
        results = []
        for i, file_path in enumerate(image_files):
            try:
                img = Image.open(file_path).convert("RGB")
                eval_results = self.evaluate_image(img)
                thumbnail = img.copy()
                thumbnail.thumbnail((200, 200))
                img_base64 = self.image_to_base64(thumbnail)
                result = {
                    'file_name': os.path.basename(file_path),
                    'img_data': img_base64,
                    **eval_results
                }
                results.append(result)
            except Exception as e:
                print(f"Error processing {file_path}: {e}")
        return results

    def sort_results(self, results, sort_by="Final Score"):  # New function for sorting
        # Map dropdown labels to the keys produced by evaluate_image
        key_map = {
            "Final Score": "final_score",
            "File Name": "file_name",
            "Aesthetic Shadow": "aesthetic_shadow",
            "Waifu Scorer": "waifu_scorer",
            "Aesthetic V2.5": "aesthetic_predictor_v2_5",
            "Anime Score": "anime_aesthetic",
        }
        result_key = key_map.get(sort_by, "final_score")

        def sort_key(res):  # Define a sorting key function
            sort_value = res.get(result_key, None)
            if sort_value is None:
                # Put N/A at the end: score columns sort descending, file names ascending
                return "" if sort_by == "File Name" else -float('inf')
            return sort_value

        results.sort(key=sort_key, reverse=sort_by != "File Name")  # Sort results, descending for score columns
        return results

    def generate_html_table(self, results):
        # Table header plus basic styling for the colour-coded score classes
        html = """
        <style>
            .results-table { width: 100%; border-collapse: collapse; }
            .results-table th, .results-table td { border: 1px solid #ddd; padding: 8px; text-align: center; }
            .results-table th { background-color: #f2f2f2; }
            .good-score { color: #2e8b57; font-weight: bold; }
            .medium-score { color: #e67e22; font-weight: bold; }
            .bad-score { color: #c0392b; font-weight: bold; }
        </style>
        <table class="results-table">
            <tr>
                <th>Image</th>
                <th>File Name</th>
                <th>Aesthetic Shadow</th>
                <th>Waifu Scorer</th>
                <th>Aesthetic V2.5</th>
                <th>Anime Score</th>
                <th>Final Score</th>
            </tr>
        """

        for result in results:
            html += "<tr>"
            html += f'<td><img src="data:image/jpeg;base64,{result["img_data"]}" alt="{result["file_name"]}"></td>'
            html += f'<td>{result["file_name"]}</td>'

            score = result["aesthetic_shadow"]
            score_class = "good-score" if score and score >= 7 else "medium-score" if score and score >= 4 else "bad-score"
            html += f'<td class="{score_class}">{f"{score:.4f}" if score is not None else "N/A"}</td>'  # Format to 4 decimal places

            score = result["waifu_scorer"]
            score_class = "good-score" if score and score >= 7 else "medium-score" if score and score >= 5 else "bad-score"
            html += f'<td class="{score_class}">{f"{score:.4f}" if score is not None else "N/A"}</td>'  # Format to 4 decimal places

            score = result["aesthetic_predictor_v2_5"]
            score_class = "good-score" if score and score >= 7 else "medium-score" if score and score >= 5 else "bad-score"
            html += f'<td class="{score_class}">{f"{score:.4f}" if score is not None else "N/A"}</td>'  # Format to 4 decimal places

            score = result["anime_aesthetic"]
            score_class = "good-score" if score and score >= 7 else "medium-score" if score and score >= 5 else "bad-score"
            html += f'<td class="{score_class}">{f"{score:.4f}" if score is not None else "N/A"}</td>'  # Format to 4 decimal places

            score = result["final_score"]
            score_class = "good-score" if score and score >= 7 else "medium-score" if score and score >= 5 else "bad-score"
            html += f'<td class="{score_class}">{f"{score:.4f}" if score is not None else "N/A"}</td>'  # Format to 4 decimal places

            html += "</tr>"

        html += """
        </table>
        """
        return html
""" return html def cleanup(self): if os.path.exists(self.temp_dir): shutil.rmtree(self.temp_dir) # Global variable to store evaluation results global_results = None def create_interface(): global global_results # Use the global variable evaluator = ImageEvaluationTool() sort_options = ["Final Score", "File Name", "Aesthetic Shadow", "Waifu Scorer", "Aesthetic V2.5", "Anime Score"] # Sort options with gr.Blocks(theme=gr.themes.Soft()) as demo: gr.Markdown(""" # Comprehensive Image Evaluation Tool Upload images to evaluate them using multiple aesthetic and quality prediction models: - **Aesthetic Shadow**: Evaluates high-quality vs low-quality images (scaled to 0-10) - **Waifu Scorer**: Rates anime/illustration quality from 0-10 - **Aesthetic Predictor V2.5**: General aesthetic quality prediction (clamped to 0-10) - **Anime Aesthetic**: Specific model for anime style images (scaled and clamped to 0-10) - **Final Score**: Average of available scores (clamped to 0-10) Upload multiple images to get a comprehensive evaluation table. Scores are clamped to the range 0.0000 - 10.0000. """) with gr.Row(): with gr.Column(scale=1): input_images = gr.Files(label="Upload Images") sort_dropdown = gr.Dropdown(sort_options, value="Final Score", label="Sort by") # Dropdown for sorting process_btn = gr.Button("Evaluate Images", variant="primary") clear_btn = gr.Button("Clear Results") with gr.Column(scale=2): progress_html = gr.HTML(label="Progress") # Keep progress_html if you want to show initial progress output_html = gr.HTML(label="Evaluation Results") def process_images_and_update(files): # Renamed and simplified global global_results file_paths = [f.name for f in files] total = len(file_paths) progress_html_content = "" # Initialize progress content if not file_paths: # Handle no files uploaded global_results = [] return progress_html_content, evaluator.generate_html_table([]) # Empty table progress_html_content = "" for i, file_path in enumerate(file_paths): percent = (i / total) * 100 progress_bar = f"""

Processing {os.path.basename(file_path)}

{percent:.1f}% complete

""" progress_html_content = progress_bar # Update progress content yield progress_html_content, gr.update() # Yield progress update # No need to process and sort here, just evaluate global_results = evaluator.process_images_evaluation(file_paths) # Evaluate all images and store sorted_results = evaluator.sort_results(global_results, sort_by="Final Score") # Initial sort by Final Score html_table = evaluator.generate_html_table(sorted_results) yield "

Processing complete

", html_table # Final progress and table def update_table_sort(sort_by_column): # New function for sorting update global global_results if global_results is None: return "No images evaluated yet." # Or handle case when no images are evaluated sorted_results = evaluator.sort_results(global_results, sort_by=sort_by_column) html_table = evaluator.generate_html_table(sorted_results) return html_table def clear_results(): global global_results global_results = None # Clear stored results return gr.update(value=""), gr.update(value="") process_btn.click( process_images_and_update, inputs=[input_images], outputs=[progress_html, output_html] ) sort_dropdown.change( # Only update table on sort change update_table_sort, inputs=[sort_dropdown], outputs=[output_html] # Only update output_html ) clear_btn.click( clear_results, inputs=[], outputs=[progress_html, output_html] ) demo.load(lambda: None, inputs=None, outputs=None) gr.Markdown(""" ### Notes - The evaluation may take some time depending on the number and size of images - For best results, use high-quality images - Scores are color-coded: green for good (>=7), orange for medium (>=5), and red for poor scores (<5, or <4 for Aesthetic Shadow) - Some models may fail for certain image types, shown as "N/A" in the results - "Final Score" is a simple average of available model scores. - Table is sortable by clicking the dropdown above the "Evaluate Images" button. Default sort is by "Final Score". Sorting happens instantly without re-evaluating images. """) return demo if __name__ == "__main__": demo = create_interface() demo.queue().launch()