import base64
import html
import os
import shutil
import tempfile
from io import BytesIO

import cv2
import gradio as gr
import numpy as np
import onnxruntime as rt
import pandas as pd
import torch
from aesthetic_predictor_v2_5 import convert_v2_5_from_siglip
from huggingface_hub import hf_hub_download
from PIL import Image
from transformers import pipeline
|
|
|
class MLP(torch.nn.Module):
    """Fully connected regression head that maps an embedding to one scalar.

    The network is ``input_size -> 2048 -> 512 -> 256 -> 128 -> 32 -> 1`` with
    ReLU activations. The first four hidden stages are each followed by an
    optional ``BatchNorm1d`` and a ``Dropout``.

    Args:
        input_size: Width of the input feature vector.
        xcol: Stored on the instance as ``self.xcol``; not read by this class.
        ycol: Stored on the instance as ``self.ycol``; not read by this class.
        batch_norm: When true, insert ``BatchNorm1d`` after the first four
            hidden stages; otherwise an ``Identity`` placeholder is used so
            the module indices stay the same.
    """

    def __init__(self, input_size, xcol='emb', ycol='avg_rating', batch_norm=True):
        super().__init__()
        self.input_size = input_size
        self.xcol = xcol
        self.ycol = ycol

        def norm(width):
            # Identity keeps the Sequential indices stable when batch norm is off.
            return torch.nn.BatchNorm1d(width) if batch_norm else torch.nn.Identity()

        # NOTE: the position of every module inside the Sequential determines
        # its state_dict key ("layers.0.weight", ...). Do not reorder, or
        # previously saved checkpoints will no longer load.
        self.layers = torch.nn.Sequential(
            torch.nn.Linear(self.input_size, 2048),
            torch.nn.ReLU(),
            norm(2048),
            torch.nn.Dropout(0.3),
            torch.nn.Linear(2048, 512),
            torch.nn.ReLU(),
            norm(512),
            torch.nn.Dropout(0.3),
            torch.nn.Linear(512, 256),
            torch.nn.ReLU(),
            norm(256),
            torch.nn.Dropout(0.2),
            torch.nn.Linear(256, 128),
            torch.nn.ReLU(),
            norm(128),
            torch.nn.Dropout(0.1),
            torch.nn.Linear(128, 32),
            torch.nn.ReLU(),
            torch.nn.Linear(32, 1),
        )

    def forward(self, x):
        """Run ``x`` through the stack and return the result of the last layer."""
        return self.layers(x)
|
|
|
class WaifuScorer(object):
    """Score images with a CLIP ViT-L/14 encoder followed by an ``MLP`` head.

    Construction is best-effort: any exception raised while importing
    ``clip`` or loading weights is printed and leaves ``self.available`` set
    to ``False``. Calling an unavailable scorer returns ``None`` placeholders
    instead of raising.

    Args:
        model_path: Local weights file, or a ``<user>/<repo>/<file>`` string
            that is fetched with ``hf_hub_download`` when no such local file
            exists. Defaults to ``Eugeoter/waifu-scorer-v3/model.pth``.
        device: Torch device string for both the CLIP model and the MLP.
        cache_dir: Forwarded to ``hf_hub_download``.
        verbose: When true, print a note if the default model path is used.
    """

    def __init__(self, model_path=None, device='cuda', cache_dir=None, verbose=False):
        self.verbose = verbose
        # Defined up front so the instance is in a known state even when
        # loading fails part-way through.
        self.available = False

        try:
            import clip

            if model_path is None:
                model_path = "Eugeoter/waifu-scorer-v3/model.pth"
                if self.verbose:
                    print(f"model path not set, switch to default: `{model_path}`")

            if not os.path.isfile(model_path):
                parts = model_path.split("/")
                if len(parts) < 3:
                    # Without this check the unpacking below fails with an
                    # uninformative IndexError.
                    raise FileNotFoundError(
                        f"`{model_path}` is neither an existing file nor a "
                        "`<user>/<repo>/<file>` Hugging Face reference"
                    )
                username, repo_id, model_name = parts[-3], parts[-2], parts[-1]
                model_path = hf_hub_download(f"{username}/{repo_id}", model_name, cache_dir=cache_dir)

            print(f"Loading WaifuScorer model from `{model_path}`")

            self.mlp = MLP(input_size=768)
            if model_path.endswith(".safetensors"):
                from safetensors.torch import load_file
                state_dict = load_file(model_path)
            else:
                # SECURITY NOTE(review): torch.load unpickles the file, which
                # can execute arbitrary code. Only point this at checkpoints
                # from a trusted source (or prefer the .safetensors path).
                state_dict = torch.load(model_path, map_location=device)
            self.mlp.load_state_dict(state_dict)
            self.mlp.to(device)

            self.model2, self.preprocess = clip.load("ViT-L/14", device=device)
            self.device = device
            self.dtype = torch.float32
            self.mlp.eval()
            self.available = True
        except Exception as e:
            # Deliberate best-effort: the caller keeps working without this scorer.
            print(f"Unable to initialize WaifuScorer: {e}")
            self.available = False

    @torch.no_grad()
    def __call__(self, images):
        """Return one score per image, each clamped to the range 0-10.

        Args:
            images: A single PIL image, or a list/tuple of PIL images.

        Returns:
            A list of floats, one per input image. When the scorer failed to
            initialize, a list of ``None`` of the same length instead. An
            empty input yields an empty list.
        """
        if not self.available:
            count = len(images) if isinstance(images, (list, tuple)) else 1
            return [None] * count

        if isinstance(images, Image.Image):
            images = [images]
        images = list(images)

        n = len(images)
        if n == 0:
            # torch.cat on an empty list raises; there is nothing to score.
            return []
        if n == 1:
            # A lone image is duplicated so the batch has two rows; only the
            # first n scores are returned below.
            # NOTE(review): presumably a guard against batch-of-one issues --
            # confirm whether it is still needed with the MLP in eval mode.
            images = images * 2

        image_tensors = [self.preprocess(img).unsqueeze(0) for img in images]
        image_batch = torch.cat(image_tensors).to(self.device)
        image_features = self.model2.encode_image(image_batch)

        # L2-normalize each embedding; zero norms are replaced by 1 to avoid
        # dividing by zero.
        l2 = image_features.norm(2, dim=-1, keepdim=True)
        l2[l2 == 0] = 1
        im_emb_arr = (image_features / l2).to(device=self.device, dtype=self.dtype)

        predictions = self.mlp(im_emb_arr)
        scores = predictions.clamp(0, 10).cpu().numpy().reshape(-1).tolist()

        return scores[:n]
|
|
|
def load_aesthetic_predictor_v2_5():
    """Build and return a ready-to-use Aesthetic Predictor V2.5 wrapper.

    Returns:
        An object exposing ``inference(image) -> float``. The underlying model
        and preprocessor come from ``convert_v2_5_from_siglip``.
    """

    class AestheticPredictorV2_5_Impl:
        """Holds the model/preprocessor pair and runs single-image inference."""

        def __init__(self):
            print("Loading Aesthetic Predictor V2.5...")
            self.model, self.preprocessor = convert_v2_5_from_siglip(
                low_cpu_mem_usage=True,
                trust_remote_code=True,
            )
            # Decide once whether to run on the GPU, so the model and the
            # inputs prepared in inference() always agree on device and dtype.
            self.use_cuda = torch.cuda.is_available()
            if self.use_cuda:
                self.model = self.model.to(torch.bfloat16).cuda()

        def inference(self, image: Image.Image) -> float:
            """Return the model's raw score for ``image`` as a Python float.

            The image is converted to RGB before preprocessing. No clamping
            is applied here.
            """
            pixel_values = self.preprocessor(
                images=image.convert("RGB"), return_tensors="pt"
            ).pixel_values

            if self.use_cuda:
                pixel_values = pixel_values.to(torch.bfloat16).cuda()

            with torch.inference_mode():
                logits = self.model(pixel_values).logits

            # Cast to float32 before leaving torch (bfloat16 has no numpy
            # equivalent), then to a plain float to honour the annotation.
            return float(logits.squeeze().float().cpu().numpy())

    return AestheticPredictorV2_5_Impl()
|
|
|
def load_anime_aesthetic_model():
    """Download the ``skytnt/anime-aesthetic`` ONNX model and open a session.

    Returns:
        An ``onnxruntime.InferenceSession`` restricted to the CPU execution
        provider.
    """
    model_path = hf_hub_download(repo_id="skytnt/anime-aesthetic", filename="model.onnx")
    return rt.InferenceSession(model_path, providers=['CPUExecutionProvider'])
|
|
|
def predict_anime_aesthetic(img, model):
    """Score an image with the anime-aesthetic ONNX model.

    The image is scaled to 0-1 floats, resized so its longer side is 768
    pixels, centered on a zero-filled 768x768x3 canvas, and fed to ``model``
    as a ``(1, 3, 768, 768)`` array under the input name ``"img"``.

    Args:
        img: Array-like image with values in 0-255. Either ``(H, W)`` or
            ``(H, W, C)``; grayscale is replicated to three channels and any
            channels beyond the third (e.g. alpha) are dropped.
        model: Object with an onnxruntime-style ``run(output_names, feeds)``.

    Returns:
        The single scalar produced by the model, as a Python number.
    """
    img = np.asarray(img).astype(np.float32) / 255

    # Normalize to exactly three channels so the canvas assignment below
    # cannot fail on grayscale or RGBA input.
    if img.ndim == 2:
        img = np.stack([img] * 3, axis=-1)
    elif img.shape[2] < 3:
        img = np.repeat(img[..., :1], 3, axis=-1)
    elif img.shape[2] > 3:
        img = img[..., :3]

    s = 768
    h, w = img.shape[:2]
    # Fit the longer side to s; max(1, ...) keeps very thin images from
    # producing a zero-sized resize target.
    if h > w:
        h, w = s, max(1, int(s * w / h))
    else:
        h, w = max(1, int(s * h / w)), s

    ph, pw = s - h, s - w
    canvas = np.zeros([s, s, 3], dtype=np.float32)
    # cv2.resize takes its target size as (width, height).
    canvas[ph // 2:ph // 2 + h, pw // 2:pw // 2 + w] = cv2.resize(img, (w, h))

    # HWC -> CHW, then add the batch dimension.
    batch = np.transpose(canvas, (2, 0, 1))[np.newaxis, :]
    return model.run(None, {"img": batch})[0].item()
|
|
|
class ImageEvaluationTool:
    """Load four aesthetic models and evaluate, sort, and render image scores.

    Each evaluated image produces a dict with the keys ``aesthetic_shadow``,
    ``waifu_scorer``, ``aesthetic_predictor_v2_5``, ``anime_aesthetic`` and
    ``final_score``. A value is ``None`` when the corresponding model failed
    or was unavailable.
    """

    # Labels offered in the UI's "Sort by" dropdown -> keys of a result dict.
    _SORT_KEYS = {
        "Final Score": "final_score",
        "File Name": "file_name",
        "Aesthetic Shadow": "aesthetic_shadow",
        "Waifu Scorer": "waifu_scorer",
        "Aesthetic V2.5": "aesthetic_predictor_v2_5",
        "Anime Score": "anime_aesthetic",
    }

    # (result key, lower bound of the "medium" colour band), in table order.
    _SCORE_COLUMNS = (
        ("aesthetic_shadow", 4),
        ("waifu_scorer", 5),
        ("aesthetic_predictor_v2_5", 5),
        ("anime_aesthetic", 5),
        ("final_score", 5),
    )

    def __init__(self):
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        print(f"Using device: {self.device}")

        print("Loading models... This may take some time.")

        print("Loading Aesthetic Shadow model...")
        self.aesthetic_shadow = pipeline(
            "image-classification",
            model="NeoChen1024/aesthetic-shadow-v2-backup",
            device=self.device,
        )

        print("Loading Waifu Scorer model...")
        self.waifu_scorer = WaifuScorer(device=self.device, verbose=True)

        print("Loading Aesthetic Predictor V2.5...")
        self.aesthetic_predictor_v2_5 = load_aesthetic_predictor_v2_5()

        print("Loading Anime Aesthetic model...")
        self.anime_aesthetic = load_anime_aesthetic_model()

        print("All models loaded successfully!")

        # Scratch directory owned by this instance; removed by cleanup().
        self.temp_dir = tempfile.mkdtemp()

    def evaluate_image(self, image):
        """Run every model on ``image`` and return a dict of 0-10 scores.

        Args:
            image: A PIL image, or an array accepted by ``Image.fromarray``.

        Returns:
            Dict with one entry per model plus ``final_score`` (the mean of
            the model scores that are not ``None``, or ``None`` if all
            failed). A model that raises is reported on stdout and recorded
            as ``None``; it never aborts the whole evaluation.
        """
        results = {}

        if not isinstance(image, Image.Image):
            image = Image.fromarray(image)

        try:
            shadow_result = self.aesthetic_shadow(images=[image])[0]
            hq_score = next(p['score'] for p in shadow_result if p['label'] == 'hq')
            # The classifier's "hq" score is multiplied by 10 to land on 0-10.
            results['aesthetic_shadow'] = np.clip(hq_score * 10.0, 0.0, 10.0)
        except Exception as e:
            print(f"Error in Aesthetic Shadow: {e}")
            results['aesthetic_shadow'] = None

        try:
            waifu_score = self.waifu_scorer([image])[0]
            # The scorer yields None when it could not be initialized; pass
            # that through instead of relying on np.clip(None) to raise.
            results['waifu_scorer'] = (
                None if waifu_score is None else np.clip(waifu_score, 0.0, 10.0)
            )
        except Exception as e:
            print(f"Error in Waifu Scorer: {e}")
            results['waifu_scorer'] = None

        try:
            v2_5_score = self.aesthetic_predictor_v2_5.inference(image)
            v2_5_score_clamped = np.clip(v2_5_score, 0.0, 10.0)
            results['aesthetic_predictor_v2_5'] = float(np.round(v2_5_score_clamped, 4))
        except Exception as e:
            print(f"Error in Aesthetic Predictor V2.5: {e}")
            results['aesthetic_predictor_v2_5'] = None

        try:
            # The ONNX preprocessing writes into a 3-channel canvas, so make
            # sure the array is RGB regardless of the input mode.
            img_array = np.array(image.convert("RGB"))
            anime_score = predict_anime_aesthetic(img_array, self.anime_aesthetic)
            results['anime_aesthetic'] = np.clip(anime_score * 10.0, 0.0, 10.0)
        except Exception as e:
            print(f"Error in Anime Aesthetic: {e}")
            results['anime_aesthetic'] = None

        valid_scores = [v for v in results.values() if v is not None]
        if valid_scores:
            results['final_score'] = np.clip(np.mean(valid_scores), 0.0, 10.0)
        else:
            results['final_score'] = None

        return results

    def image_to_base64(self, image):
        """Encode ``image`` as JPEG and return the bytes as a base64 string."""
        if image.mode != "RGB":
            # JPEG cannot store alpha or palette images.
            image = image.convert("RGB")
        buffered = BytesIO()
        image.save(buffered, format="JPEG")
        return base64.b64encode(buffered.getvalue()).decode('utf-8')

    def process_single_image(self, file_path):
        """Evaluate one image file.

        Returns:
            A dict with ``file_name``, ``img_data`` (base64 JPEG thumbnail of
            at most 200x200) and the keys produced by ``evaluate_image``; or
            ``None`` if the file could not be processed (the error is
            printed).
        """
        try:
            # The context manager closes the file handle; convert() returns
            # an independent in-memory image.
            with Image.open(file_path) as source:
                img = source.convert("RGB")
            eval_results = self.evaluate_image(img)

            thumbnail = img.copy()
            thumbnail.thumbnail((200, 200))

            return {
                'file_name': os.path.basename(file_path),
                'img_data': self.image_to_base64(thumbnail),
                **eval_results,
            }
        except Exception as e:
            print(f"Error processing {file_path}: {e}")
            return None

    def process_images_evaluation(self, image_files):
        """Evaluate each path in ``image_files``; files that fail are skipped."""
        evaluated = (self.process_single_image(path) for path in image_files)
        return [result for result in evaluated if result is not None]

    def sort_results(self, results, sort_by="Final Score"):
        """Sort ``results`` in place by a UI column label and return the list.

        ``"File Name"`` sorts ascending. Every other label sorts descending
        by score, with missing (``None``) scores placed last. Labels not in
        the known mapping fall back to ``label.lower().replace(" ", "_")`` as
        the result key.
        """
        key_name = self._SORT_KEYS.get(sort_by, sort_by.lower().replace(" ", "_"))

        if key_name == "file_name":
            results.sort(key=lambda res: res.get(key_name) or "")
        else:
            def score_key(res):
                value = res.get(key_name)
                # -inf sorts last when the order is descending.
                return -float('inf') if value is None else value

            results.sort(key=score_key, reverse=True)
        return results

    @staticmethod
    def _score_cell(score, medium_threshold):
        """Render one ``<td>`` for a score, or ``N/A`` when it is missing."""
        if score is None:
            return '<td class="bad-score">N/A</td>'
        if score >= 7:
            score_class = "good-score"
        elif score >= medium_threshold:
            score_class = "medium-score"
        else:
            score_class = "bad-score"
        return f'<td class="{score_class}">{score:.4f}</td>'

    def generate_html_table(self, results):
        """Render ``results`` as a styled HTML table and return the markup.

        Scores are shown with four decimals and colour-coded (>=7 good,
        >=5 medium -- >=4 for Aesthetic Shadow -- otherwise bad). Missing
        scores are shown as ``N/A``. File names are HTML-escaped.
        """
        parts = ["""
<style>
    .results-table {
        width: 100%;
        border-collapse: collapse;
        margin: 20px 0;
        font-family: Arial, sans-serif;
        background-color: transparent;
    }

    .results-table th,
    .results-table td {
        color: #eee;
        border: 1px solid #ddd;
        padding: 8px;
        text-align: center;
        background-color: transparent;
    }

    .results-table th {
        font-weight: bold;
    }

    .results-table tr:nth-child(even) {
        background-color: transparent;
    }

    .results-table tr:hover {
        background-color: rgba(255, 255, 255, 0.1);
    }

    .image-preview {
        max-width: 150px;
        max-height: 150px;
        display: block;
        margin: 0 auto;
    }

    .good-score {
        color: #0f0;
        font-weight: bold;
    }
    .bad-score {
        color: #f00;
        font-weight: bold;
    }
    .medium-score {
        color: orange;
        font-weight: bold;
    }
</style>

<table class="results-table">
    <thead>
        <tr>
            <th>Image</th>
            <th>File Name</th>
            <th>Aesthetic Shadow</th>
            <th>Waifu Scorer</th>
            <th>Aesthetic V2.5</th>
            <th>Anime Score</th>
            <th>Final Score</th>
        </tr>
    </thead>
    <tbody>
"""]

        for result in results:
            parts.append("<tr>")
            parts.append(
                f'<td><img src="data:image/jpeg;base64,{result["img_data"]}" class="image-preview"></td>'
            )
            # File names come from user uploads; escape them before embedding.
            parts.append(f'<td>{html.escape(str(result["file_name"]))}</td>')
            for key, medium_threshold in self._SCORE_COLUMNS:
                parts.append(self._score_cell(result[key], medium_threshold))
            parts.append("</tr>")

        parts.append("""
    </tbody>
</table>
""")

        return "".join(parts)

    def cleanup(self):
        """Delete the instance's temporary directory if it still exists."""
        if os.path.exists(self.temp_dir):
            shutil.rmtree(self.temp_dir)
|
|
|
|
|
# Results of the most recent evaluation run, read and written by the UI
# callbacks defined in create_interface(). None means nothing has been
# evaluated yet (or the results were cleared).
global_results = None
|
|
|
def create_interface():
    """Build the Gradio Blocks UI for the image evaluation tool.

    Instantiates ``ImageEvaluationTool`` (which loads all models), lays out
    the upload / sort / results widgets, and wires three callbacks:
    evaluating the uploaded files, re-sorting the last results, and clearing
    them.

    Returns:
        The configured ``gr.Blocks`` app (not yet launched).

    NOTE(review): the callbacks keep their results in the module-level
    ``global_results``, so concurrent sessions share one result set.
    """
    evaluator = ImageEvaluationTool()
    sort_options = ["Final Score", "File Name", "Aesthetic Shadow", "Waifu Scorer", "Aesthetic V2.5", "Anime Score"]

    # Built line by line so the Markdown does not depend on source indentation.
    intro_md = "\n".join([
        "# Comprehensive Image Evaluation Tool",
        "",
        "Upload images to evaluate them using multiple aesthetic and quality prediction models:",
        "",
        "- **Aesthetic Shadow**: Evaluates high-quality vs low-quality images (scaled to 0-10)",
        "- **Waifu Scorer**: Rates anime/illustration quality from 0-10",
        "- **Aesthetic Predictor V2.5**: General aesthetic quality prediction (clamped to 0-10)",
        "- **Anime Aesthetic**: Specific model for anime style images (scaled and clamped to 0-10)",
        "- **Final Score**: Average of available scores (clamped to 0-10)",
        "",
        "Upload multiple images to get a comprehensive evaluation table. Scores are clamped to the range 0.0000 - 10.0000.",
    ])
    notes_md = "\n".join([
        "### Notes",
        "- The evaluation may take some time depending on the number and size of images",
        "- For best results, use high-quality images",
        "- Scores are color-coded: green for good (>=7), orange for medium (>=5), and red for poor scores (<5, or <4 for Aesthetic Shadow)",
        '- Some models may fail for certain image types, shown as "N/A" in the results',
        '- "Final Score" is a simple average of available model scores.',
        '- Table is sortable by clicking the dropdown above the "Evaluate Images" button. Default sort is by "Final Score". Sorting happens instantly without re-evaluating images.',
    ])

    with gr.Blocks(theme=gr.themes.Soft()) as demo:
        gr.Markdown(intro_md)

        with gr.Row():
            with gr.Column(scale=1):
                input_images = gr.Files(label="Upload Images")
                sort_dropdown = gr.Dropdown(sort_options, value="Final Score", label="Sort by")
                process_btn = gr.Button("Evaluate Images", variant="primary")
                clear_btn = gr.Button("Clear Results")

            with gr.Column(scale=2):
                progress_html = gr.HTML(label="Progress")
                output_html = gr.HTML(label="Evaluation Results")

        def process_images_and_update(files, sort_by="Final Score"):
            """Evaluate the uploads one by one, streaming progress to the UI.

            This is a generator: each ``yield`` is a ``(progress_html,
            results_html)`` pair. The results table is only filled in by the
            final yield.
            """
            global global_results
            # `files` is None when nothing was uploaded; entries may be
            # file-like objects with a .name or plain path strings.
            file_paths = [getattr(f, "name", f) for f in (files or [])]
            total = len(file_paths)

            if not file_paths:
                global_results = []
                # Inside a generator a `return value` is never delivered to
                # Gradio, so the empty table has to be yielded.
                yield "", evaluator.generate_html_table([])
                return

            results = []
            for i, file_path in enumerate(file_paths):
                percent = (i / total) * 100
                progress_bar = (
                    "<div>"
                    f"<p>Processing {html.escape(os.path.basename(file_path))}</p>"
                    f'<progress value="{percent}" max="100"></progress>'
                    f"<p>{percent:.1f}% complete</p>"
                    "</div>"
                )
                # Announce the file, then actually process it, so the bar
                # advances with the real work.
                yield progress_bar, gr.update()

                result = evaluator.process_single_image(file_path)
                if result is not None:
                    results.append(result)

            global_results = results
            sorted_results = evaluator.sort_results(global_results, sort_by=sort_by or "Final Score")
            yield "<p>Processing complete</p>", evaluator.generate_html_table(sorted_results)

        def update_table_sort(sort_by_column):
            """Re-render the last results sorted by the selected column."""
            global global_results
            if global_results is None:
                return "No images evaluated yet."
            sorted_results = evaluator.sort_results(global_results, sort_by=sort_by_column)
            return evaluator.generate_html_table(sorted_results)

        def clear_results():
            """Forget the stored results and blank both HTML panes."""
            global global_results
            global_results = None
            return gr.update(value=""), gr.update(value="")

        process_btn.click(
            process_images_and_update,
            # The dropdown is passed along so the first table already honours
            # the currently selected sort column.
            inputs=[input_images, sort_dropdown],
            outputs=[progress_html, output_html],
        )
        sort_dropdown.change(
            update_table_sort,
            inputs=[sort_dropdown],
            outputs=[output_html],
        )
        clear_btn.click(
            clear_results,
            inputs=[],
            outputs=[progress_html, output_html],
        )

        demo.load(lambda: None, inputs=None, outputs=None)

        gr.Markdown(notes_md)

    return demo
|
|
|
# Script entry point: build the UI and serve it with request queuing enabled
# (queuing is what lets the generator callback stream progress updates).
if __name__ == "__main__":
    demo = create_interface()
    demo.queue().launch()