import gradio as gr
from PIL import Image, PngImagePlugin
import io
import os
import pandas as pd
import torch
from transformers import pipeline as transformers_pipeline, AutoModelForImageClassification, CLIPImageProcessor  # changed for ImageReward
# from torchvision import transforms
from torchmetrics.functional.multimodal import clip_score
import open_clip  # changed for open_clip
import re
import matplotlib.pyplot as plt
import json
from collections import defaultdict
import numpy as np
import logging
# --- ONNX Related Imports and Setup ---
try:
    import onnxruntime
except ImportError:
    print("onnxruntime not found. Please ensure it's in requirements.txt")
    onnxruntime = None
from huggingface_hub import hf_hub_download
try:
    from imgutils.data import rgb_encode
    IMGUTILS_AVAILABLE = True
    print("imgutils.data.rgb_encode found and will be used.")
except ImportError:
    print("imgutils.data.rgb_encode not found. Using a basic fallback for preprocessing deepghs models.")
    IMGUTILS_AVAILABLE = False
    def rgb_encode(image: Image.Image, order_='CHW'):  # simple fallback stub
        img_arr = np.array(image.convert("RGB"))  # make sure the image is RGB
        if order_ == 'CHW':
            img_arr = np.transpose(img_arr, (2, 0, 1))
        # This stub returns uint8 values in 0-255, as expected downstream.
        return img_arr.astype(np.uint8)
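# Sanity sketch for the fallback (hypothetical 640x480 input): np.array gives an
# (H, W, C) = (480, 640, 3) uint8 array, and the CHW transpose yields (3, 480, 640),
# matching the layout imgutils' rgb_encode produces.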
# --- Model Configuration and Loading ---
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {DEVICE}")
ONNX_DEVICE = "CUDAExecutionProvider" if DEVICE == "cuda" and onnxruntime and "CUDAExecutionProvider" in onnxruntime.get_available_providers() else "CPUExecutionProvider"
print(f"Using ONNX device: {ONNX_DEVICE}")
# --- Helper for ONNX models (deepghs) ---
def _img_preprocess_for_onnx(image: Image.Image, size: tuple = (384, 384), normalize_mean=0.5, normalize_std=0.5):
    image = image.resize(size, Image.Resampling.BILINEAR)
    data_uint8 = rgb_encode(image, order_='CHW')  # (C, H, W), uint8, 0-255
    data_float01 = data_uint8.astype(np.float32) / 255.0
    mean = np.array([normalize_mean] * 3, dtype=np.float32).reshape((3, 1, 1))
    std = np.array([normalize_std] * 3, dtype=np.float32).reshape((3, 1, 1))
    normalized_data = (data_float01 - mean) / std
    return normalized_data[None, ...].astype(np.float32)
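# Usage sketch ("sample.png" is a hypothetical path): for the default 384x384 size
# the helper yields a float32 batch normalized to roughly [-1, 1]:
#   batch = _img_preprocess_for_onnx(Image.open("sample.png"), size=(384, 384))
#   assert batch.shape == (1, 3, 384, 384) and batch.dtype == np.float32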
onnx_sessions_cache = {}
def get_onnx_session_and_meta(repo_id, model_subfolder):
    cache_key = f"{repo_id}/{model_subfolder}"
    if cache_key in onnx_sessions_cache:
        return onnx_sessions_cache[cache_key]
    if not onnxruntime:
        # raise ImportError("ONNX Runtime is not available.")  # don't crash, just return None
        print("ONNX Runtime is not available for get_onnx_session_and_meta")
        onnx_sessions_cache[cache_key] = (None, [], None)
        return None, [], None
    try:
        model_path = hf_hub_download(repo_id, filename=f"{model_subfolder}/model.onnx")
        meta_path = hf_hub_download(repo_id, filename=f"{model_subfolder}/meta.json")
        options = onnxruntime.SessionOptions()
        options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_ALL
        if ONNX_DEVICE == "CPUExecutionProvider" and hasattr(os, 'cpu_count'):  # hasattr for safety
            options.intra_op_num_threads = os.cpu_count()
        session = onnxruntime.InferenceSession(model_path, options, providers=[ONNX_DEVICE])
        with open(meta_path, 'r') as f:
            meta = json.load(f)
        labels = meta.get('labels', [])
        onnx_sessions_cache[cache_key] = (session, labels, meta)
        return session, labels, meta
    except Exception as e:
        print(f"Error loading ONNX model {repo_id}/{model_subfolder}: {e}")
        onnx_sessions_cache[cache_key] = (None, [], None)
        return None, [], None
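# Usage sketch: sessions are cached per repo/subfolder, so repeated calls are cheap.
#   session, labels, meta = get_onnx_session_and_meta("deepghs/anime_ai_check", "caformer_s36_plus_sce")
#   if session: ...  # run inference; see the scoring functions below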
# 1. ImageReward
try:
    # THUDM/ImageReward uses a CLIP-style image preprocessor.
    reward_processor = CLIPImageProcessor.from_pretrained("openai/clip-vit-large-patch14")  # a typical processor for such models
    # NOTE: THUDM/ImageReward is not a standard transformers classification checkpoint,
    # so this load may fail; the except branch then disables the metric gracefully.
    reward_model = AutoModelForImageClassification.from_pretrained("THUDM/ImageReward").to(DEVICE)
    reward_model.eval()
    print("THUDM/ImageReward loaded successfully.")
except Exception as e:
    print(f"Error loading THUDM/ImageReward: {e}")
    reward_processor, reward_model = None, None
# 2. Anime Aesthetic (deepghs ONNX)
ANIME_AESTHETIC_REPO = "deepghs/anime_aesthetic"
ANIME_AESTHETIC_SUBFOLDER = "swinv2pv3_v0_448_ls0.2_x"
ANIME_AESTHETIC_IMG_SIZE = (448, 448)
ANIME_AESTHETIC_LABEL_WEIGHTS = {"normal": 0.0, "slight": 1.0, "moderate": 2.0, "strong": 3.0, "extreme": 4.0}
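# The aesthetic metric below turns the model's logits into a softmax distribution p
# and reports the expectation  score = sum_i p[i] * weight[label_i],
# so values range from 0.0 (all mass on "normal") to 4.0 (all mass on "extreme").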
# 3. MANIQA (Technical Quality) - TEMPORARILY DISABLED
maniqa_pipe = None
print("MANIQA (honklers/maniqa-nr) is temporarily disabled due to loading issues. Will look for alternatives.")
# try:
#     maniqa_pipe = transformers_pipeline("image-classification", model="honklers/maniqa-nr", device=0 if DEVICE == "cuda" else -1)
# except Exception as e:
#     print(f"Error loading honklers/maniqa-nr: {e}")
#     maniqa_pipe = None
# 4. CLIP Score (laion/CLIP-ViT-L-14-laion2B-s32B-b82K) - open_clip
clip_model_name = 'ViT-L-14'
try:
    # For open_clip, `pretrained` is usually a dataset name or tag;
    # `laion2b_s32b_b82k` is one of the released weight sets for ViT-L-14.
    clip_model_instance, clip_preprocess_train, clip_preprocess_val = open_clip.create_model_and_transforms(
        clip_model_name,
        pretrained='laion2b_s32b_b82k',  # the correct pretrained tag for open_clip
        device=DEVICE
    )
    clip_preprocess = clip_preprocess_val  # use the val transform for inference
    clip_tokenizer = open_clip.get_tokenizer(clip_model_name)
    clip_model_instance.eval()
    print(f"CLIP model {clip_model_name} (laion2b_s32b_b82k) loaded successfully.")
except Exception as e:
    print(f"Error loading CLIP model {clip_model_name} (laion2b_s32b_b82k): {e}")
    clip_model_instance, clip_preprocess, clip_tokenizer = None, None, None
# 5. AI Detectors
# Organika/sdxl-detector - Transformers pipeline
try:
    # torch.device("cuda").index is None, so pass an explicit GPU index instead.
    sdxl_detector_pipe = transformers_pipeline("image-classification", model="Organika/sdxl-detector", device=0 if DEVICE == "cuda" else -1)
    print("Organika/sdxl-detector loaded successfully.")
except Exception as e:
    print(f"Error loading Organika/sdxl-detector: {e}")
    sdxl_detector_pipe = None
# deepghs/anime_ai_check - ONNX
ANIME_AI_CHECK_REPO = "deepghs/anime_ai_check"
ANIME_AI_CHECK_SUBFOLDER = "caformer_s36_plus_sce"
ANIME_AI_CHECK_IMG_SIZE = (384, 384)
# --- Metadata extraction functions (unchanged) ---
def extract_sd_parameters(image_pil):
    if image_pil is None: return "", "N/A", "N/A", "N/A", {}
    parameters_str = image_pil.info.get("parameters", "")
    if not parameters_str: return "", "N/A", "N/A", "N/A", {}
    prompt, negative_prompt, model_name, model_hash, other_params_dict = "", "N/A", "N/A", "N/A", {}
    try:
        neg_prompt_index = parameters_str.find("Negative prompt:")
        steps_meta_index = parameters_str.find("Steps:")
        if neg_prompt_index != -1:
            prompt = parameters_str[:neg_prompt_index].strip()
            params_part_start_index = steps_meta_index if steps_meta_index > neg_prompt_index else -1
            if params_part_start_index != -1:
                negative_prompt = parameters_str[neg_prompt_index + len("Negative prompt:"):params_part_start_index].strip()
                params_part = parameters_str[params_part_start_index:]
            else:
                end_of_neg = parameters_str.find("\n", neg_prompt_index + len("Negative prompt:"))
                if end_of_neg == -1: end_of_neg = len(parameters_str)
                negative_prompt = parameters_str[neg_prompt_index + len("Negative prompt:"):end_of_neg].strip()
                params_part = parameters_str[end_of_neg:].strip() if end_of_neg < len(parameters_str) else ""
        elif steps_meta_index != -1:
            prompt = parameters_str[:steps_meta_index].strip()
            params_part = parameters_str[steps_meta_index:]
        else:
            prompt = parameters_str.strip()
            params_part = ""
        if params_part:
            params_list = [p.strip() for p in params_part.split(",")]
            temp_other_params = {}
            for param_val_str in params_list:
                parts = param_val_str.split(':', 1)
                if len(parts) == 2:
                    key, value = parts[0].strip(), parts[1].strip()
                    temp_other_params[key] = value
                    if key == "Model": model_name = value
                    elif key == "Model hash": model_hash = value
            for k, v in temp_other_params.items():
                if k not in ["Model", "Model hash"]: other_params_dict[k] = v
        if model_name == "N/A" and model_hash != "N/A": model_name = f"hash_{model_hash}"
        # Fallbacks for the model name if only a checkpoint key is present (e.g. from ComfyUI).
        if model_name == "N/A" and "Checkpoint" in other_params_dict: model_name = other_params_dict["Checkpoint"]
        if model_name == "N/A" and "model" in other_params_dict: model_name = other_params_dict["model"]  # another common key
    except Exception as e:
        print(f"Error parsing metadata: {e}")
    return prompt, negative_prompt, model_name, model_hash, other_params_dict
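# Parsing sketch for a typical A1111-style "parameters" string (values illustrative):
#   "masterpiece, 1girl\nNegative prompt: lowres, bad hands\nSteps: 20, Sampler: Euler a, CFG scale: 7, Seed: 42, Model hash: abc12345, Model: myModel_v1"
# -> prompt="masterpiece, 1girl", negative_prompt="lowres, bad hands",
#    model_name="myModel_v1", model_hash="abc12345", the rest in other_params_dict.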
# --- Scoring functions ---
def get_image_reward(image_pil):
    if not reward_model or not reward_processor: return "N/A"
    try:
        # ImageReward expects CLIP-style preprocessing; the image processor handles
        # resize/normalize (padding/truncation are tokenizer args and don't apply here).
        inputs = reward_processor(images=image_pil, return_tensors="pt").to(DEVICE)
        with torch.no_grad():
            outputs = reward_model(**inputs)
        return round(outputs.logits.item(), 4)
    except Exception as e:
        print(f"Error in ImageReward: {e}")
        return "Error"
def get_anime_aesthetic_score_deepghs(image_pil):
    session, labels, meta = get_onnx_session_and_meta(ANIME_AESTHETIC_REPO, ANIME_AESTHETIC_SUBFOLDER)
    if not session or not labels: return "N/A"
    try:
        input_data = _img_preprocess_for_onnx(image_pil.copy(), size=ANIME_AESTHETIC_IMG_SIZE)
        input_name = session.get_inputs()[0].name
        output_name = session.get_outputs()[0].name
        onnx_output, = session.run([output_name], {input_name: input_data})
        scores = onnx_output[0]
        exp_scores = np.exp(scores - np.max(scores))
        probabilities = exp_scores / np.sum(exp_scores)
        weighted_score = sum(probabilities[i] * ANIME_AESTHETIC_LABEL_WEIGHTS.get(label, 0.0) for i, label in enumerate(labels))
        return round(weighted_score, 4)
    except Exception as e:
        print(f"Error in Anime Aesthetic (ONNX): {e}")
        return "Error"
def get_maniqa_score(image_pil):  # temporarily returns N/A
    # if not maniqa_pipe: return "N/A"
    # try:
    #     result = maniqa_pipe(image_pil.copy())
    #     score = 0.0
    #     for item in result:
    #         if item['label'].lower() == 'good quality': score = item['score']; break
    #     return round(score, 4)
    # except Exception as e:
    #     print(f"Error in MANIQA: {e}")
    #     return "Error"
    return "N/A (Disabled)"
def calculate_clip_score_value(image_pil, prompt_text):
    if not clip_model_instance or not clip_preprocess or not clip_tokenizer or not prompt_text or prompt_text == "N/A":
        return "N/A"
    try:
        image_input = clip_preprocess(image_pil).unsqueeze(0).to(DEVICE)
        # Make sure prompt_text is a string, not None or something else.
        text_for_tokenizer = str(prompt_text) if prompt_text else ""
        if not text_for_tokenizer: return "N/A (Empty Prompt)"
        text_input = clip_tokenizer([text_for_tokenizer]).to(DEVICE)
        with torch.no_grad():
            image_features = clip_model_instance.encode_image(image_input)
            text_features = clip_model_instance.encode_text(text_input)
        image_features_norm = image_features / image_features.norm(p=2, dim=-1, keepdim=True)
        text_features_norm = text_features / text_features.norm(p=2, dim=-1, keepdim=True)
        score = (text_features_norm @ image_features_norm.T).squeeze().item() * 100.0
        return round(score, 2)
    except Exception as e:
        print(f"Error in CLIP Score: {e}")
        return "Error"
def get_sdxl_detection_score(image_pil):
    if not sdxl_detector_pipe: return "N/A"
    try:
        result = sdxl_detector_pipe(image_pil.copy())
        ai_score = 0.0
        for item in result:
            if item['label'].lower() == 'artificial': ai_score = item['score']; break
        return round(ai_score, 4)
    except Exception as e:
        print(f"Error in SDXL Detector: {e}")
        return "Error"
def get_anime_ai_check_score_deepghs(image_pil):
    session, labels, meta = get_onnx_session_and_meta(ANIME_AI_CHECK_REPO, ANIME_AI_CHECK_SUBFOLDER)
    if not session or not labels: return "N/A"
    try:
        input_data = _img_preprocess_for_onnx(image_pil.copy(), size=ANIME_AI_CHECK_IMG_SIZE)
        input_name = session.get_inputs()[0].name
        output_name = session.get_outputs()[0].name
        onnx_output, = session.run([output_name], {input_name: input_data})
        scores = onnx_output[0]
        exp_scores = np.exp(scores - np.max(scores))
        probabilities = exp_scores / np.sum(exp_scores)
        ai_prob = 0.0
        for i, label in enumerate(labels):
            if label.lower() == 'ai': ai_prob = probabilities[i]; break
        return round(ai_prob, 4)
    except Exception as e:
        print(f"Error in Anime AI Check (ONNX): {e}")
        return "Error"
# --- Main processing function ---
def process_images(files, progress=gr.Progress(track_tqdm=True)):
    if not files:
        return pd.DataFrame(), None, None, None, None, "Please upload some images."
    all_results = []
    for i, file_obj in enumerate(files):
        filename = "Unknown File"
        try:
            # file_obj.name may be an absolute path on the server
            filename = os.path.basename(getattr(file_obj, 'name', f"file_{i}"))
            img = Image.open(getattr(file_obj, 'name', str(file_obj)))
            if img.mode != "RGB": img = img.convert("RGB")
            prompt, neg_prompt, model_n, model_h, other_p = extract_sd_parameters(img)
            reward = get_image_reward(img)
            anime_aes_deepghs = get_anime_aesthetic_score_deepghs(img)
            maniqa = get_maniqa_score(img)  # currently returns "N/A (Disabled)"
            clip_val = calculate_clip_score_value(img, prompt)
            sdxl_detect = get_sdxl_detection_score(img)
            anime_ai_chk_deepghs = get_anime_ai_check_score_deepghs(img)
            all_results.append({
                "Filename": filename, "Prompt": prompt if prompt else "N/A", "Model Name": model_n, "Model Hash": model_h,
                "ImageReward": reward, "AnimeAesthetic_dg": anime_aes_deepghs, "MANIQA_TQ": maniqa,
                "CLIPScore": clip_val, "SDXL_Detector_AI_Prob": sdxl_detect, "AnimeAI_Check_dg_Prob": anime_ai_chk_deepghs,
            })
        except Exception as e:
            print(f"CRITICAL: Failed to process {filename}: {e}")
            all_results.append({
                "Filename": filename, "Prompt": "Error", "Model Name": "Error", "Model Hash": "Error",
                "ImageReward": "Error", "AnimeAesthetic_dg": "Error", "MANIQA_TQ": "Error",
                "CLIPScore": "Error", "SDXL_Detector_AI_Prob": "Error", "AnimeAI_Check_dg_Prob": "Error"
            })
    df = pd.DataFrame(all_results)
    plot_model_avg_scores_buffer, plot_prompt_clip_scores_buffer = None, None
    csv_buffer_val, json_buffer_val = "", ""
    if not df.empty:
        numeric_cols = ["ImageReward", "AnimeAesthetic_dg", "MANIQA_TQ", "CLIPScore"]
        for col in numeric_cols: df[col] = pd.to_numeric(df[col], errors='coerce')
        # Plot 1: average scores per model
        df_model_plot = df[(df["Model Name"] != "N/A") & (df["Model Name"].notna())]
        if not df_model_plot.empty and df_model_plot["Model Name"].nunique() > 0:
            try:
                model_avg_scores = df_model_plot.groupby("Model Name")[numeric_cols].mean().dropna(how='all')
                if not model_avg_scores.empty:
                    fig1, ax1 = plt.subplots(figsize=(12, 7)); model_avg_scores.plot(kind="bar", ax=ax1)
                    ax1.set_title("Average Scores per Model"); ax1.set_ylabel("Average Score")
                    ax1.tick_params(axis='x', rotation=45, labelsize=8); plt.tight_layout()
                    plot_model_avg_scores_buffer = io.BytesIO(); fig1.savefig(plot_model_avg_scores_buffer, format="png"); plot_model_avg_scores_buffer.seek(0); plt.close(fig1)
            except Exception as e: print(f"Error generating model average scores plot: {e}")
        # Plot 2: average CLIPScore per prompt
        # .copy() avoids pandas' SettingWithCopyWarning when adding "Short Prompt" below.
        df_prompt_plot = df[(df["Prompt"] != "N/A") & (df["Prompt"].notna()) & (df["CLIPScore"].notna())].copy()
        if not df_prompt_plot.empty and df_prompt_plot["Prompt"].nunique() > 0:
            try:
                df_prompt_plot["Short Prompt"] = df_prompt_plot["Prompt"].apply(lambda x: (str(x)[:30] + '...') if len(str(x)) > 33 else str(x))
                prompt_clip_scores = df_prompt_plot.groupby("Short Prompt")["CLIPScore"].mean().sort_values(ascending=False)
                if not prompt_clip_scores.empty and len(prompt_clip_scores) >= 1:  # >= 1 so single prompts still plot
                    fig2, ax2 = plt.subplots(figsize=(12, max(7, min(len(prompt_clip_scores) * 0.5, 15))))
                    prompt_clip_scores.head(20).plot(kind="barh", ax=ax2)
                    ax2.set_title("Average CLIPScore per Prompt (Top 20 unique prompts)"); ax2.set_xlabel("Average CLIPScore")
                    plt.tight_layout(); plot_prompt_clip_scores_buffer = io.BytesIO(); fig2.savefig(plot_prompt_clip_scores_buffer, format="png"); plot_prompt_clip_scores_buffer.seek(0); plt.close(fig2)
            except Exception as e: print(f"Error generating prompt CLIP scores plot: {e}")
        csv_b = io.StringIO(); df.to_csv(csv_b, index=False); csv_buffer_val = csv_b.getvalue()
        json_b = io.StringIO(); df.to_json(json_b, orient='records', indent=4); json_buffer_val = json_b.getvalue()
    # gr.File expects a file path (not a string of contents, and it has no file_name
    # kwarg), so persist the CSV/JSON to disk; gr.Image with type="pil" needs a PIL image.
    csv_path, json_path = None, None
    if csv_buffer_val:
        csv_path = "evaluation_results.csv"
        with open(csv_path, "w", encoding="utf-8") as f: f.write(csv_buffer_val)
    if json_buffer_val:
        json_path = "evaluation_results.json"
        with open(json_path, "w", encoding="utf-8") as f: f.write(json_buffer_val)
    return (
        df,
        gr.Image(value=Image.open(plot_model_avg_scores_buffer) if plot_model_avg_scores_buffer else None, type="pil", visible=plot_model_avg_scores_buffer is not None),
        gr.Image(value=Image.open(plot_prompt_clip_scores_buffer) if plot_prompt_clip_scores_buffer else None, type="pil", visible=plot_prompt_clip_scores_buffer is not None),
        gr.File(value=csv_path, label="Download CSV Results", visible=csv_path is not None),
        gr.File(value=json_path, label="Download JSON Results", visible=json_path is not None),
        f"Processed {len(all_results)} images.",
    )
# --- Gradio interface ---
with gr.Blocks(css="footer {display: none !important}") as demo:
    gr.Markdown("# AI Image Model Evaluation Tool")
    gr.Markdown("Upload PNG images (ideally with Stable Diffusion metadata) to evaluate them...")
    with gr.Row(): image_uploader = gr.Files(label="Upload Images (PNG)", file_count="multiple", file_types=["image"])
    process_button = gr.Button("Evaluate Images", variant="primary")
    status_textbox = gr.Textbox(label="Status", interactive=False)
    gr.Markdown("## Evaluation Results Table")
    results_table = gr.DataFrame(headers=[  # max_rows removed
        "Filename", "Prompt", "Model Name", "Model Hash", "ImageReward", "AnimeAesthetic_dg",
        "MANIQA_TQ", "CLIPScore", "SDXL_Detector_AI_Prob", "AnimeAI_Check_dg_Prob"
    ], wrap=True)
    with gr.Row():
        download_csv_button = gr.File(label="Download CSV Results", interactive=False)
        download_json_button = gr.File(label="Download JSON Results", interactive=False)
    gr.Markdown("## Visualizations")
    with gr.Row():
        plot_output_model_avg = gr.Image(label="Average Scores per Model", type="pil", interactive=False)
        plot_output_prompt_clip = gr.Image(label="Average CLIPScore per Prompt", type="pil", interactive=False)
    process_button.click(fn=process_images, inputs=[image_uploader], outputs=[
        results_table, plot_output_model_avg, plot_output_prompt_clip,
        download_csv_button, download_json_button, status_textbox
    ])
    gr.Markdown("""**Metric Explanations:** ... (unchanged)""")
if __name__ == "__main__":
    demo.launch(debug=True)