import gradio as gr
from PIL import Image
import io
import os
import tempfile
import pandas as pd
import torch
from transformers import pipeline as transformers_pipeline, AutoModelForImageClassification, CLIPImageProcessor
from torchmetrics.functional.multimodal import clip_score
import open_clip
import matplotlib.pyplot as plt
import json
import numpy as np

# --- ONNX Related Imports and Setup ---
try:
    import onnxruntime
except ImportError:
    print("onnxruntime not found. Please ensure it's in requirements.txt")
    onnxruntime = None

from huggingface_hub import hf_hub_download

try:
    from imgutils.data import rgb_encode
    IMGUTILS_AVAILABLE = True
    print("imgutils.data.rgb_encode found and will be used.")
except ImportError:
    print("imgutils.data.rgb_encode not found. Using a basic fallback for preprocessing deepghs models.")
    IMGUTILS_AVAILABLE = False

    def rgb_encode(image: Image.Image, order_='CHW'):
        # Simple stand-in for imgutils' rgb_encode: returns a uint8 array in [0, 255].
        img_arr = np.array(image.convert("RGB"))  # make sure the image is RGB
        if order_ == 'CHW':
            img_arr = np.transpose(img_arr, (2, 0, 1))
        return img_arr.astype(np.uint8)

# --- Model Configuration and Loading ---
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {DEVICE}")

ONNX_DEVICE = ("CUDAExecutionProvider"
               if DEVICE == "cuda" and onnxruntime and "CUDAExecutionProvider" in onnxruntime.get_available_providers()
               else "CPUExecutionProvider")
print(f"Using ONNX device: {ONNX_DEVICE}")

# --- Helper for ONNX models (deepghs) ---
def _img_preprocess_for_onnx(image: Image.Image, size: tuple = (384, 384), normalize_mean=0.5, normalize_std=0.5):
    image = image.resize(size, Image.Resampling.BILINEAR)
    data = rgb_encode(image, order_='CHW').astype(np.float32)  # (C, H, W)
    # imgutils' rgb_encode returns float32 in [0, 1] by default, while the uint8
    # fallback above returns [0, 255]; scale defensively so both paths agree.
    if data.max() > 1.0:
        data = data / 255.0
    mean = np.array([normalize_mean] * 3, dtype=np.float32).reshape((3, 1, 1))
    std = np.array([normalize_std] * 3, dtype=np.float32).reshape((3, 1, 1))
    normalized_data = (data - mean) / std
    return normalized_data[None, ...].astype(np.float32)

onnx_sessions_cache = {}

def get_onnx_session_and_meta(repo_id, model_subfolder):
    cache_key = f"{repo_id}/{model_subfolder}"
    if cache_key in onnx_sessions_cache:
        return onnx_sessions_cache[cache_key]
    if not onnxruntime:
        # Don't crash; just report the metric as unavailable.
        print("ONNX Runtime is not available for get_onnx_session_and_meta")
        onnx_sessions_cache[cache_key] = (None, [], None)
        return None, [], None
    try:
        model_path = hf_hub_download(repo_id, filename=f"{model_subfolder}/model.onnx")
        meta_path = hf_hub_download(repo_id, filename=f"{model_subfolder}/meta.json")
        options = onnxruntime.SessionOptions()
        options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_ALL
        if ONNX_DEVICE == "CPUExecutionProvider":
            options.intra_op_num_threads = os.cpu_count()
        session = onnxruntime.InferenceSession(model_path, options, providers=[ONNX_DEVICE])
        with open(meta_path, 'r') as f:
            meta = json.load(f)
        labels = meta.get('labels', [])
        onnx_sessions_cache[cache_key] = (session, labels, meta)
        return session, labels, meta
    except Exception as e:
        print(f"Error loading ONNX model {repo_id}/{model_subfolder}: {e}")
        onnx_sessions_cache[cache_key] = (None, [], None)
        return None, [], None
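# A minimal usage sketch of the two helpers above (hypothetical, never called by the
# app): run one deepghs ONNX head end-to-end. "demo.png" is a placeholder path; the
# repo/subfolder literals mirror the ANIME_AESTHETIC_* constants defined below.
def _onnx_smoke_test(image_path="demo.png"):
    session, labels, _meta = get_onnx_session_and_meta("deepghs/anime_aesthetic", "swinv2pv3_v0_448_ls0.2_x")
    if session is None:
        return None
    batch = _img_preprocess_for_onnx(Image.open(image_path), size=(448, 448))  # (1, 3, 448, 448) float32
    logits, = session.run([session.get_outputs()[0].name], {session.get_inputs()[0].name: batch})
    return dict(zip(labels, logits[0].tolist()))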
# 1. ImageReward
try:
    # Pair the model with a standard CLIP image processor for preprocessing.
    reward_processor = CLIPImageProcessor.from_pretrained("openai/clip-vit-large-patch14")
    # NOTE: THUDM/ImageReward is not published as a plain transformers classification
    # checkpoint, so this load may fail; the except branch then disables the metric.
    reward_model = AutoModelForImageClassification.from_pretrained("THUDM/ImageReward").to(DEVICE)
    reward_model.eval()
    print("THUDM/ImageReward loaded successfully.")
except Exception as e:
    print(f"Error loading THUDM/ImageReward: {e}")
    reward_processor, reward_model = None, None
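# Hedged alternative to the block above: the official loader for this model lives in
# the `image-reward` package (pip install image-reward), which is likely the more
# reliable path if the transformers load keeps failing. Sketch from memory, untested:
#
# import ImageReward as RM
# rm_model = RM.load("ImageReward-v1.0", device=DEVICE)
# score = rm_model.score("prompt text", "/path/to/image.png")  # higher = better alignment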
# 2. Anime Aesthetic (deepghs ONNX)
ANIME_AESTHETIC_REPO = "deepghs/anime_aesthetic"
ANIME_AESTHETIC_SUBFOLDER = "swinv2pv3_v0_448_ls0.2_x"
ANIME_AESTHETIC_IMG_SIZE = (448, 448)
ANIME_AESTHETIC_LABEL_WEIGHTS = {"normal": 0.0, "slight": 1.0, "moderate": 2.0, "strong": 3.0, "extreme": 4.0}

# 3. MANIQA (Technical Quality) - TEMPORARILY DISABLED
maniqa_pipe = None
print("MANIQA (honklers/maniqa-nr) is temporarily disabled due to loading issues. Will look for alternatives.")
# try:
#     maniqa_pipe = transformers_pipeline("image-classification", model="honklers/maniqa-nr",
#                                         device=torch.device(DEVICE).index if DEVICE == "cuda" else -1)
# except Exception as e:
#     print(f"Error loading honklers/maniqa-nr: {e}")
#     maniqa_pipe = None

# 4. CLIP Score (laion/CLIP-ViT-L-14-laion2B-s32B-b82K) - open_clip
clip_model_name = 'ViT-L-14'
try:
    # In open_clip, `pretrained` names a weight set; 'laion2b_s32b_b82k' is one of
    # the published weight tags for ViT-L-14.
    clip_model_instance, clip_preprocess_train, clip_preprocess_val = open_clip.create_model_and_transforms(
        clip_model_name,
        pretrained='laion2b_s32b_b82k',
        device=DEVICE,
    )
    clip_preprocess = clip_preprocess_val  # use the inference-time transforms
    clip_tokenizer = open_clip.get_tokenizer(clip_model_name)
    clip_model_instance.eval()
    print(f"CLIP model {clip_model_name} (laion2b_s32b_b82k) loaded successfully.")
except Exception as e:
    print(f"Error loading CLIP model {clip_model_name} (laion2b_s32b_b82k): {e}")
    clip_model_instance, clip_preprocess, clip_tokenizer = None, None, None

# 5. AI Detectors
# Organika/sdxl-detector - transformers pipeline
try:
    sdxl_detector_pipe = transformers_pipeline(
        "image-classification",
        model="Organika/sdxl-detector",
        device=torch.device(DEVICE).index if DEVICE == "cuda" else -1,
    )
    print("Organika/sdxl-detector loaded successfully.")
except Exception as e:
    print(f"Error loading Organika/sdxl-detector: {e}")
    sdxl_detector_pipe = None

# deepghs/anime_ai_check - ONNX
ANIME_AI_CHECK_REPO = "deepghs/anime_ai_check"
ANIME_AI_CHECK_SUBFOLDER = "caformer_s36_plus_sce"
ANIME_AI_CHECK_IMG_SIZE = (384, 384)

# --- Metadata extraction ---
def extract_sd_parameters(image_pil):
    if image_pil is None:
        return "", "N/A", "N/A", "N/A", {}
    parameters_str = image_pil.info.get("parameters", "")
    if not parameters_str:
        return "", "N/A", "N/A", "N/A", {}
    prompt, negative_prompt, model_name, model_hash, other_params_dict = "", "N/A", "N/A", "N/A", {}
    try:
        neg_prompt_index = parameters_str.find("Negative prompt:")
        steps_meta_index = parameters_str.find("Steps:")
        if neg_prompt_index != -1:
            prompt = parameters_str[:neg_prompt_index].strip()
            params_part_start_index = steps_meta_index if steps_meta_index > neg_prompt_index else -1
            if params_part_start_index != -1:
                negative_prompt = parameters_str[neg_prompt_index + len("Negative prompt:"):params_part_start_index].strip()
                params_part = parameters_str[params_part_start_index:]
            else:
                end_of_neg = parameters_str.find("\n", neg_prompt_index + len("Negative prompt:"))
                if end_of_neg == -1:
                    end_of_neg = len(parameters_str)
                negative_prompt = parameters_str[neg_prompt_index + len("Negative prompt:"):end_of_neg].strip()
                params_part = parameters_str[end_of_neg:].strip() if end_of_neg < len(parameters_str) else ""
        elif steps_meta_index != -1:
            prompt = parameters_str[:steps_meta_index].strip()
            params_part = parameters_str[steps_meta_index:]
        else:
            prompt = parameters_str.strip()
            params_part = ""
        if params_part:
            params_list = [p.strip() for p in params_part.split(",")]
            temp_other_params = {}
            for param_val_str in params_list:
                parts = param_val_str.split(':', 1)
                if len(parts) == 2:
                    key, value = parts[0].strip(), parts[1].strip()
                    temp_other_params[key] = value
                    if key == "Model":
                        model_name = value
                    elif key == "Model hash":
                        model_hash = value
            for k, v in temp_other_params.items():
                if k not in ["Model", "Model hash"]:
                    other_params_dict[k] = v
        if model_name == "N/A" and model_hash != "N/A":
            model_name = f"hash_{model_hash}"
        # Fallback for the model name when only a checkpoint key is present (e.g. from ComfyUI)
        if model_name == "N/A" and "Checkpoint" in other_params_dict:
            model_name = other_params_dict["Checkpoint"]
        if model_name == "N/A" and "model" in other_params_dict:
            model_name = other_params_dict["model"]  # another common key
    except Exception as e:
        print(f"Error parsing metadata: {e}")
    return prompt, negative_prompt, model_name, model_hash, other_params_dict
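# Worked example for extract_sd_parameters above (hypothetical A1111-style metadata;
# defined for illustration only and never called by the app):
def _example_extract_sd_parameters():
    img = Image.new("RGB", (8, 8))
    img.info["parameters"] = (
        "masterpiece, 1girl\n"
        "Negative prompt: lowres, bad anatomy\n"
        "Steps: 28, Sampler: Euler a, CFG scale: 7, Seed: 123, "
        "Model hash: abc123de, Model: myModel_v1"
    )
    # Returns: ("masterpiece, 1girl", "lowres, bad anatomy", "myModel_v1", "abc123de",
    #           {"Steps": "28", "Sampler": "Euler a", "CFG scale": "7", "Seed": "123"})
    return extract_sd_parameters(img)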
# --- Scoring functions ---
@torch.no_grad()
def get_image_reward(image_pil):
    if not reward_model or not reward_processor:
        return "N/A"
    try:
        # CLIP-style image preprocessing; padding/truncation are tokenizer arguments
        # and are not accepted by CLIPImageProcessor, so they are not passed here.
        inputs = reward_processor(images=image_pil, return_tensors="pt").to(DEVICE)
        outputs = reward_model(**inputs)
        return round(outputs.logits.item(), 4)
    except Exception as e:
        print(f"Error in ImageReward: {e}")
        return "Error"

def get_anime_aesthetic_score_deepghs(image_pil):
    session, labels, meta = get_onnx_session_and_meta(ANIME_AESTHETIC_REPO, ANIME_AESTHETIC_SUBFOLDER)
    if not session or not labels:
        return "N/A"
    try:
        input_data = _img_preprocess_for_onnx(image_pil.copy(), size=ANIME_AESTHETIC_IMG_SIZE)
        input_name = session.get_inputs()[0].name
        output_name = session.get_outputs()[0].name
        onnx_output, = session.run([output_name], {input_name: input_data})
        scores = onnx_output[0]
        exp_scores = np.exp(scores - np.max(scores))  # numerically stable softmax
        probabilities = exp_scores / np.sum(exp_scores)
        weighted_score = sum(probabilities[i] * ANIME_AESTHETIC_LABEL_WEIGHTS.get(label, 0.0)
                             for i, label in enumerate(labels))
        return round(weighted_score, 4)
    except Exception as e:
        print(f"Error in Anime Aesthetic (ONNX): {e}")
        return "Error"

@torch.no_grad()
def get_maniqa_score(image_pil):
    # Temporarily disabled; see the commented-out pipeline above.
    # if not maniqa_pipe:
    #     return "N/A"
    # try:
    #     result = maniqa_pipe(image_pil.copy())
    #     score = 0.0
    #     for item in result:
    #         if item['label'].lower() == 'good quality':
    #             score = item['score']
    #             break
    #     return round(score, 4)
    # except Exception as e:
    #     print(f"Error in MANIQA: {e}")
    #     return "Error"
    return "N/A (Disabled)"

@torch.no_grad()
def calculate_clip_score_value(image_pil, prompt_text):
    if not clip_model_instance or not clip_preprocess or not clip_tokenizer or not prompt_text or prompt_text == "N/A":
        return "N/A"
    try:
        image_input = clip_preprocess(image_pil).unsqueeze(0).to(DEVICE)
        # Make sure the prompt is a non-empty string before tokenizing.
        text_for_tokenizer = str(prompt_text) if prompt_text else ""
        if not text_for_tokenizer:
            return "N/A (Empty Prompt)"
        text_input = clip_tokenizer([text_for_tokenizer]).to(DEVICE)
        image_features = clip_model_instance.encode_image(image_input)
        text_features = clip_model_instance.encode_text(text_input)
        image_features_norm = image_features / image_features.norm(p=2, dim=-1, keepdim=True)
        text_features_norm = text_features / text_features.norm(p=2, dim=-1, keepdim=True)
        score = (text_features_norm @ image_features_norm.T).squeeze().item() * 100.0
        return round(score, 2)
    except Exception as e:
        print(f"Error in CLIP Score: {e}")
        return "Error"
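# Aside: the torchmetrics `clip_score` imported at the top computes essentially the
# same quantity (100 * cosine similarity, clamped at 0) with an HF CLIP checkpoint.
# A minimal sketch of the equivalent call, kept for reference and unused by the app
# (downloads "openai/clip-vit-large-patch14" on first use; expects a uint8 CHW tensor):
def _clip_score_via_torchmetrics(image_pil, prompt_text):
    image_tensor = torch.from_numpy(np.array(image_pil.convert("RGB"))).permute(2, 0, 1)
    score = clip_score(image_tensor, prompt_text, model_name_or_path="openai/clip-vit-large-patch14")
    return round(score.item(), 2)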
@torch.no_grad()
def get_sdxl_detection_score(image_pil):
    if not sdxl_detector_pipe:
        return "N/A"
    try:
        result = sdxl_detector_pipe(image_pil.copy())
        ai_score = 0.0
        for item in result:
            if item['label'].lower() == 'artificial':
                ai_score = item['score']
                break
        return round(ai_score, 4)
    except Exception as e:
        print(f"Error in SDXL Detector: {e}")
        return "Error"

def get_anime_ai_check_score_deepghs(image_pil):
    session, labels, meta = get_onnx_session_and_meta(ANIME_AI_CHECK_REPO, ANIME_AI_CHECK_SUBFOLDER)
    if not session or not labels:
        return "N/A"
    try:
        input_data = _img_preprocess_for_onnx(image_pil.copy(), size=ANIME_AI_CHECK_IMG_SIZE)
        input_name = session.get_inputs()[0].name
        output_name = session.get_outputs()[0].name
        onnx_output, = session.run([output_name], {input_name: input_data})
        scores = onnx_output[0]
        exp_scores = np.exp(scores - np.max(scores))  # numerically stable softmax
        probabilities = exp_scores / np.sum(exp_scores)
        ai_prob = 0.0
        for i, label in enumerate(labels):
            if label.lower() == 'ai':
                ai_prob = probabilities[i]
                break
        return round(ai_prob, 4)
    except Exception as e:
        print(f"Error in Anime AI Check (ONNX): {e}")
        return "Error"
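# Worked example of the softmax-weighted scoring shared by the two deepghs heads above:
# with labels ["normal", "slight", "moderate", "strong", "extreme"] and softmax
# probabilities [0.1, 0.2, 0.4, 0.2, 0.1], the anime-aesthetic score is the expected
# label weight 0.1*0 + 0.2*1 + 0.4*2 + 0.2*3 + 0.1*4 = 2.0 on a 0-4 scale, while
# anime_ai_check simply reads off the softmax probability of the "ai" label.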
# --- Main processing function ---
def process_images(files, progress=gr.Progress(track_tqdm=True)):
    if not files:
        return pd.DataFrame(), None, None, None, None, "Please upload some images."
    all_results = []
    for i, file_obj in enumerate(files):
        filename = "Unknown File"
        try:
            # file_obj.name may be an absolute path on the server
            filename = os.path.basename(getattr(file_obj, 'name', f"file_{i}"))
            img = Image.open(getattr(file_obj, 'name', str(file_obj)))
            if img.mode != "RGB":
                img = img.convert("RGB")
            prompt, neg_prompt, model_n, model_h, other_p = extract_sd_parameters(img)
            reward = get_image_reward(img)
            anime_aes_deepghs = get_anime_aesthetic_score_deepghs(img)
            maniqa = get_maniqa_score(img)  # currently "N/A (Disabled)"
            clip_val = calculate_clip_score_value(img, prompt)
            sdxl_detect = get_sdxl_detection_score(img)
            anime_ai_chk_deepghs = get_anime_ai_check_score_deepghs(img)
            all_results.append({
                "Filename": filename, "Prompt": prompt if prompt else "N/A",
                "Model Name": model_n, "Model Hash": model_h,
                "ImageReward": reward, "AnimeAesthetic_dg": anime_aes_deepghs,
                "MANIQA_TQ": maniqa, "CLIPScore": clip_val,
                "SDXL_Detector_AI_Prob": sdxl_detect,
                "AnimeAI_Check_dg_Prob": anime_ai_chk_deepghs,
            })
        except Exception as e:
            print(f"CRITICAL: Failed to process {filename}: {e}")
            all_results.append({
                "Filename": filename, "Prompt": "Error", "Model Name": "Error",
                "Model Hash": "Error", "ImageReward": "Error",
                "AnimeAesthetic_dg": "Error", "MANIQA_TQ": "Error",
                "CLIPScore": "Error", "SDXL_Detector_AI_Prob": "Error",
                "AnimeAI_Check_dg_Prob": "Error",
            })

    df = pd.DataFrame(all_results)
    plot_model_avg_scores_buffer, plot_prompt_clip_scores_buffer = None, None
    csv_file_path, json_file_path = None, None
    if not df.empty:
        numeric_cols = ["ImageReward", "AnimeAesthetic_dg", "MANIQA_TQ", "CLIPScore"]
        for col in numeric_cols:
            df[col] = pd.to_numeric(df[col], errors='coerce')

        # Plot 1: average scores per model
        df_model_plot = df[(df["Model Name"] != "N/A") & (df["Model Name"].notna())]
        if not df_model_plot.empty and df_model_plot["Model Name"].nunique() > 0:
            try:
                model_avg_scores = df_model_plot.groupby("Model Name")[numeric_cols].mean().dropna(how='all')
                if not model_avg_scores.empty:
                    fig1, ax1 = plt.subplots(figsize=(12, 7))
                    model_avg_scores.plot(kind="bar", ax=ax1)
                    ax1.set_title("Average Scores per Model")
                    ax1.set_ylabel("Average Score")
                    ax1.tick_params(axis='x', rotation=45, labelsize=8)
                    plt.tight_layout()
                    plot_model_avg_scores_buffer = io.BytesIO()
                    fig1.savefig(plot_model_avg_scores_buffer, format="png")
                    plot_model_avg_scores_buffer.seek(0)
                    plt.close(fig1)
            except Exception as e:
                print(f"Error generating model average scores plot: {e}")

        # Plot 2: average CLIPScore per prompt
        # .copy() avoids pandas' SettingWithCopyWarning when adding "Short Prompt".
        df_prompt_plot = df[(df["Prompt"] != "N/A") & (df["Prompt"].notna()) & (df["CLIPScore"].notna())].copy()
        if not df_prompt_plot.empty and df_prompt_plot["Prompt"].nunique() > 0:
            try:
                df_prompt_plot["Short Prompt"] = df_prompt_plot["Prompt"].apply(
                    lambda x: (str(x)[:30] + '...') if len(str(x)) > 33 else str(x))
                prompt_clip_scores = df_prompt_plot.groupby("Short Prompt")["CLIPScore"].mean().sort_values(ascending=False)
                if not prompt_clip_scores.empty:  # a single prompt is enough to plot
                    fig2, ax2 = plt.subplots(figsize=(12, max(7, min(len(prompt_clip_scores) * 0.5, 15))))
                    prompt_clip_scores.head(20).plot(kind="barh", ax=ax2)
                    ax2.set_title("Average CLIPScore per Prompt (Top 20 unique prompts)")
                    ax2.set_xlabel("Average CLIPScore")
                    plt.tight_layout()
                    plot_prompt_clip_scores_buffer = io.BytesIO()
                    fig2.savefig(plot_prompt_clip_scores_buffer, format="png")
                    plot_prompt_clip_scores_buffer.seek(0)
                    plt.close(fig2)
            except Exception as e:
                print(f"Error generating prompt CLIP scores plot: {e}")

        # gr.File expects a real file path, not raw CSV/JSON text, so write the
        # exports to temporary files and hand back their paths.
        csv_b = io.StringIO()
        df.to_csv(csv_b, index=False)
        with tempfile.NamedTemporaryFile(mode="w", suffix=".csv", prefix="evaluation_results_", delete=False) as f:
            f.write(csv_b.getvalue())
            csv_file_path = f.name
        json_b = io.StringIO()
        df.to_json(json_b, orient='records', indent=4)
        with tempfile.NamedTemporaryFile(mode="w", suffix=".json", prefix="evaluation_results_", delete=False) as f:
            f.write(json_b.getvalue())
            json_file_path = f.name

    return (
        df,
        gr.Image(value=Image.open(plot_model_avg_scores_buffer) if plot_model_avg_scores_buffer else None,
                 type="pil", visible=plot_model_avg_scores_buffer is not None),
        gr.Image(value=Image.open(plot_prompt_clip_scores_buffer) if plot_prompt_clip_scores_buffer else None,
                 type="pil", visible=plot_prompt_clip_scores_buffer is not None),
        gr.File(value=csv_file_path, label="Download CSV Results", visible=csv_file_path is not None),
        gr.File(value=json_file_path, label="Download JSON Results", visible=json_file_path is not None),
        f"Processed {len(all_results)} images.",
    )

# --- Gradio Interface ---
with gr.Blocks(css="footer {display: none !important}") as demo:
    gr.Markdown("# AI Image Model Evaluation Tool")
    gr.Markdown("Upload PNG images (ideally with Stable Diffusion metadata) to evaluate them...")
    with gr.Row():
        image_uploader = gr.Files(label="Upload Images (PNG)", file_count="multiple", file_types=["image"])
        process_button = gr.Button("Evaluate Images", variant="primary")
    status_textbox = gr.Textbox(label="Status", interactive=False)
    gr.Markdown("## Evaluation Results Table")
    results_table = gr.DataFrame(
        headers=[  # max_rows removed (no longer supported)
            "Filename", "Prompt", "Model Name", "Model Hash",
            "ImageReward", "AnimeAesthetic_dg", "MANIQA_TQ", "CLIPScore",
            "SDXL_Detector_AI_Prob", "AnimeAI_Check_dg_Prob",
        ],
        wrap=True)
    with gr.Row():
        download_csv_button = gr.File(label="Download CSV Results", interactive=False)
        download_json_button = gr.File(label="Download JSON Results", interactive=False)
    gr.Markdown("## Visualizations")
    with gr.Row():
        plot_output_model_avg = gr.Image(label="Average Scores per Model", type="pil", interactive=False)
        plot_output_prompt_clip = gr.Image(label="Average CLIPScore per Prompt", type="pil", interactive=False)
    process_button.click(
        fn=process_images,
        inputs=[image_uploader],
        outputs=[results_table, plot_output_model_avg, plot_output_prompt_clip,
                 download_csv_button, download_json_button, status_textbox])
    gr.Markdown("""**Metric Explanations:** ... (unchanged)""")

if __name__ == "__main__":
    demo.launch(debug=True)
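# Hypothetical headless usage (not wired into the UI): process_images also accepts
# plain path strings, since it falls back to str(file_obj) when the object has no
# .name attribute (filenames are then reported as "file_<i>"). Untested sketch:
#
# from pathlib import Path
# df, *_ = process_images([str(p) for p in Path("./images").glob("*.png")])
# print(df.head())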