Spaces:

VOIDER
/

CIET

Sleeping

App Files Files Community

CIET / app.py

VOIDER

Update app.py

57728d7 verified 29 days ago

raw

history blame

14 kB

	import gradio as gr
	import torch
	import os
	import numpy as np
	import cv2
	import onnxruntime as rt
	from PIL import Image
	from transformers import pipeline
	from huggingface_hub import hf_hub_download
	import pandas as pd
	import tempfile
	import shutil

	# Utility classes and functions from provided code
	class MLP(torch.nn.Module):
	    """Fully connected regression head mapping a feature vector to one scalar.

	    The network is four ``Linear -> ReLU -> BatchNorm1d/Identity -> Dropout``
	    stages (2048, 512, 256 and 128 units) followed by
	    ``Linear(128, 32) -> ReLU -> Linear(32, 1)``.  The module order inside
	    ``self.layers`` is fixed, so parameter names stay ``layers.<index>.*``.

	    Args:
	        input_size: Width of the input feature vector.
	        xcol: Stored as ``self.xcol``; not used anywhere in this class.
	        ycol: Stored as ``self.ycol``; not used anywhere in this class.
	        batch_norm: When true every hidden stage contains a ``BatchNorm1d``;
	            otherwise an ``Identity`` occupies that slot.
	    """

	    def __init__(self, input_size, xcol='emb', ycol='avg_rating', batch_norm=True):
	        super().__init__()
	        self.input_size = input_size
	        self.xcol = xcol
	        self.ycol = ycol

	        nn = torch.nn
	        # (output width, dropout probability) of each hidden stage, in order.
	        stage_specs = ((2048, 0.3), (512, 0.3), (256, 0.2), (128, 0.1))

	        modules = []
	        fan_in = self.input_size
	        for width, drop_p in stage_specs:
	            modules.append(nn.Linear(fan_in, width))
	            modules.append(nn.ReLU())
	            modules.append(nn.BatchNorm1d(width) if batch_norm else nn.Identity())
	            modules.append(nn.Dropout(drop_p))
	            fan_in = width

	        # Output head: no normalisation or dropout after the last hidden stage.
	        modules.extend((nn.Linear(fan_in, 32), nn.ReLU(), nn.Linear(32, 1)))
	        self.layers = nn.Sequential(*modules)

	    def forward(self, x):
	        """Feed ``x`` through the layer stack and return its output."""
	        return self.layers(x)


	class WaifuScorer(object):
	    """Score images with a CLIP ViT-L/14 encoder followed by an ``MLP`` head.

	    Args:
	        model_path: Path to the MLP weights.  When it is not an existing local
	            file it is read as ``<user>/<repo>/<file>`` and fetched with
	            ``hf_hub_download``.  Defaults to
	            ``Eugeoter/waifu-scorer-v4-beta/model.pth``.
	        device: Device handed to ``torch.load``, ``clip.load`` and ``.to()``.
	        cache_dir: Forwarded to ``hf_hub_download``.
	        verbose: When true, report the fallback to the default model path.

	    Raises:
	        ValueError: If ``model_path`` is neither an existing file nor made of
	            at least three ``/``-separated parts.
	    """

	    def __init__(self, model_path=None, device='cuda', cache_dir=None, verbose=False):
	        self.verbose = verbose

	        # Import clip here to avoid a global import of the package.
	        import clip

	        if model_path is None:
	            model_path = "Eugeoter/waifu-scorer-v4-beta/model.pth"
	            if self.verbose:
	                print(f"model path not set, switch to default: `{model_path}`")

	        # Download from the Hugging Face Hub when the path is not a local file.
	        if not os.path.isfile(model_path):
	            parts = model_path.split("/")
	            if len(parts) < 3:
	                # Without this check the unpacking below fails with an
	                # IndexError that does not mention the offending path.
	                raise ValueError(
	                    f"`{model_path}` is not an existing file and is not of the "
	                    "form `<user>/<repo>/<file>`"
	                )
	            username, repo_id, model_name = parts[-3:]
	            model_path = hf_hub_download(f"{username}/{repo_id}", model_name, cache_dir=cache_dir)

	        print(f"Loading WaifuScorer model from `{model_path}`")

	        # Load MLP model
	        self.mlp = MLP(input_size=768)
	        # NOTE(review): torch.load unpickles the checkpoint, which can execute
	        # arbitrary code; only point model_path at trusted files (consider
	        # weights_only=True where the installed torch supports it).
	        state = torch.load(model_path, map_location=device)
	        self.mlp.load_state_dict(state)
	        self.mlp.to(device)
	        self.mlp.eval()

	        # Load CLIP model
	        self.model2, self.preprocess = clip.load("ViT-L/14", device=device)
	        self.device = device
	        self.dtype = torch.float32

	    @torch.no_grad()
	    def __call__(self, images):
	        """Return one score, clamped to ``[0, 10]``, per input image.

	        Args:
	            images: A single PIL image or a sequence of PIL images.

	        Returns:
	            A list of floats in input order; ``[]`` for an empty sequence.
	        """
	        if isinstance(images, Image.Image):
	            images = [images]
	        n = len(images)
	        if n == 0:
	            # torch.cat rejects an empty tensor list, so answer directly.
	            return []
	        if n == 1 and self.mlp.training:
	            # BatchNorm1d cannot normalise a batch of one in training mode, so
	            # the sample is duplicated.  __init__ puts the MLP in eval mode,
	            # where running statistics are used and no duplicate is needed.
	            images = list(images) * 2

	        # Preprocess and encode images
	        image_tensors = [self.preprocess(img).unsqueeze(0) for img in images]
	        image_batch = torch.cat(image_tensors).to(self.device)
	        image_features = self.model2.encode_image(image_batch)

	        # L2-normalise the features; zero norms are replaced by 1 so the
	        # division cannot produce NaN/inf.
	        l2 = image_features.norm(2, dim=-1, keepdim=True)
	        l2[l2 == 0] = 1
	        im_emb_arr = (image_features / l2).to(device=self.device, dtype=self.dtype)

	        # Get predictions
	        predictions = self.mlp(im_emb_arr)
	        scores = predictions.clamp(0, 10).cpu().numpy().reshape(-1).tolist()

	        # Drop the score of the duplicated sample, if one was added.
	        return scores[:n]


	def load_aesthetic_predictor_v2_5():
	    """Return a stand-in object for the Aesthetic Predictor V2.5 model.

	    No real model is loaded here: the returned object's ``inference`` method
	    ignores its argument and yields a uniformly random value between 1 and 10.
	    """

	    class AestheticPredictorV2_5:
	        """Placeholder predictor; prints a loading message when constructed."""

	        def __init__(self):
	            # A full implementation would load the actual model here.
	            print("Loading Aesthetic Predictor V2.5...")

	        def inference(self, image):
	            """Return a random placeholder score; ``image`` is not inspected."""
	            low, high = 1, 10
	            placeholder_score = np.random.uniform(low, high)
	            return placeholder_score

	    predictor = AestheticPredictorV2_5()
	    return predictor


	def load_anime_aesthetic_model():
	    """Fetch ``model.onnx`` from ``skytnt/anime-aesthetic`` and open it.

	    Returns:
	        An ONNX Runtime ``InferenceSession`` created with the CPU provider.
	    """
	    onnx_file = hf_hub_download(repo_id="skytnt/anime-aesthetic", filename="model.onnx")
	    return rt.InferenceSession(onnx_file, providers=['CPUExecutionProvider'])


	def predict_anime_aesthetic(img, model):
	    """Score an image with the anime-aesthetic ONNX model.

	    The image is scaled so that its longer side is 768 px, centred on a black
	    768x768 canvas, rearranged to a ``(1, 3, 768, 768)`` float32 array and
	    passed to the session's ``"img"`` input.

	    Args:
	        img: Array-like image of shape ``(H, W)``, ``(H, W, 1)``, ``(H, W, 3)``
	            or ``(H, W, 4)``.  Values are divided by 255 (assumes a 0-255
	            range; this is not checked).
	        model: Object exposing an ONNX Runtime style ``run(names, feeds)``.

	    Returns:
	        The model's single output value as a Python number.

	    Raises:
	        ValueError: If the image has no pixels.
	    """
	    img = np.array(img).astype(np.float32) / 255

	    # The canvas below has exactly three channels, so grayscale input is
	    # replicated and an alpha channel is dropped.
	    if img.ndim == 2:
	        img = img[:, :, np.newaxis]
	    if img.shape[-1] == 1:
	        img = np.repeat(img, 3, axis=-1)
	    elif img.shape[-1] == 4:
	        img = np.ascontiguousarray(img[:, :, :3])

	    s = 768
	    h, w = img.shape[:-1]
	    if h == 0 or w == 0:
	        raise ValueError("image has no pixels")

	    # Fit the longer side to s.  The shorter side is clamped to at least one
	    # pixel: for extreme aspect ratios int() truncates it to 0, which
	    # cv2.resize rejects.
	    if h > w:
	        h, w = s, max(1, int(s * w / h))
	    else:
	        h, w = max(1, int(s * h / w)), s

	    # Centre the resized image on a zero (black) square canvas.
	    ph, pw = s - h, s - w
	    img_input = np.zeros([s, s, 3], dtype=np.float32)
	    img_input[ph // 2:ph // 2 + h, pw // 2:pw // 2 + w] = cv2.resize(img, (w, h))

	    # HWC -> CHW, then add the batch axis.
	    img_input = np.transpose(img_input, (2, 0, 1))
	    img_input = img_input[np.newaxis, :]
	    pred = model.run(None, {"img": img_input})[0].item()
	    return pred


	class ImageEvaluationTool:
	    """Load several aesthetic/quality models once and score images with them.

	    Attributes set by ``__init__``:
	        device: ``'cuda'`` when ``torch.cuda.is_available()``, else ``'cpu'``.
	        aesthetic_shadow, cafe_aesthetic, cafe_style, cafe_waifu:
	            ``transformers`` image-classification pipelines.
	        waifu_scorer: A ``WaifuScorer``, or ``None`` when loading it failed.
	        aesthetic_predictor_v2_5: Object from ``load_aesthetic_predictor_v2_5``.
	        anime_aesthetic: Session from ``load_anime_aesthetic_model``.
	        temp_dir: Directory that receives the generated thumbnails; removed by
	            ``cleanup``, which is also registered to run at interpreter exit.
	    """

	    def __init__(self):
	        # Function-scope import: only needed to register the exit hook below.
	        import atexit

	        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
	        print(f"Using device: {self.device}")

	        # Load all models
	        print("Loading models... This may take some time.")

	        # 1. Aesthetic Shadow
	        print("Loading Aesthetic Shadow model...")
	        self.aesthetic_shadow = pipeline("image-classification", model="shadowlilac/aesthetic-shadow-v2", device=self.device)

	        try:
	            # 2. Waifu Scorer (requires CLIP)
	            print("Loading Waifu Scorer model...")
	            self.waifu_scorer = WaifuScorer(device=self.device, verbose=True)
	        except Exception as e:
	            # Best effort: the tool stays usable without this one model.
	            print(f"Error loading Waifu Scorer: {e}")
	            self.waifu_scorer = None

	        # 3. Aesthetic Predictor V2.5 (placeholder)
	        print("Loading Aesthetic Predictor V2.5...")
	        self.aesthetic_predictor_v2_5 = load_aesthetic_predictor_v2_5()

	        # 4. Cafe Aesthetic models, placed on the same device as the
	        # Aesthetic Shadow pipeline above.
	        print("Loading Cafe Aesthetic models...")
	        self.cafe_aesthetic = pipeline("image-classification", "cafeai/cafe_aesthetic", device=self.device)
	        self.cafe_style = pipeline("image-classification", "cafeai/cafe_style", device=self.device)
	        self.cafe_waifu = pipeline("image-classification", "cafeai/cafe_waifu", device=self.device)

	        # 5. Anime Aesthetic
	        print("Loading Anime Aesthetic model...")
	        self.anime_aesthetic = load_anime_aesthetic_model()

	        print("All models loaded successfully!")

	        # Create temp directory for storing processed images, and make sure it
	        # is removed when the interpreter exits.
	        self.temp_dir = tempfile.mkdtemp()
	        atexit.register(self.cleanup)

	    def evaluate_image(self, image):
	        """Evaluate a single image with all models.

	        Args:
	            image: A PIL image, or an array accepted by ``Image.fromarray``.

	        Returns:
	            A dict with the keys ``aesthetic_shadow``, ``waifu_scorer``,
	            ``aesthetic_predictor_v2_5``, ``cafe_aesthetic_good``,
	            ``cafe_aesthetic_bad``, ``cafe_style``, ``cafe_waifu`` and
	            ``anime_aesthetic``.  A model that fails (or is unavailable)
	            contributes ``None`` for its keys; the error is printed.
	        """
	        results = {}

	        # Convert to PIL Image if not already
	        if not isinstance(image, Image.Image):
	            image = Image.fromarray(image)

	        # 1. Aesthetic Shadow
	        try:
	            shadow_result = self.aesthetic_shadow(images=[image])[0]
	            hq_score = next((p['score'] for p in shadow_result if p['label'] == 'hq'), None)
	            if hq_score is None:
	                raise LookupError("no 'hq' label in classifier output")
	            results['aesthetic_shadow'] = round(hq_score, 2)
	        except Exception as e:
	            print(f"Error in Aesthetic Shadow: {e}")
	            results['aesthetic_shadow'] = None

	        # 2. Waifu Scorer (None when it could not be loaded)
	        results['waifu_scorer'] = None
	        if self.waifu_scorer is not None:
	            try:
	                waifu_score = self.waifu_scorer([image])[0]
	                results['waifu_scorer'] = round(waifu_score, 2)
	            except Exception as e:
	                print(f"Error in Waifu Scorer: {e}")

	        # 3. Aesthetic Predictor V2.5
	        try:
	            v2_5_score = self.aesthetic_predictor_v2_5.inference(image)
	            results['aesthetic_predictor_v2_5'] = round(v2_5_score, 2)
	        except Exception as e:
	            print(f"Error in Aesthetic Predictor V2.5: {e}")
	            results['aesthetic_predictor_v2_5'] = None

	        # 4. Cafe Aesthetic
	        try:
	            cafe_aesthetic_result = self.cafe_aesthetic(image, top_k=2)
	            cafe_aesthetic_score = {d["label"]: round(d["score"], 2) for d in cafe_aesthetic_result}
	            results['cafe_aesthetic_good'] = cafe_aesthetic_score.get('good', 0)
	            results['cafe_aesthetic_bad'] = cafe_aesthetic_score.get('bad', 0)

	            cafe_style_result = self.cafe_style(image, top_k=1)
	            results['cafe_style'] = cafe_style_result[0]["label"]

	            cafe_waifu_result = self.cafe_waifu(image, top_k=1)
	            results['cafe_waifu'] = cafe_waifu_result[0]["label"]
	        except Exception as e:
	            print(f"Error in Cafe Aesthetic: {e}")
	            results['cafe_aesthetic_good'] = None
	            results['cafe_aesthetic_bad'] = None
	            results['cafe_style'] = None
	            results['cafe_waifu'] = None

	        # 5. Anime Aesthetic
	        try:
	            img_array = np.array(image)
	            anime_score = predict_anime_aesthetic(img_array, self.anime_aesthetic)
	            results['anime_aesthetic'] = round(anime_score, 2)
	        except Exception as e:
	            print(f"Error in Anime Aesthetic: {e}")
	            results['anime_aesthetic'] = None

	        return results

	    def process_images(self, image_files):
	        """Process multiple image files and return results.

	        Args:
	            image_files: Iterable of image file paths.

	        Returns:
	            One dict per successfully processed file, holding ``file_name``,
	            ``thumbnail`` (path of a JPEG of at most 200x200 px) and every key
	            produced by ``evaluate_image``.  Files that raise are reported
	            with ``print`` and left out.
	        """
	        results = []

	        # Each call writes into its own sub-directory.  With a shared
	        # directory the fixed names thumbnail_0.jpg, thumbnail_1.jpg, ... of a
	        # later (or concurrent) call would overwrite files that earlier
	        # results still point to.
	        os.makedirs(self.temp_dir, exist_ok=True)
	        batch_dir = tempfile.mkdtemp(prefix="batch_", dir=self.temp_dir)

	        for i, file_path in enumerate(image_files):
	            try:
	                # Close the source file promptly; convert() hands back a
	                # separate image object.
	                with Image.open(file_path) as source:
	                    img = source.convert("RGB")

	                # Get image evaluation results
	                eval_results = self.evaluate_image(img)

	                # Save a thumbnail for the results table (thumbnail() shrinks
	                # the image in place, hence after evaluation).
	                thumbnail_path = os.path.join(batch_dir, f"thumbnail_{i}.jpg")
	                img.thumbnail((200, 200))
	                img.save(thumbnail_path)

	                # Add file info and thumbnail path to results
	                results.append({
	                    'file_name': os.path.basename(file_path),
	                    'thumbnail': thumbnail_path,
	                    **eval_results,
	                })

	            except Exception as e:
	                # Best effort: one unreadable file must not abort the batch.
	                print(f"Error processing {file_path}: {e}")

	        return results

	    def cleanup(self):
	        """Clean up temporary files.

	        Safe to call repeatedly, and on an instance whose ``__init__`` never
	        reached the point of creating ``temp_dir``.
	        """
	        temp_dir = getattr(self, "temp_dir", None)
	        if temp_dir and os.path.isdir(temp_dir):
	            # Runs as an exit hook, so removal problems are ignored, not raised.
	            shutil.rmtree(temp_dir, ignore_errors=True)


	# Create the Gradio interface
	def create_interface():
	    """Build the Gradio Blocks app around a freshly loaded ImageEvaluationTool.

	    The UI offers a multi-file upload, an "Evaluate Images" button that fills
	    a thumbnail gallery and a results table, and a "Clear Results" button
	    that empties both.

	    Returns:
	        The assembled ``gr.Blocks`` instance; it is not launched here.
	    """
	    evaluator = ImageEvaluationTool()

	    # (table column header, key in the evaluator's per-image result dict)
	    table_columns = [
	        ("File Name", "file_name"),
	        ("Aesthetic Shadow", "aesthetic_shadow"),
	        ("Waifu Scorer", "waifu_scorer"),
	        ("Aesthetic V2.5", "aesthetic_predictor_v2_5"),
	        ("Cafe (Good)", "cafe_aesthetic_good"),
	        ("Cafe (Bad)", "cafe_aesthetic_bad"),
	        ("Cafe Style", "cafe_style"),
	        ("Cafe Waifu", "cafe_waifu"),
	        ("Anime Score", "anime_aesthetic"),
	    ]
	    table_headers = [header for header, _ in table_columns]

	    with gr.Blocks(theme=gr.themes.Soft()) as demo:
	        gr.Markdown("""
	        # Comprehensive Image Evaluation Tool

	        Upload images to evaluate them using multiple aesthetic and quality prediction models:

	        - Aesthetic Shadow: Evaluates high-quality vs low-quality images
	        - Waifu Scorer: Rates anime/illustration quality from 0-10
	        - Aesthetic Predictor V2.5: General aesthetic quality prediction
	        - Cafe Aesthetic: Multiple models for style and quality analysis
	        - Anime Aesthetic: Specific model for anime style images

	        Upload multiple images to get a comprehensive evaluation table.
	        """)

	        with gr.Row():
	            with gr.Column(scale=1):
	                input_images = gr.Files(label="Upload Images")
	                process_btn = gr.Button("Evaluate Images", variant="primary")
	                clear_btn = gr.Button("Clear Results")

	            with gr.Column(scale=2):
	                output_gallery = gr.Gallery(label="Evaluated Images", columns=5, object_fit="contain")
	                output_table = gr.Dataframe(label="Evaluation Results")

	        def process_images(files):
	            """Evaluate the uploaded files; return (gallery items, DataFrame)."""
	            # The component yields None when nothing was uploaded.  Entries
	            # are plain path strings or file wrappers exposing `.name`,
	            # depending on the Gradio version.
	            file_paths = [f if isinstance(f, str) else f.name for f in (files or [])]

	            # Process images
	            results = evaluator.process_images(file_paths)

	            # Gallery items are (image, caption) pairs.
	            gallery_images = [(r["thumbnail"], r["file_name"]) for r in results]

	            # Fixed columns keep the table header even when there are no rows.
	            rows = [{header: r[key] for header, key in table_columns} for r in results]
	            df = pd.DataFrame(rows, columns=table_headers)
	            return gallery_images, df

	        def clear_results():
	            """Reset the gallery and the table."""
	            return None, None

	        process_btn.click(process_images, inputs=[input_images], outputs=[output_gallery, output_table])
	        clear_btn.click(clear_results, inputs=[], outputs=[output_gallery, output_table])

	        # NOTE: this load handler is a no-op placeholder; it does not perform
	        # any cleanup of the evaluator's temporary files.
	        demo.load(lambda: None, inputs=None, outputs=None)

	        gr.Markdown("""
	        ### Notes
	        - The evaluation may take some time depending on the number and size of images
	        - For best results, use high-quality images
	        - Scores are on different scales depending on the model
	        """)

	    return demo

	# Launch the interface
	if __name__ == "__main__":
	    # Build the UI (this loads every model), enable request queuing, then
	    # start serving.
	    demo = create_interface()
	    queued_demo = demo.queue()
	    queued_demo.launch()