VOIDER committed
Commit 3fde652 · verified · 1 Parent(s): eda3919

Upload 2 files

Files changed (2)
  1. app.py +944 -0
  2. requirements.txt +10 -0
app.py ADDED
@@ -0,0 +1,944 @@
+ import os
+ import sys
+ import json
+ import gradio as gr
+ import numpy as np
+ import pandas as pd
+ import matplotlib.pyplot as plt
+ from PIL import Image
+ import torch
+ import cv2
+
+ # Create necessary directories
+ os.makedirs('/tmp/image_evaluator_uploads', exist_ok=True)
+ os.makedirs('/tmp/image_evaluator_results', exist_ok=True)
+
+ # Base Evaluator class
+ class BaseEvaluator:
+     """
+     Base class for all image quality evaluators.
+     All evaluator implementations should inherit from this class.
+     """
+
+     def __init__(self, config=None):
+         """
+         Initialize the evaluator with optional configuration.
+
+         Args:
+             config (dict, optional): Configuration parameters for the evaluator.
+         """
+         self.config = config or {}
+
+     def evaluate(self, image_path):
+         """
+         Evaluate a single image and return scores.
+
+         Args:
+             image_path (str): Path to the image file.
+
+         Returns:
+             dict: Dictionary containing evaluation scores.
+         """
+         raise NotImplementedError("Subclasses must implement evaluate()")
+
+     def batch_evaluate(self, image_paths):
+         """
+         Evaluate multiple images.
+
+         Args:
+             image_paths (list): List of paths to image files.
+
+         Returns:
+             list: List of dictionaries containing evaluation scores for each image.
+         """
+         return [self.evaluate(img_path) for img_path in image_paths]
+
+     def get_metadata(self):
+         """
+         Return metadata about this evaluator.
+
+         Returns:
+             dict: Dictionary containing metadata about the evaluator.
+         """
+         raise NotImplementedError("Subclasses must implement get_metadata()")
+
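+ # Additional metrics can be plugged in by subclassing BaseEvaluator, implementing
+ # evaluate() and get_metadata(), and registering the instance on the manager defined
+ # further below. A minimal sketch (the class name, metric id, and values here are
+ # placeholders, not part of this app):
+ #
+ #     class BrightnessEvaluator(BaseEvaluator):
+ #         def evaluate(self, image_path):
+ #             img = np.array(Image.open(image_path).convert('L'))
+ #             return {'overall_brightness': float(np.mean(img) / 255.0)}
+ #
+ #         def get_metadata(self):
+ #             return {'id': 'brightness', 'name': 'Brightness', 'version': '0.1',
+ #                     'description': 'Mean luminance as a 0-1 score',
+ #                     'metrics': [{'id': 'overall_brightness', 'name': 'Brightness',
+ #                                  'description': 'Mean luminance'}]}
+ #
+ #     evaluator_manager.register_evaluator(BrightnessEvaluator())
+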
+ # Technical Evaluator
+ class TechnicalEvaluator(BaseEvaluator):
+     """
+     Evaluator for basic technical image quality metrics.
+     Measures sharpness, noise, artifacts, and other technical aspects.
+     """
+
+     def __init__(self, config=None):
+         super().__init__(config)
+         self.config.setdefault('laplacian_ksize', 3)
+         self.config.setdefault('blur_threshold', 100)
+         self.config.setdefault('noise_threshold', 0.05)
+
+     def evaluate(self, image_path):
+         """
+         Evaluate technical aspects of an image.
+
+         Args:
+             image_path (str): Path to the image file.
+
+         Returns:
+             dict: Dictionary containing technical evaluation scores.
+         """
+         try:
+             # Load image
+             img = cv2.imread(image_path)
+             if img is None:
+                 return {
+                     'error': 'Failed to load image',
+                     'overall_technical': 0.0
+                 }
+
+             # Convert to grayscale for some calculations
+             gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+
+             # Calculate sharpness using Laplacian variance
+             laplacian = cv2.Laplacian(gray, cv2.CV_64F, ksize=self.config['laplacian_ksize'])
+             sharpness_score = np.var(laplacian) / 10000  # Normalize
+             sharpness_score = min(1.0, sharpness_score)  # Cap at 1.0
+
+             # Calculate noise level
+             # Using a simple method based on standard deviation in smooth areas
+             blur = cv2.GaussianBlur(gray, (11, 11), 0)
+             diff = cv2.absdiff(gray, blur)
+             noise_level = np.std(diff) / 255.0
+             noise_score = 1.0 - min(1.0, noise_level / self.config['noise_threshold'])
+
+             # Check for compression artifacts
+             edges = cv2.Canny(gray, 100, 200)
+             artifact_score = 1.0 - (np.count_nonzero(edges) / (gray.shape[0] * gray.shape[1]))
+             artifact_score = max(0.0, min(1.0, artifact_score * 2))  # Adjust range
+
+             # Calculate color range and saturation
+             hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
+             saturation = hsv[:, :, 1]
+             saturation_score = np.mean(saturation) / 255.0
+
+             # Calculate contrast
+             min_val, max_val, _, _ = cv2.minMaxLoc(gray)
+             contrast_score = (max_val - min_val) / 255.0
+
+             # Calculate overall technical score (weighted average)
+             overall_technical = (
+                 0.3 * sharpness_score +
+                 0.2 * noise_score +
+                 0.2 * artifact_score +
+                 0.15 * saturation_score +
+                 0.15 * contrast_score
+             )
+
+             return {
+                 'sharpness': float(sharpness_score),
+                 'noise': float(noise_score),
+                 'artifacts': float(artifact_score),
+                 'saturation': float(saturation_score),
+                 'contrast': float(contrast_score),
+                 'overall_technical': float(overall_technical)
+             }
+
+         except Exception as e:
+             return {
+                 'error': str(e),
+                 'overall_technical': 0.0
+             }
+
+     def get_metadata(self):
+         """
+         Return metadata about this evaluator.
+
+         Returns:
+             dict: Dictionary containing metadata about the evaluator.
+         """
+         return {
+             'id': 'technical',
+             'name': 'Technical Metrics',
+             'description': 'Evaluates basic technical aspects of image quality including sharpness, noise, artifacts, saturation, and contrast.',
+             'version': '1.0',
+             'metrics': [
+                 {'id': 'sharpness', 'name': 'Sharpness', 'description': 'Measures image clarity and detail'},
+                 {'id': 'noise', 'name': 'Noise', 'description': 'Measures absence of unwanted variations'},
+                 {'id': 'artifacts', 'name': 'Artifacts', 'description': 'Measures absence of compression artifacts'},
+                 {'id': 'saturation', 'name': 'Saturation', 'description': 'Measures color intensity'},
+                 {'id': 'contrast', 'name': 'Contrast', 'description': 'Measures difference between light and dark areas'},
+                 {'id': 'overall_technical', 'name': 'Overall Technical', 'description': 'Combined technical quality score'}
+             ]
+         }
+
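+ # The thresholds above are exposed through the config dict, so they can be tuned
+ # without subclassing. A minimal sketch (the 0.08 value and the file path are
+ # placeholders, not values used by this app):
+ #
+ #     strict_technical = TechnicalEvaluator(config={'noise_threshold': 0.08})
+ #     scores = strict_technical.evaluate('/tmp/image_evaluator_uploads/example.png')
+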
+ # Aesthetic Evaluator
+ class AestheticEvaluator(BaseEvaluator):
+     """
+     Evaluator for aesthetic image quality.
+     Uses a simplified aesthetic assessment model.
+     """
+
+     def __init__(self, config=None):
+         super().__init__(config)
+         self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
+     def evaluate(self, image_path):
+         """
+         Evaluate aesthetic aspects of an image.
+
+         Args:
+             image_path (str): Path to the image file.
+
+         Returns:
+             dict: Dictionary containing aesthetic evaluation scores.
+         """
+         try:
+             # Load and preprocess image
+             img = Image.open(image_path).convert('RGB')
+
+             # Convert to numpy array for calculations
+             img_np = np.array(img)
+
+             # Calculate color harmony using standard deviation of colors
+             r, g, b = img_np[:, :, 0], img_np[:, :, 1], img_np[:, :, 2]
+             color_std = (np.std(r) + np.std(g) + np.std(b)) / 3
+             color_harmony = min(1.0, color_std / 80.0)  # Normalize
+
+             # Calculate composition score using rule of thirds
+             h, w = img_np.shape[:2]
+             third_h, third_w = h // 3, w // 3
+
+             # Create a rule of thirds grid mask
+             grid_mask = np.zeros((h, w))
+             for i in range(1, 3):
+                 grid_mask[third_h * i - 5:third_h * i + 5, :] = 1
+                 grid_mask[:, third_w * i - 5:third_w * i + 5] = 1
+
+             # Convert to grayscale for edge detection
+             # (signed dtype so negative gradients don't wrap around in np.diff)
+             gray = np.mean(img_np, axis=2).astype(np.int16)
+
+             # Simple edge detection
+             edges = np.abs(np.diff(gray, axis=0, prepend=0)) + np.abs(np.diff(gray, axis=1, prepend=0))
+             edges = edges > 30  # Threshold
+
+             # Calculate how many edges fall on the rule of thirds lines
+             thirds_alignment = np.sum(edges * grid_mask) / max(1, np.sum(edges))
+             composition_score = min(1.0, thirds_alignment * 3)  # Scale up for better distribution
+
+             # Calculate visual interest using entropy
+             hist_r = np.histogram(r, bins=256, range=(0, 256))[0] / (h * w)
+             hist_g = np.histogram(g, bins=256, range=(0, 256))[0] / (h * w)
+             hist_b = np.histogram(b, bins=256, range=(0, 256))[0] / (h * w)
+
+             entropy_r = -np.sum(hist_r[hist_r > 0] * np.log2(hist_r[hist_r > 0]))
+             entropy_g = -np.sum(hist_g[hist_g > 0] * np.log2(hist_g[hist_g > 0]))
+             entropy_b = -np.sum(hist_b[hist_b > 0] * np.log2(hist_b[hist_b > 0]))
+
+             entropy = (entropy_r + entropy_g + entropy_b) / 3
+             visual_interest = min(1.0, entropy / 7.5)  # Normalize
+
+             # Calculate overall aesthetic score (weighted average)
+             overall_aesthetic = (
+                 0.4 * color_harmony +
+                 0.3 * composition_score +
+                 0.3 * visual_interest
+             )
+
+             return {
+                 'color_harmony': float(color_harmony),
+                 'composition': float(composition_score),
+                 'visual_interest': float(visual_interest),
+                 'overall_aesthetic': float(overall_aesthetic)
+             }
+
+         except Exception as e:
+             return {
+                 'error': str(e),
+                 'overall_aesthetic': 0.0
+             }
+
+     def get_metadata(self):
+         """
+         Return metadata about this evaluator.
+
+         Returns:
+             dict: Dictionary containing metadata about the evaluator.
+         """
+         return {
+             'id': 'aesthetic',
+             'name': 'Aesthetic Assessment',
+             'description': 'Evaluates aesthetic qualities of images including color harmony, composition, and visual interest.',
+             'version': '1.0',
+             'metrics': [
+                 {'id': 'color_harmony', 'name': 'Color Harmony', 'description': 'Measures how well colors work together'},
+                 {'id': 'composition', 'name': 'Composition', 'description': 'Measures adherence to compositional principles like rule of thirds'},
+                 {'id': 'visual_interest', 'name': 'Visual Interest', 'description': 'Measures how visually engaging the image is'},
+                 {'id': 'overall_aesthetic', 'name': 'Overall Aesthetic', 'description': 'Combined aesthetic quality score'}
+             ]
+         }
+
+ # Anime Style Evaluator
+ class AnimeStyleEvaluator(BaseEvaluator):
+     """
+     Specialized evaluator for anime-style images.
+     Focuses on line quality, character design, style consistency, and other anime-specific attributes.
+     """
+
+     def __init__(self, config=None):
+         super().__init__(config)
+         self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
+     def evaluate(self, image_path):
+         """
+         Evaluate anime-specific aspects of an image.
+
+         Args:
+             image_path (str): Path to the image file.
+
+         Returns:
+             dict: Dictionary containing anime-style evaluation scores.
+         """
+         try:
+             # Load image
+             img = Image.open(image_path).convert('RGB')
+             img_np = np.array(img)
+
+             # Line quality assessment
+             # (signed dtype so negative gradients don't wrap around in np.diff)
+             gray = np.mean(img_np, axis=2).astype(np.int16)
+
+             # Calculate gradients for edge detection
+             gx = np.abs(np.diff(gray, axis=1, prepend=0))
+             gy = np.abs(np.diff(gray, axis=0, prepend=0))
+
+             # Combine gradients
+             edges = np.maximum(gx, gy)
+
+             # Strong edges are characteristic of anime
+             strong_edges = edges > 50
+             edge_ratio = np.sum(strong_edges) / (gray.shape[0] * gray.shape[1])
+
+             # Line quality score - anime typically has a higher proportion of strong edges
+             line_quality = min(1.0, edge_ratio * 20)  # Scale appropriately
+
+             # Color palette assessment
+             pixels = img_np.reshape(-1, 3)
+             sample_size = min(10000, pixels.shape[0])
+             indices = np.random.choice(pixels.shape[0], sample_size, replace=False)
+             sampled_pixels = pixels[indices]
+
+             # Calculate color diversity (simplified)
+             color_std = np.std(sampled_pixels, axis=0)
+             color_diversity = np.mean(color_std) / 128.0  # Normalize
+
+             # Anime often has a good balance of diversity but not excessive
+             # (clamped at 0 so extreme diversity cannot produce a negative score)
+             color_score = max(0.0, 1.0 - abs(color_diversity - 0.5) * 2)
+
+             # Placeholder for character quality
+             character_quality = 0.85  # Default value for prototype
+
+             # Style consistency assessment
+             hsv = np.array(img.convert('HSV'))
+             saturation = hsv[:, :, 1]
+             value = hsv[:, :, 2]
+
+             # Calculate statistics
+             sat_mean = np.mean(saturation) / 255.0
+             val_mean = np.mean(value) / 255.0
+
+             # Anime often has higher saturation and controlled brightness (clamped at 0)
+             sat_score = max(0.0, 1.0 - abs(sat_mean - 0.7) * 2)  # Ideal around 0.7
+             val_score = max(0.0, 1.0 - abs(val_mean - 0.6) * 2)  # Ideal around 0.6
+
+             style_consistency = (sat_score + val_score) / 2
+
+             # Overall anime score (weighted average)
+             overall_anime = (
+                 0.3 * line_quality +
+                 0.2 * color_score +
+                 0.25 * character_quality +
+                 0.25 * style_consistency
+             )
+
+             return {
+                 'line_quality': float(line_quality),
+                 'color_palette': float(color_score),
+                 'character_quality': float(character_quality),
+                 'style_consistency': float(style_consistency),
+                 'overall_anime': float(overall_anime)
+             }
+
+         except Exception as e:
+             return {
+                 'error': str(e),
+                 'overall_anime': 0.0
+             }
+
+     def get_metadata(self):
+         """
+         Return metadata about this evaluator.
+
+         Returns:
+             dict: Dictionary containing metadata about the evaluator.
+         """
+         return {
+             'id': 'anime_specialized',
+             'name': 'Anime Style Evaluator',
+             'description': 'Specialized evaluator for anime-style images, focusing on line quality, color palette, character design, and style consistency.',
+             'version': '1.0',
+             'metrics': [
+                 {'id': 'line_quality', 'name': 'Line Quality', 'description': 'Measures clarity and quality of line work'},
+                 {'id': 'color_palette', 'name': 'Color Palette', 'description': 'Evaluates color choices and harmony for anime style'},
+                 {'id': 'character_quality', 'name': 'Character Quality', 'description': 'Assesses character design and rendering'},
+                 {'id': 'style_consistency', 'name': 'Style Consistency', 'description': 'Measures adherence to anime style conventions'},
+                 {'id': 'overall_anime', 'name': 'Overall Anime Quality', 'description': 'Combined anime-specific quality score'}
+             ]
+         }
+
+ # Evaluator Manager
+ class EvaluatorManager:
+     """
+     Manager class for handling multiple evaluators.
+     Provides a unified interface for evaluating images with different metrics.
+     """
+
+     def __init__(self):
+         """Initialize the evaluator manager with available evaluators."""
+         self.evaluators = {}
+         self._register_default_evaluators()
+
+     def _register_default_evaluators(self):
+         """Register the default set of evaluators."""
+         self.register_evaluator(TechnicalEvaluator())
+         self.register_evaluator(AestheticEvaluator())
+         self.register_evaluator(AnimeStyleEvaluator())
+
+     def register_evaluator(self, evaluator):
+         """
+         Register a new evaluator.
+
+         Args:
+             evaluator (BaseEvaluator): The evaluator to register.
+         """
+         if not isinstance(evaluator, BaseEvaluator):
+             raise TypeError("Evaluator must be an instance of BaseEvaluator")
+
+         metadata = evaluator.get_metadata()
+         self.evaluators[metadata['id']] = evaluator
+
+     def get_available_evaluators(self):
+         """
+         Get a list of available evaluators.
+
+         Returns:
+             list: List of evaluator metadata.
+         """
+         return [evaluator.get_metadata() for evaluator in self.evaluators.values()]
+
+     def evaluate_image(self, image_path, evaluator_ids=None):
+         """
+         Evaluate an image using specified evaluators.
+
+         Args:
+             image_path (str): Path to the image file.
+             evaluator_ids (list, optional): List of evaluator IDs to use.
+                 If None, all available evaluators will be used.
+
+         Returns:
+             dict: Dictionary containing evaluation results from each evaluator.
+         """
+         if not os.path.exists(image_path):
+             return {'error': f'Image file not found: {image_path}'}
+
+         if evaluator_ids is None:
+             evaluator_ids = list(self.evaluators.keys())
+
+         results = {}
+         for evaluator_id in evaluator_ids:
+             if evaluator_id in self.evaluators:
+                 results[evaluator_id] = self.evaluators[evaluator_id].evaluate(image_path)
+             else:
+                 results[evaluator_id] = {'error': f'Evaluator not found: {evaluator_id}'}
+
+         return results
+
+     def batch_evaluate_images(self, image_paths, evaluator_ids=None):
+         """
+         Evaluate multiple images using specified evaluators.
+
+         Args:
+             image_paths (list): List of paths to image files.
+             evaluator_ids (list, optional): List of evaluator IDs to use.
+                 If None, all available evaluators will be used.
+
+         Returns:
+             list: List of dictionaries containing evaluation results for each image.
+         """
+         return [self.evaluate_image(path, evaluator_ids) for path in image_paths]
+
+     def compare_models(self, model_results):
+         """
+         Compare different models based on evaluation results.
+
+         Args:
+             model_results (dict): Dictionary mapping model names to their evaluation results.
+
+         Returns:
+             dict: Comparison results including rankings and best model.
+         """
+         if not model_results:
+             return {'error': 'No model results provided for comparison'}
+
+         # Calculate average scores for each model across all images and evaluators
+         model_scores = {}
+
+         for model_name, image_results in model_results.items():
+             model_scores[model_name] = {
+                 'technical': 0.0,
+                 'aesthetic': 0.0,
+                 'anime_specialized': 0.0,
+                 'overall': 0.0
+             }
+
+             image_count = len(image_results)
+             if image_count == 0:
+                 continue
+
+             # Sum up scores across all images
+             for image_id, evaluations in image_results.items():
+                 if 'technical' in evaluations and 'overall_technical' in evaluations['technical']:
+                     model_scores[model_name]['technical'] += evaluations['technical']['overall_technical']
+
+                 if 'aesthetic' in evaluations and 'overall_aesthetic' in evaluations['aesthetic']:
+                     model_scores[model_name]['aesthetic'] += evaluations['aesthetic']['overall_aesthetic']
+
+                 if 'anime_specialized' in evaluations and 'overall_anime' in evaluations['anime_specialized']:
+                     model_scores[model_name]['anime_specialized'] += evaluations['anime_specialized']['overall_anime']
+
+             # Calculate averages
+             model_scores[model_name]['technical'] /= image_count
+             model_scores[model_name]['aesthetic'] /= image_count
+             model_scores[model_name]['anime_specialized'] /= image_count
+
+             # Calculate overall score (weighted average of all metrics)
+             model_scores[model_name]['overall'] = (
+                 0.3 * model_scores[model_name]['technical'] +
+                 0.4 * model_scores[model_name]['aesthetic'] +
+                 0.3 * model_scores[model_name]['anime_specialized']
+             )
+
+         # Rank models by overall score
+         rankings = sorted(
+             [(model, scores['overall']) for model, scores in model_scores.items()],
+             key=lambda x: x[1],
+             reverse=True
+         )
+
+         # Format rankings
+         formatted_rankings = [
+             {'rank': i + 1, 'model': model, 'score': score}
+             for i, (model, score) in enumerate(rankings)
+         ]
+
+         # Determine best model
+         best_model = rankings[0][0] if rankings else None
+
+         # Format comparison metrics
+         comparison_metrics = {
+             'technical': {model: scores['technical'] for model, scores in model_scores.items()},
+             'aesthetic': {model: scores['aesthetic'] for model, scores in model_scores.items()},
+             'anime_specialized': {model: scores['anime_specialized'] for model, scores in model_scores.items()},
+             'overall': {model: scores['overall'] for model, scores in model_scores.items()}
+         }
+
+         return {
+             'best_model': best_model,
+             'rankings': formatted_rankings,
+             'comparison_metrics': comparison_metrics
+         }
+
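+ # The manager can also be driven outside the Gradio UI, e.g. from a script or
+ # notebook. A minimal sketch (the file path is a placeholder):
+ #
+ #     manager = EvaluatorManager()
+ #     result = manager.evaluate_image('example.png', evaluator_ids=['technical', 'aesthetic'])
+ #     print(result['technical'].get('overall_technical'))
+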
+ # Initialize evaluator manager
+ evaluator_manager = EvaluatorManager()
+
+ # Global variables to store uploaded images and results
+ uploaded_images = {}
+ evaluation_results = {}
+
+ def evaluate_images(images, model_name, selected_evaluators):
+     """
+     Evaluate uploaded images using selected evaluators.
+
+     Args:
+         images (list): List of uploaded image files
+         model_name (str): Name of the model that generated these images
+         selected_evaluators (list): List of evaluator IDs to use
+
+     Returns:
+         str: Status message
+     """
+     global uploaded_images, evaluation_results
+
+     if not images:
+         return "No images uploaded."
+
+     if not model_name:
+         model_name = "unknown_model"
+
+     # Save uploaded images
+     if model_name not in uploaded_images:
+         uploaded_images[model_name] = []
+
+     image_paths = []
+     for img in images:
+         # Save image to temporary file
+         img_path = f"/tmp/image_evaluator_uploads/{model_name}_{len(uploaded_images[model_name])}.png"
+         os.makedirs(os.path.dirname(img_path), exist_ok=True)
+         Image.open(img).save(img_path)
+
+         # Add to uploaded images
+         uploaded_images[model_name].append({
+             'path': img_path,
+             'id': f"{model_name}_{len(uploaded_images[model_name])}"
+         })
+
+         image_paths.append(img_path)
+
+     # Evaluate images
+     if not selected_evaluators:
+         selected_evaluators = ['technical', 'aesthetic', 'anime_specialized']
+
+     # Only evaluate the entries added in this call (a model may already have earlier uploads)
+     results = {}
+     for entry in uploaded_images[model_name][-len(image_paths):]:
+         results[entry['id']] = evaluator_manager.evaluate_image(entry['path'], selected_evaluators)
+
+     # Store results
+     if model_name not in evaluation_results:
+         evaluation_results[model_name] = {}
+
+     evaluation_results[model_name].update(results)
+
+     return f"Evaluated {len(images)} images for model '{model_name}'."
+
+ def compare_models():
+     """
+     Compare models based on evaluation results.
+
+     Returns:
+         tuple: (comparison summary text, overall chart path, radar chart path)
+     """
+     global evaluation_results
+
+     if not evaluation_results or len(evaluation_results) < 2:
+         return "Need at least two models with evaluated images for comparison.", None, None
+
+     # Compare models
+     comparison = evaluator_manager.compare_models(evaluation_results)
+
+     # Create comparison table
+     models = list(evaluation_results.keys())
+     metrics = ['technical', 'aesthetic', 'anime_specialized', 'overall']
+
+     data = []
+     for model in models:
+         row = {'Model': model}
+         for metric in metrics:
+             if metric in comparison['comparison_metrics'] and model in comparison['comparison_metrics'][metric]:
+                 row[metric.capitalize()] = comparison['comparison_metrics'][metric][model]
+             else:
+                 row[metric.capitalize()] = 0.0
+         data.append(row)
+
+     df = pd.DataFrame(data)
+
+     # Add ranking information
+     for rank_info in comparison['rankings']:
+         if rank_info['model'] in df['Model'].values:
+             df.loc[df['Model'] == rank_info['model'], 'Rank'] = rank_info['rank']
+
+     # Sort by rank
+     df = df.sort_values('Rank')
+
+     # Create overall comparison chart
+     plt.figure(figsize=(10, 6))
+     overall_scores = [comparison['comparison_metrics']['overall'].get(model, 0) for model in models]
+     bars = plt.bar(models, overall_scores, color='skyblue')
+
+     # Add value labels on top of bars
+     for bar in bars:
+         height = bar.get_height()
+         plt.text(bar.get_x() + bar.get_width() / 2., height + 0.01,
+                  f'{height:.2f}', ha='center', va='bottom')
+
+     plt.title('Overall Quality Scores by Model')
+     plt.xlabel('Model')
+     plt.ylabel('Score')
+     plt.ylim(0, 1.1)
+     plt.grid(axis='y', linestyle='--', alpha=0.7)
+
+     # Save the chart
+     overall_chart_path = "/tmp/image_evaluator_results/overall_comparison.png"
+     os.makedirs(os.path.dirname(overall_chart_path), exist_ok=True)
+     plt.savefig(overall_chart_path)
+     plt.close()
+
+     # Create radar chart
+     categories = [m.capitalize() for m in metrics[:-1]]  # Exclude 'overall'
+     N = len(categories)
+
+     # Create angles for each metric
+     angles = [n / float(N) * 2 * np.pi for n in range(N)]
+     angles += angles[:1]  # Close the loop
+
+     # Create radar chart
+     plt.figure(figsize=(10, 10))
+     ax = plt.subplot(111, polar=True)
+
+     # Add lines for each model
+     colors = plt.cm.tab10(np.linspace(0, 1, len(models)))
+
+     for i, model in enumerate(models):
+         values = [comparison['comparison_metrics'][metric].get(model, 0) for metric in metrics[:-1]]
+         values += values[:1]  # Close the loop
+
+         ax.plot(angles, values, linewidth=2, linestyle='solid', label=model, color=colors[i])
+         ax.fill(angles, values, alpha=0.1, color=colors[i])
+
+     # Set category labels
+     plt.xticks(angles[:-1], categories)
+
+     # Set y-axis limits
+     ax.set_ylim(0, 1)
+
+     # Add legend
+     plt.legend(loc='upper right', bbox_to_anchor=(0.1, 0.1))
+
+     plt.title('Detailed Metrics Comparison by Model')
+
+     # Save the chart
+     radar_chart_path = "/tmp/image_evaluator_results/radar_comparison.png"
+     plt.savefig(radar_chart_path)
+     plt.close()
+
+     # Create result message
+     result_message = f"Best model: {comparison['best_model']}\n\nModel rankings:\n"
+     for rank in comparison['rankings']:
+         result_message += f"{rank['rank']}. {rank['model']} (score: {rank['score']:.2f})\n"
+
+     return result_message, overall_chart_path, radar_chart_path
+
+ def export_results(format_type):
+     """
+     Export evaluation results to file.
+
+     Args:
+         format_type (str): Export format ('csv', 'json', or 'html')
+
+     Returns:
+         str: Path to exported file
+     """
+     global evaluation_results
+
+     if not evaluation_results:
+         return "No evaluation results to export."
+
+     # Create output directory
+     output_dir = "/tmp/image_evaluator_results"
+     os.makedirs(output_dir, exist_ok=True)
+
+     # Compare models if multiple models are available
+     if len(evaluation_results) >= 2:
+         comparison = evaluator_manager.compare_models(evaluation_results)
+     else:
+         comparison = None
+
+     # Create DataFrame for the results
+     models = list(evaluation_results.keys())
+     metrics = ['technical', 'aesthetic', 'anime_specialized', 'overall']
+
+     if comparison:
+         data = []
+         for model in models:
+             row = {'Model': model}
+             for metric in metrics:
+                 if metric in comparison['comparison_metrics'] and model in comparison['comparison_metrics'][metric]:
+                     row[metric.capitalize()] = comparison['comparison_metrics'][metric][model]
+                 else:
+                     row[metric.capitalize()] = 0.0
+             data.append(row)
+
+         df = pd.DataFrame(data)
+
+         # Add ranking information
+         for rank_info in comparison['rankings']:
+             if rank_info['model'] in df['Model'].values:
+                 df.loc[df['Model'] == rank_info['model'], 'Rank'] = rank_info['rank']
+
+         # Sort by rank
+         df = df.sort_values('Rank')
+     else:
+         # Single model, create detailed results
+         model = models[0]
+         data = []
+
+         for img_id, results in evaluation_results[model].items():
+             row = {'Image': img_id}
+
+             for evaluator_id, evaluator_results in results.items():
+                 for metric, value in evaluator_results.items():
+                     row[f"{evaluator_id}_{metric}"] = value
+
+             data.append(row)
+
+         df = pd.DataFrame(data)
+
+     # Export based on format
+     if format_type == 'csv':
+         output_path = os.path.join(output_dir, 'evaluation_results.csv')
+         df.to_csv(output_path, index=False)
+     elif format_type == 'json':
+         output_path = os.path.join(output_dir, 'evaluation_results.json')
+
+         if comparison:
+             export_data = {
+                 'comparison': comparison,
+                 'results': evaluation_results
+             }
+         else:
+             export_data = evaluation_results
+
+         with open(output_path, 'w') as f:
+             json.dump(export_data, f, indent=2)
+     elif format_type == 'html':
+         output_path = os.path.join(output_dir, 'evaluation_results.html')
+         df.to_html(output_path, index=False)
+     else:
+         return f"Unsupported format: {format_type}"
+
+     return output_path
+
+ def reset_data():
+     """Reset all uploaded images and evaluation results."""
+     global uploaded_images, evaluation_results
+     uploaded_images = {}
+     evaluation_results = {}
+     return "All data has been reset."
+
+ def create_interface():
+     """Create Gradio interface."""
+     # Get available evaluators
+     available_evaluators = evaluator_manager.get_available_evaluators()
+     evaluator_choices = [e['id'] for e in available_evaluators]
+
+     with gr.Blocks(title="Image Evaluator") as interface:
+         gr.Markdown("# Image Evaluator")
+         gr.Markdown("Tool for evaluating and comparing images generated by different AI models")
+
+         with gr.Tab("Upload & Evaluate"):
+             with gr.Row():
+                 with gr.Column():
+                     images_input = gr.File(file_count="multiple", label="Upload Images")
+                     model_name_input = gr.Textbox(label="Model Name", placeholder="Enter model name")
+                     evaluator_select = gr.CheckboxGroup(choices=evaluator_choices, label="Select Evaluators", value=evaluator_choices)
+                     evaluate_button = gr.Button("Evaluate Images")
+
+                 with gr.Column():
+                     evaluation_output = gr.Textbox(label="Evaluation Status")
+
+             evaluate_button.click(
+                 evaluate_images,
+                 inputs=[images_input, model_name_input, evaluator_select],
+                 outputs=evaluation_output
+             )
+
+         with gr.Tab("Compare Models"):
+             with gr.Row():
+                 compare_button = gr.Button("Compare Models")
+
+             with gr.Row():
+                 with gr.Column():
+                     comparison_output = gr.Textbox(label="Comparison Results")
+
+                 with gr.Column():
+                     overall_chart = gr.Image(label="Overall Scores")
+                     radar_chart = gr.Image(label="Detailed Metrics")
+
+             compare_button.click(
+                 compare_models,
+                 inputs=[],
+                 outputs=[comparison_output, overall_chart, radar_chart]
+             )
+
+         with gr.Tab("Export Results"):
+             with gr.Row():
+                 format_select = gr.Radio(choices=["csv", "json", "html"], label="Export Format", value="csv")
+                 export_button = gr.Button("Export Results")
+
+             with gr.Row():
+                 export_output = gr.Textbox(label="Export Status")
+
+             export_button.click(
+                 export_results,
+                 inputs=[format_select],
+                 outputs=export_output
+             )
+
+         with gr.Tab("Help"):
+             gr.Markdown("""
+ ## How to Use Image Evaluator
+
+ ### Step 1: Upload Images
+ - Go to the "Upload & Evaluate" tab
+ - Upload images for a specific model
+ - Enter the model name
+ - Select which evaluators to use
+ - Click "Evaluate Images"
+ - Repeat for each model you want to compare
+
+ ### Step 2: Compare Models
+ - Go to the "Compare Models" tab
+ - Click "Compare Models" to see results
+ - The best model will be highlighted
+ - View charts for visual comparison
+
+ ### Step 3: Export Results
+ - Go to the "Export Results" tab
+ - Select export format (CSV, JSON, or HTML)
+ - Click "Export Results"
+ - Download the exported file
+
+ ### Available Metrics
+
+ #### Technical Metrics
+ - Sharpness: Measures image clarity and detail
+ - Noise: Measures absence of unwanted variations
+ - Artifacts: Measures absence of compression artifacts
+ - Saturation: Measures color intensity
+ - Contrast: Measures difference between light and dark areas
+
+ #### Aesthetic Metrics
+ - Color Harmony: Measures how well colors work together
+ - Composition: Measures adherence to compositional principles
+ - Visual Interest: Measures how visually engaging the image is
+
+ #### Anime-Specific Metrics
+ - Line Quality: Measures clarity and quality of line work
+ - Color Palette: Evaluates color choices for anime style
+ - Character Quality: Assesses character design and rendering
+ - Style Consistency: Measures adherence to anime style conventions
+ """)
+
+         with gr.Row():
+             reset_button = gr.Button("Reset All Data")
+             reset_output = gr.Textbox(label="Reset Status")
+
+         reset_button.click(
+             reset_data,
+             inputs=[],
+             outputs=reset_output
+         )
+
+     return interface
+
+ # Create and launch the interface
+ interface = create_interface()
+
+ if __name__ == "__main__":
+     interface.launch()
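+     # interface.launch() with no arguments uses Gradio's defaults, which is what a
+     # hosted Space expects. For a LAN-accessible local run, the standard launch
+     # arguments could be passed instead, e.g.
+     # interface.launch(server_name="0.0.0.0", server_port=7860).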
requirements.txt ADDED
@@ -0,0 +1,10 @@
+ gradio==3.50.2
+ numpy==1.21.0
+ opencv-python==4.5.3.56
+ pillow==8.3.1
+ torch==1.9.0
+ torchvision==0.10.0
+ pandas==1.3.0
+ matplotlib==3.4.2
+ tqdm==4.61.2
+ scikit-image==0.18.2