import math
import os
from collections import defaultdict

import megfile
import numpy as np
import pandas as pd

# Editing task groups whose per-group GPT score CSVs are aggregated below.
GROUPS = [
    "background_change", "color_alter", "material_alter", "motion_change",
    "ps_human", "style_change", "subject-add", "subject-remove",
    "subject-replace", "text_change", "tone_transfer",
]

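# Expected input layout (inferred from the read loop in analyze_scores below;
# the CSVs are produced by an upstream GPT scoring step, not by this script):
# one file per model and group, named "{model_name}_{group_name}_gpt_score.csv",
# containing at least the columns source_image, edited_image, instruction,
# sementics_score, quality_score, intersection_exist and instruction_language.
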
def analyze_scores(save_path_dir, evaluate_group, language):
    """Aggregate per-group GPT scores for each model in `evaluate_group`.

    Returns six nested dicts keyed by model name and group name: semantics,
    quality and overall averages, each computed over all rows and over the
    subset of rows where `intersection_exist` is set.
    """
    results = defaultdict(dict)
    save_path_new = save_path_dir
    model_total_score = defaultdict(dict)
    group_dict_sub = {}
    group_scores_semantics = defaultdict(lambda: defaultdict(list))
    group_scores_quality = defaultdict(lambda: defaultdict(list))
    group_scores_overall = defaultdict(lambda: defaultdict(list))
    group_scores_semantics_intersection = defaultdict(lambda: defaultdict(list))
    group_scores_quality_intersection = defaultdict(lambda: defaultdict(list))
    group_scores_overall_intersection = defaultdict(lambda: defaultdict(list))
    length_total = 0
    save_path_dir_raw = save_path_dir
    for group_name in GROUPS:
        csv_path = os.path.join(save_path_new, f"{evaluate_group[0]}_{group_name}_gpt_score.csv")
        csv_file = megfile.smart_open(csv_path)
        df = pd.read_csv(csv_file)

        filtered_semantics_scores = []
        filtered_quality_scores = []
        filtered_overall_scores = []
        filtered_semantics_scores_intersection = []
        filtered_quality_scores_intersection = []
        filtered_overall_scores_intersection = []

        for _, row in df.iterrows():
            source_image = row['source_image']
            edited_image = row['edited_image']
            instruction = row['instruction']
            semantics_score = row['sementics_score']
            quality_score = row['quality_score']
            intersection_exist = row['intersection_exist']
            instruction_language = row['instruction_language']
            # Keep only rows whose instruction is in the requested language.
            if instruction_language != language:
                continue
            # Overall score is the geometric mean of semantics and quality.
            overall_score = math.sqrt(semantics_score * quality_score)
            filtered_semantics_scores.append(semantics_score)
            filtered_quality_scores.append(quality_score)
            filtered_overall_scores.append(overall_score)
            if intersection_exist:
                filtered_semantics_scores_intersection.append(semantics_score)
                filtered_quality_scores_intersection.append(quality_score)
                filtered_overall_scores_intersection.append(overall_score)
        # Per-group averages over the filtered rows; np.mean of an empty list
        # yields nan (e.g. when no row matches the requested language).
        avg_semantics_score = np.mean(filtered_semantics_scores)
        avg_quality_score = np.mean(filtered_quality_scores)
        avg_overall_score = np.mean(filtered_overall_scores)
        group_scores_semantics[evaluate_group[0]][group_name] = avg_semantics_score
        group_scores_quality[evaluate_group[0]][group_name] = avg_quality_score
        group_scores_overall[evaluate_group[0]][group_name] = avg_overall_score

        avg_semantics_score_intersection = np.mean(filtered_semantics_scores_intersection)
        avg_quality_score_intersection = np.mean(filtered_quality_scores_intersection)
        avg_overall_score_intersection = np.mean(filtered_overall_scores_intersection)
        group_scores_semantics_intersection[evaluate_group[0]][group_name] = avg_semantics_score_intersection
        group_scores_quality_intersection[evaluate_group[0]][group_name] = avg_quality_score_intersection
        group_scores_overall_intersection[evaluate_group[0]][group_name] = avg_overall_score_intersection

print("\n--- Overall Model Averages ---")
print("\nSemantics:")
for model_name in evaluate_group:
model_scores = [group_scores_semantics[model_name][group] for group in GROUPS]
model_avg = np.mean(model_scores)
group_scores_semantics[model_name]["avg_semantics"] = model_avg
print("\nSemantics Intersection:")
for model_name in evaluate_group:
model_scores = [group_scores_semantics_intersection[model_name][group] for group in GROUPS]
model_avg = np.mean(model_scores)
group_scores_semantics_intersection[model_name]["avg_semantics"] = model_avg
print("\nQuality:")
for model_name in evaluate_group:
model_scores = [group_scores_quality[model_name][group] for group in GROUPS]
model_avg = np.mean(model_scores)
group_scores_quality[model_name]["avg_quality"] = model_avg
print("\nQuality Intersection:")
for model_name in evaluate_group:
model_scores = [group_scores_quality_intersection[model_name][group] for group in GROUPS]
model_avg = np.mean(model_scores)
group_scores_quality_intersection[model_name]["avg_quality"] = model_avg
print("\nOverall:")
for model_name in evaluate_group:
model_scores = [group_scores_overall[model_name][group] for group in GROUPS]
model_avg = np.mean(model_scores)
group_scores_overall[model_name]["avg_overall"] = model_avg
print("\nOverall Intersection:")
for model_name in evaluate_group:
model_scores = [group_scores_overall_intersection[model_name][group] for group in GROUPS]
model_avg = np.mean(model_scores)
group_scores_overall_intersection[model_name]["avg_overall"] = model_avg
return group_scores_semantics, group_scores_quality, group_scores_overall, group_scores_semantics_intersection, group_scores_quality_intersection, group_scores_overall_intersection
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--model_name", type=str, default="UniWorld")
    parser.add_argument("--save_path", type=str, default="/mnt/data/lb/Remake/UniWorld//eval_output/stage3_ema/Gedit")
    parser.add_argument("--backbone", type=str, default="gpt4o", choices=["gpt4o", "qwen25vl"])
    parser.add_argument("--language", type=str, default="en", choices=["en", "zh"])
    args = parser.parse_args()

    model_name = args.model_name
    save_path_dir = args.save_path
    evaluate_group = [args.model_name]
    backbone = args.backbone
    save_path_new = os.path.join(save_path_dir, backbone, "eval_results_new")

    print("\nPer-group scores (semantics, quality, overall):")
    for model_name in evaluate_group:
        (group_scores_semantics, group_scores_quality, group_scores_overall,
         group_scores_semantics_intersection, group_scores_quality_intersection,
         group_scores_overall_intersection) = analyze_scores(save_path_new, [model_name], language=args.language)
        for group_name in GROUPS:
            print(f"{group_name}: {group_scores_semantics[model_name][group_name]:.3f}, {group_scores_quality[model_name][group_name]:.3f}, {group_scores_overall[model_name][group_name]:.3f}")
        print(f"Average: {group_scores_semantics[model_name]['avg_semantics']:.3f}, {group_scores_quality[model_name]['avg_quality']:.3f}, {group_scores_overall[model_name]['avg_overall']:.3f}")

        print("\nIntersection:")
        for group_name in GROUPS:
            print(f"{group_name}: {group_scores_semantics_intersection[model_name][group_name]:.3f}, {group_scores_quality_intersection[model_name][group_name]:.3f}, {group_scores_overall_intersection[model_name][group_name]:.3f}")
        print(f"Average Intersection: {group_scores_semantics_intersection[model_name]['avg_semantics']:.3f}, {group_scores_quality_intersection[model_name]['avg_quality']:.3f}, {group_scores_overall_intersection[model_name]['avg_overall']:.3f}")
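
# Example invocation (a sketch; the script filename and save_path below are
# placeholders, assuming the per-group "*_gpt_score.csv" files already exist
# under <save_path>/<backbone>/eval_results_new):
#   python gedit_analyze_scores.py --model_name UniWorld \
#       --save_path /path/to/eval_output/Gedit --backbone gpt4o --language en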