Commit 56f0aa0
Parent(s): 0931cca
init submision 2

medvqa/submission_samples/gi-2025/submission_task2.py
CHANGED
@@ -1 +1,278 @@
import random
from datasets import Dataset
from sklearn.metrics.pairwise import cosine_similarity
from scipy.linalg import sqrtm
from scipy.spatial.distance import pdist
from PIL import Image
import numpy as np
import os
from diffusers import DiffusionPipeline
from datasets import load_dataset
from transformers import AutoProcessor, AutoModel
import torch
import json
import time
from tqdm import tqdm
import subprocess
import platform
import sys
import requests

jsons__ = requests.get(
    "https://huggingface.co/datasets/SimulaMet/Kvasir-VQA-test/resolve/main/imagen-test").json()
test_prompts = [c for qa in jsons__.values()
                for pair in qa.values() for c in pair]
gpu_name = torch.cuda.get_device_name(
    0) if torch.cuda.is_available() else "cpu"
device = "cuda" if torch.cuda.is_available() else "cpu"


def get_mem(): return torch.cuda.memory_allocated(device) / \
    (1024 ** 2) if torch.cuda.is_available() else 0


initial_mem = get_mem()

# ✏️✏️--------EDIT SECTION 1: SUBMISSION DETAILS and MODEL LOADING --------✏️✏️#

SUBMISSION_INFO = {
    # 🔹 TODO: PARTICIPANTS MUST ADD PROPER SUBMISSION INFO FOR THE SUBMISSION 🔹
    # This will be visible to the organizers
    # DON'T change the keys, only add your info
    "Participant_Names": "Sushant Gautam, Steven Hicks and Vajita Thambawita",
    "Affiliations": "SimulaMet",
    "Contact_emails": ["[email protected]", "[email protected]"],
    # But only the first email will be used for correspondence
    "Team_Name": "SimulaMetmedVQA Rangers",
    "Country": "Norway",
    "Notes_to_organizers": '''
        e.g., We have fine-tuned XXX model
        This is optional . .
        Used data augmentations . .
        Custom info about the model . .
        Any insights. .
        + Any informal things you like to share about this submission.
        '''
}
# 🔹 TODO: PARTICIPANTS MUST LOAD THEIR MODEL HERE, EDIT AS NECESSARY FOR YOUR MODEL 🔹
# can add necessary library imports here

hf_pipe = DiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16).to(device)
hf_pipe.load_lora_weights("waitwhoami/sd-kvasir-imagen-demo")
hf_pipe.safety_checker = lambda images, clip_input: (images, False)
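
# A minimal sketch of what a participant's own model load might look like here
# (hypothetical repo id "your-org/your-finetuned-pipeline"; not part of this baseline):
# hf_pipe = DiffusionPipeline.from_pretrained(
#     "your-org/your-finetuned-pipeline", torch_dtype=torch.float16).to(device)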

# 🛑----------------END SUBMISSION DETAILS and MODEL LOADING -----------------🛑#

start_time, post_model_mem = time.time(), get_mem()
total_time, final_mem = round(
    time.time() - start_time, 4), round(get_mem() - post_model_mem, 2)
model_mem_used = round(post_model_mem - initial_mem, 2)
num_per_prompt = 10
timestamp = time.strftime("%Y%m%d_%H%M%S")
output_folder = f"generated_images_{timestamp}"
# Ensure output folder exists
os.makedirs(output_folder, exist_ok=True)

# ✏️✏️___________EDIT SECTION 2: IMAGE GENERATION___________✏️✏️#
# 🔹 TODO: PARTICIPANTS SHOULD MODIFY THIS STEP 🔹
# you have access to 'test_prompts' with all the prompts needed to be generated

batch_size = 2  # Adjust based on your GPU memory

for i in range(0, len(test_prompts), batch_size):
    batch = test_prompts[i:i + batch_size]
    batched_prompts = [p for p in batch for _ in range(num_per_prompt)]
    images = hf_pipe(batched_prompts).images
    for j, img in enumerate(images):
        p_idx = i + j // num_per_prompt + 1
        i_idx = j % num_per_prompt + 1
        img.save(f"{output_folder}/prompt{p_idx:04d}_img{i_idx:04d}.png")
# make sure 'output_folder' with generated images is available with proper filenames
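
# An equivalent sketch (an assumption, not required by the challenge): diffusers
# pipelines also accept num_images_per_prompt, which avoids repeating each prompt manually:
# for p_idx, prompt in enumerate(test_prompts, start=1):
#     imgs = hf_pipe(prompt, num_images_per_prompt=num_per_prompt).images
#     for i_idx, img in enumerate(imgs, start=1):
#         img.save(f"{output_folder}/prompt{p_idx:04d}_img{i_idx:04d}.png")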

# 🛑________________ END IMAGE GENERATION ________________🛑#

# ❗ DO NOT EDIT any lines below from here, can edit only up to the generation step above as required. ❗
# Ensures answer is a string

saved_files = [f for f in os.listdir(output_folder) if f.endswith('.png')]
expected_count = len(test_prompts) * num_per_prompt

assert len(
    saved_files) == expected_count, f"Expected {expected_count} images, but found {len(saved_files)}."

total_time, final_mem = round(
    time.time() - start_time, 4), round(get_mem() - post_model_mem, 2)
model_mem_used = round(post_model_mem - initial_mem, 2)

# start calculating metrics
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# weights = Inception_V3_Weights.DEFAULT
# inception = inception_v3(weights=weights).to(DEVICE)
# inception.eval()

# # --- Preprocessing ---
# IMG_SIZE = 299
# preprocess = transforms.Compose([
#     transforms.Resize((IMG_SIZE, IMG_SIZE)),
#     transforms.ToTensor(),
#     transforms.Normalize([0.5]*3, [0.5]*3),
# ])

modelx = AutoModel.from_pretrained(
    "ikim-uk-essen/BiomedCLIP_ViT_patch16_224", trust_remote_code=True).to(DEVICE)
processor = AutoProcessor.from_pretrained(
    "ikim-uk-essen/BiomedCLIP_ViT_patch16_224", trust_remote_code=True)
modelx.eval()


def extract_features(batch):
    inputs = processor(images=batch['image'], return_tensors="pt").to(DEVICE)
    with torch.no_grad():
        feats = modelx(**inputs).pooler_output
    feats = feats / feats.norm(p=2, dim=-1, keepdim=True)
    return {'features': feats.cpu().numpy()}
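
# Note: extract_features above yields one L2-normalized BiomedCLIP pooled embedding per
# image; the fidelity/agreement/diversity metrics below all operate on these unit vectors.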


def extract_features_from_paths(image_paths, batch_size=32):
    imgs = [Image.open(p).convert('RGB') for p in image_paths]
    dataset = Dataset.from_dict({'image': imgs})
    dataset = dataset.map(extract_features, batched=True,
                          batch_size=batch_size)
    return np.vstack(dataset['features'])


def fid_score(feat1, feat2):
    mu1, mu2 = feat1.mean(0), feat2.mean(0)
    sigma1, sigma2 = np.cov(feat1, rowvar=False), np.cov(feat2, rowvar=False)
    covmean = sqrtm(sigma1 @ sigma2).real
    return ((mu1 - mu2)**2).sum() + np.trace(sigma1 + sigma2 - 2 * covmean)
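
# For reference, fid_score computes the Frechet distance between Gaussians fitted to the
# two feature sets: ||mu1 - mu2||^2 + Tr(S1 + S2 - 2*(S1 S2)^(1/2)).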


def diversity_score(features):
    return pdist(features).mean()


def mean_cosine_sim(feat1, feat2):
    return cosine_similarity(feat1, feat2).mean()


# --- Organize generated images ---
generated_files = sorted(
    [f for f in os.listdir(output_folder) if f.endswith(".png")])
prompt_to_images = {}
for f in generated_files:
    prompt_idx = int(f.split("_")[0].replace("prompt", ""))
    prompt_to_images.setdefault(prompt_idx, []).append(
        os.path.join(output_folder, f))

all_features = {}
for prompt_idx, paths in tqdm(prompt_to_images.items(), desc="Extracting generated image's features"):
    all_features[prompt_idx] = extract_features_from_paths(paths)

val_dataset = load_dataset("SimulaMet/Kvasir-VQA-test", split="validation")
prompt_to_real = requests.get(
    "https://huggingface.co/datasets/SimulaMet/Kvasir-VQA-test/resolve/main/real_mapping").json()

print("Now, extracting real image's features...")
seen = set()
real_features_cache_ = val_dataset.filter(lambda x: x["img_id"] not in seen and not seen.add(x["img_id"])).map(
    extract_features,
    batched=True,
    batch_size=128
)
real_features_cache = {
    image_id: feature
    for image_id, feature in zip(real_features_cache_["img_id"], real_features_cache_["features"])
}


# --- Pair prompts: (0,1), (2,3), ...
sorted_prompts = sorted(all_features.keys())
objectives = []
for i in range(0, len(sorted_prompts)//2, 2):
    idx_A = sorted_prompts[i]
    idx_B = sorted_prompts[i + 1]
    A = all_features[idx_A]
    B = all_features[idx_B]
    objectives.append((idx_A, idx_B, A, B))

# --- Per-objective Metrics ---
fids, agreements, diversities = [], [], []
all_generated, all_real = [], []
per_prompt_data = []

for idx_A, idx_B, A, B in tqdm(objectives, desc="Scoring"):
    sim_ab = mean_cosine_sim(A, B)
    fid_ab = fid_score(A, B)
    div_A = diversity_score(A)
    div_B = diversity_score(B)

    # Shared real reference for both prompts
    # same as prompt_to_real[str(idx_B)]
    real_keys = prompt_to_real[str(idx_A)]
    # flag by SUSHANT, just to debug ;)
    # real_keys = random.sample(val_dataset['img_id'], len(real_keys))
    real_feats = np.array([real_features_cache[key] for key in real_keys])
    fid_A_real = fid_score(A, real_feats)
    fid_B_real = fid_score(B, real_feats)

    # Collect for global metrics
    all_generated.extend([*A, *B])
    all_real.extend(real_feats)

    fids.append((fid_A_real + fid_B_real) / 2)
    agreements.append(sim_ab)
    diversities.extend([div_A, div_B])

    per_prompt_data.append({
        "Prompt A": idx_A,
        "Prompt B": idx_B,
        "FID(A,B)": fid_ab,
        "Agreement": sim_ab,
        "Diversity A": div_A,
        "Diversity B": div_B,
        "FID A vs Real": fid_A_real,
        "FID B vs Real": fid_B_real,
        "Real Ref": real_feats
    })

# --- Global FID ---
all_generated = np.array(all_generated)
all_real = np.array(all_real)
global_fid = fid_score(all_generated, all_real)

# --- Global Scores ---
fidelity_norm = np.mean(100 / (1 + np.array(fids)))
agreement_norm = np.mean(agreements)
diversity_norm = np.mean(diversities)
# final_score = 0.5 * fidelity_norm + 0.3 * agreement_norm + 0.2 * diversity_norm  # let's not use this for now
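
# Fidelity maps each averaged FID into (0, 100] via 100 / (1 + FID), so a lower FID
# against the real reference images yields a higher fidelity score.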

# --- Output ---
public_scores = {
    "fidelity": float(fidelity_norm),
    "agreement": float(agreement_norm),
    "diversity": float(diversity_norm),
    "FBD": float(global_fid)
}


# end calculating metrics
# features are converted to plain lists so json.dump can serialize them
output_data = {"submission_info": SUBMISSION_INFO, "public_scores": public_scores, "total_time": total_time, "time_per_item": total_time / len(val_dataset),
               "memory_used_mb": final_mem, "model_memory_mb": model_mem_used, "gpu_name": gpu_name,
               "predictions": {k: np.asarray(v).tolist() for k, v in all_features.items()}, "debug": {
                   "packages": json.loads(subprocess.check_output([sys.executable, "-m", "pip", "list", "--format=json"])),
                   "system": {
                       "python": platform.python_version(),
                       "os": platform.system(),
                       "platform": platform.platform(),
                       "arch": platform.machine()
                   }}}


with open("predictions_2.json", "w") as f:
    json.dump(output_data, f, indent=4)
print(f"Time: {total_time}s | Mem: {final_mem}MB | Model Load Mem: {model_mem_used}MB | GPU: {gpu_name}")
print("✅ Script looks good! Generation process completed successfully. Results saved to 'predictions_2.json'.")
print("Next Step:\n 1) Upload this submission_task2.py script file to HuggingFace model repository.")
print('''\n 2) Make a submission to the competition:\n   Run:: medvqa validate_and_submit --competition=gi-2025 --task=2 --repo_id=...''')