SushantGautam committed on
Commit
56f0aa0
·
1 Parent(s): 0931cca

init submission 2

medvqa/submission_samples/gi-2025/submission_task2.py CHANGED
@@ -1 +1,278 @@
- print("Coming soon, stay tuned!")
+ import random
+ from datasets import Dataset, load_dataset
+ from sklearn.metrics.pairwise import cosine_similarity
+ from scipy.linalg import sqrtm
+ from scipy.spatial.distance import pdist
+ from PIL import Image
+ import numpy as np
+ import os
+ from diffusers import DiffusionPipeline
+ from transformers import AutoProcessor, AutoModel
+ import torch
+ import json
+ import time
+ from tqdm import tqdm
+ import subprocess
+ import platform
+ import sys
+ import requests
+
+ jsons__ = requests.get(
+     "https://huggingface.co/datasets/SimulaMet/Kvasir-VQA-test/resolve/main/imagen-test").json()
+ test_prompts = [c for qa in jsons__.values()
+                 for pair in qa.values() for c in pair]
+ gpu_name = torch.cuda.get_device_name(
+     0) if torch.cuda.is_available() else "cpu"
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+
+
+ def get_mem():
+     # Allocated CUDA memory in MB (0 on CPU-only machines).
+     if not torch.cuda.is_available():
+         return 0
+     return torch.cuda.memory_allocated(device) / (1024 ** 2)
+
+
+ initial_mem = get_mem()
+
+ # ✏️✏️-------- EDIT SECTION 1: SUBMISSION DETAILS and MODEL LOADING --------✏️✏️#
+
+ SUBMISSION_INFO = {
+     # 🔹 TODO: PARTICIPANTS MUST ADD PROPER SUBMISSION INFO FOR THE SUBMISSION 🔹
+     # This will be visible to the organizers
+     # DON'T change the keys, only add your info
+     "Participant_Names": "Sushant Gautam, Steven Hicks and Vajita Thambawita",
+     "Affiliations": "SimulaMet",
+     "Contact_emails": ["[email protected]", "[email protected]"],
+     # Only the first email will be used for correspondence
+     "Team_Name": "SimulaMetmedVQA Rangers",
+     "Country": "Norway",
+     "Notes_to_organizers": '''
+         e.g., we have fine-tuned XXX model
+         This is optional . .
+         Used data augmentations . .
+         Custom info about the model . .
+         Any insights . .
+         + Any informal things you like to share about this submission.
+     '''
+ }
+ # 🔹 TODO: PARTICIPANTS MUST LOAD THEIR MODEL HERE, EDIT AS NECESSARY FOR YOUR MODEL 🔹
+ # Necessary library imports can be added here.
+
+ hf_pipe = DiffusionPipeline.from_pretrained(
+     "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16).to(device)
+ hf_pipe.load_lora_weights("waitwhoami/sd-kvasir-imagen-demo")
+ # Disable the safety checker; it must return one flag per image, so a bare
+ # False would crash inside the pipeline.
+ hf_pipe.safety_checker = lambda images, clip_input: (images, [False] * len(images))
+
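+ # A minimal sketch of swapping in your own model (hypothetical repo ids, left
+ # commented out so the baseline above stays active):
+ # hf_pipe = DiffusionPipeline.from_pretrained(
+ #     "your-username/your-finetuned-model", torch_dtype=torch.float16).to(device)
+ # hf_pipe.load_lora_weights("your-username/your-lora-weights")
+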
+ # 🏁---------------- END SUBMISSION DETAILS and MODEL LOADING -----------------🏁#
+
+ # Timing/memory baselines; the totals are computed after generation completes.
+ start_time, post_model_mem = time.time(), get_mem()
+ num_per_prompt = 10
+ timestamp = time.strftime("%Y%m%d_%H%M%S")
+ output_folder = f"generated_images_{timestamp}"
+ # Ensure output folder exists
+ os.makedirs(output_folder, exist_ok=True)
+
+ # ✏️✏️___________ EDIT SECTION 2: IMAGE GENERATION ___________✏️✏️#
+ # 🔹 TODO: PARTICIPANTS SHOULD MODIFY THIS STEP 🔹
+ # You have access to 'test_prompts' with all the prompts that need to be generated.
+
+ batch_size = 2  # Adjust based on your GPU memory
+
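+ # Optional memory savers (standard diffusers API, shown commented out); useful
+ # if the batches below do not fit on your GPU:
+ # hf_pipe.enable_attention_slicing()   # lower VRAM at some speed cost
+ # hf_pipe.enable_model_cpu_offload()   # needs `accelerate`; then skip .to(device) above
+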
+ for i in range(0, len(test_prompts), batch_size):
+     batch = test_prompts[i:i + batch_size]
+     # Repeat each prompt num_per_prompt times so every prompt yields multiple samples.
+     batched_prompts = [p for p in batch for _ in range(num_per_prompt)]
+     images = hf_pipe(batched_prompts).images
+     for j, img in enumerate(images):
+         # Recover 1-based prompt and image indices from the flat batch.
+         p_idx = i + j // num_per_prompt + 1
+         i_idx = j % num_per_prompt + 1
+         img.save(f"{output_folder}/prompt{p_idx:04d}_img{i_idx:04d}.png")
+ # Make sure 'output_folder' with generated images is available with proper filenames.
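+ # Reproducibility sketch (optional): diffusers pipelines accept a generator
+ # for fixed seeds, e.g.:
+ # images = hf_pipe(batched_prompts,
+ #                  generator=torch.Generator(device).manual_seed(42)).images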
+
+ # 🏁________________ END IMAGE GENERATION ________________🏁#
+
+ # ⛔ DO NOT EDIT any lines below from here; edit only up to the generation step above as required. ⛔
+ # Verify the expected number of images was generated.
+
+ saved_files = [f for f in os.listdir(output_folder) if f.endswith('.png')]
+ expected_count = len(test_prompts) * num_per_prompt
+
+ assert len(saved_files) == expected_count, \
+     f"Expected {expected_count} images, but found {len(saved_files)}."
+
+ total_time = round(time.time() - start_time, 4)
+ final_mem = round(get_mem() - post_model_mem, 2)
+ model_mem_used = round(post_model_mem - initial_mem, 2)
+
+ # start calculating metrics
+ DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+ # weights = Inception_V3_Weights.DEFAULT
+ # inception = inception_v3(weights=weights).to(DEVICE)
+ # inception.eval()
+
+ # # --- Preprocessing ---
+ # IMG_SIZE = 299
+ # preprocess = transforms.Compose([
+ #     transforms.Resize((IMG_SIZE, IMG_SIZE)),
+ #     transforms.ToTensor(),
+ #     transforms.Normalize([0.5]*3, [0.5]*3),
+ # ])
+
+ modelx = AutoModel.from_pretrained(
+     "ikim-uk-essen/BiomedCLIP_ViT_patch16_224", trust_remote_code=True).to(DEVICE)
+ processor = AutoProcessor.from_pretrained(
+     "ikim-uk-essen/BiomedCLIP_ViT_patch16_224", trust_remote_code=True)
+ modelx.eval()
+
+
+ def extract_features(batch):
+     inputs = processor(images=batch['image'], return_tensors="pt").to(DEVICE)
+     with torch.no_grad():
+         feats = modelx(**inputs).pooler_output
+     # L2-normalize so cosine similarity reduces to a dot product.
+     feats = feats / feats.norm(p=2, dim=-1, keepdim=True)
+     return {'features': feats.cpu().numpy()}
+
+
+ def extract_features_from_paths(image_paths, batch_size=32):
+     imgs = [Image.open(p).convert('RGB') for p in image_paths]
+     dataset = Dataset.from_dict({'image': imgs})
+     dataset = dataset.map(extract_features, batched=True,
+                           batch_size=batch_size)
+     return np.vstack(dataset['features'])
+
+
+ def fid_score(feat1, feat2):
+     mu1, mu2 = feat1.mean(0), feat2.mean(0)
+     sigma1, sigma2 = np.cov(feat1, rowvar=False), np.cov(feat2, rowvar=False)
+     covmean = sqrtm(sigma1 @ sigma2).real
+     return ((mu1 - mu2)**2).sum() + np.trace(sigma1 + sigma2 - 2 * covmean)
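+ # This is the Fréchet distance between Gaussians fitted to the two feature
+ # sets: ||mu1 - mu2||^2 + Tr(sigma1 + sigma2 - 2 * sqrtm(sigma1 @ sigma2)).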
+
+
+ def diversity_score(features):
+     # Mean pairwise Euclidean distance within one feature set.
+     return pdist(features).mean()
+
+
+ def mean_cosine_sim(feat1, feat2):
+     return cosine_similarity(feat1, feat2).mean()
+
+
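+ # Quick self-check sketch (hypothetical data; safe to delete): a feature set
+ # compared against itself should give an FID of (numerically) zero.
+ # _f = np.random.randn(64, 8)
+ # assert fid_score(_f, _f) < 1e-6
+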
+ # --- Organize generated images ---
+ generated_files = sorted(
+     [f for f in os.listdir(output_folder) if f.endswith(".png")])
+ prompt_to_images = {}
+ for f in generated_files:
+     # Recover the 1-based prompt index from filenames like 'prompt0001_img0001.png'.
+     prompt_idx = int(f.split("_")[0].replace("prompt", ""))
+     prompt_to_images.setdefault(prompt_idx, []).append(
+         os.path.join(output_folder, f))
+
+ all_features = {}
+ for prompt_idx, paths in tqdm(prompt_to_images.items(), desc="Extracting generated images' features"):
+     all_features[prompt_idx] = extract_features_from_paths(paths)
+
+ val_dataset = load_dataset("SimulaMet/Kvasir-VQA-test", split="validation")
+ prompt_to_real = requests.get(
+     "https://huggingface.co/datasets/SimulaMet/Kvasir-VQA-test/resolve/main/real_mapping").json()
+
+ print("Now, extracting real images' features...")
+ seen = set()
+ # Keep only the first row per img_id: set.add returns None, so
+ # `not seen.add(...)` is True exactly once per id.
+ real_features_cache_ = val_dataset.filter(lambda x: x["img_id"] not in seen and not seen.add(x["img_id"])).map(
+     extract_features,
+     batched=True,
+     batch_size=128
+ )
+ real_features_cache = {
+     image_id: feature
+     for image_id, feature in zip(real_features_cache_["img_id"], real_features_cache_["features"])
+ }
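+ # Optional sanity check (assumption: real_mapping values are lists of img_ids):
+ # assert all(k in real_features_cache
+ #            for keys in prompt_to_real.values() for k in keys)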
+
+
+ # --- Pair consecutive prompts: (0, 1), (2, 3), ...
+ sorted_prompts = sorted(all_features.keys())
+ objectives = []
+ for i in range(0, len(sorted_prompts) - 1, 2):
+     idx_A = sorted_prompts[i]
+     idx_B = sorted_prompts[i + 1]
+     A = all_features[idx_A]
+     B = all_features[idx_B]
+     objectives.append((idx_A, idx_B, A, B))
+
+ # --- Per-objective Metrics ---
+ fids, agreements, diversities = [], [], []
+ all_generated, all_real = [], []
+ per_prompt_data = []
+
+ for idx_A, idx_B, A, B in tqdm(objectives, desc="Scoring"):
+     sim_ab = mean_cosine_sim(A, B)
+     fid_ab = fid_score(A, B)
+     div_A = diversity_score(A)
+     div_B = diversity_score(B)
+
+     # Shared real reference for both prompts in the pair
+     # (prompt_to_real[str(idx_A)] is the same as prompt_to_real[str(idx_B)])
+     real_keys = prompt_to_real[str(idx_A)]
+     # Debug option (disabled): swap in random real images as a sanity baseline.
+     # real_keys = random.sample(val_dataset['img_id'], len(real_keys))
+     real_feats = np.array([real_features_cache[key] for key in real_keys])
+     fid_A_real = fid_score(A, real_feats)
+     fid_B_real = fid_score(B, real_feats)
+
+     # Collect for global metrics
+     all_generated.extend([*A, *B])
+     all_real.extend(real_feats)
+
+     fids.append((fid_A_real + fid_B_real) / 2)
+     agreements.append(sim_ab)
+     diversities.extend([div_A, div_B])
+
+     per_prompt_data.append({
+         "Prompt A": idx_A,
+         "Prompt B": idx_B,
+         "FID(A,B)": fid_ab,
+         "Agreement": sim_ab,
+         "Diversity A": div_A,
+         "Diversity B": div_B,
+         "FID A vs Real": fid_A_real,
+         "FID B vs Real": fid_B_real,
+         "Real Ref": real_feats
+     })
+
+ # --- Global FID ---
+ all_generated = np.array(all_generated)
+ all_real = np.array(all_real)
+ global_fid = fid_score(all_generated, all_real)
+
+ # --- Global Scores ---
+ fidelity_norm = np.mean(100 / (1 + np.array(fids)))
+ agreement_norm = np.mean(agreements)
+ diversity_norm = np.mean(diversities)
+ # final_score = 0.5 * fidelity_norm + 0.3 * agreement_norm + 0.2 * diversity_norm  # let's not use this for now
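+ # Note: fidelity maps each per-pair FID f into (0, 100] via 100 / (1 + f),
+ # e.g. f = 0 -> 100 and f = 24 -> 100 / 25 = 4; higher is better.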
+
+ # --- Output ---
+ public_scores = {
+     "fidelity": float(fidelity_norm),
+     "agreement": float(agreement_norm),
+     "diversity": float(diversity_norm),
+     "FBD": float(global_fid)
+ }
+
+
+ # end calculating metrics
+ output_data = {"submission_info": SUBMISSION_INFO, "public_scores": public_scores, "total_time": total_time,
+                "time_per_item": total_time / len(val_dataset),
+                "memory_used_mb": final_mem, "model_memory_mb": model_mem_used, "gpu_name": gpu_name,
+                # Convert numpy arrays to lists so json.dump can serialize them.
+                "predictions": {k: v.tolist() for k, v in all_features.items()},
+                "debug": {
+                    "packages": json.loads(subprocess.check_output([sys.executable, "-m", "pip", "list", "--format=json"])),
+                    "system": {
+                        "python": platform.python_version(),
+                        "os": platform.system(),
+                        "platform": platform.platform(),
+                        "arch": platform.machine()
+                    }}}
+
+
+ with open("predictions_2.json", "w") as f:
+     json.dump(output_data, f, indent=4)
+ print(f"Time: {total_time}s | Mem: {final_mem}MB | Model Load Mem: {model_mem_used}MB | GPU: {gpu_name}")
+ print("✅ Script looks good! Generation process completed successfully. Results saved to 'predictions_2.json'.")
+ print("Next Step:\n 1) Upload this submission_task2.py script file to a HuggingFace model repository.")
+ print('''\n 2) Make a submission to the competition:\n    Run:: medvqa validate_and_submit --competition=gi-2025 --task=2 --repo_id=...''')
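+ # For example (assuming the `medvqa` CLI from this repository is installed and
+ # `<username>/<repo>` is a placeholder for the model repo you uploaded to):
+ #   medvqa validate_and_submit --competition=gi-2025 --task=2 --repo_id=<username>/<repo>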