taesiri committed on
Commit
212e953
·
1 Parent(s): c6435d2
Files changed (1) hide show
  1. app.py +36 -6
app.py CHANGED
@@ -182,10 +182,31 @@ def get_annotated_indices(username):
182
  return set()
183
 
184
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
185
  def get_random_sample(username):
186
- """Get a random sample excluding previously annotated items"""
187
- # Get indices already annotated by this user
188
  annotated_indices = get_annotated_indices(username)
 
189
 
190
  # Get all valid indices that haven't been annotated
191
  all_indices = set(range(len(dataset)))
@@ -195,9 +216,18 @@ def get_random_sample(username):
195
  # If user has annotated all items, allow repeats
196
  available_indices = list(all_indices)
197
 
198
- # Randomly select from available indices
199
- idx = random.choice(available_indices)
200
- sample = dataset[idx]
 
 
 
 
 
 
 
 
 
201
 
202
  # Randomly decide which image goes to position A and B
203
  if random.choice([True, False]):
@@ -226,7 +256,7 @@ def get_random_sample(username):
226
  "image_b": image_b,
227
  "model_a": model_a,
228
  "model_b": model_b,
229
- "dataset_idx": idx, # Include the dataset index in the return
230
  }
231
 
232
 
 
182
  return set()
183
 
184
 
185
def get_annotated_post_ids(username):
    """Return the set of post_ids already annotated by this user.

    Reads ``data/evaluation_results_exp.csv`` and collects the ``post_id``
    of every row whose ``username`` column equals *username*.

    The lookup is best-effort: an empty set is returned (and a message is
    printed) when the file does not exist, when it lacks the ``post_id`` or
    ``username`` column, or when it cannot be read or parsed.

    Args:
        username: The annotator whose previous annotations are looked up.

    Returns:
        A set of the distinct, non-missing ``post_id`` values for the user.
    """
    filename = "data/evaluation_results_exp.csv"
    if not os.path.exists(filename):
        print(f"No annotations found for user {username} (file doesn't exist)")
        return set()

    try:
        df = pd.read_csv(filename)
        if "post_id" not in df.columns or "username" not in df.columns:
            print(f"No annotations found for user {username} (missing columns)")
            return set()
        # Drop rows with a missing post_id: NaN values are not equal to each
        # other, so they would each count as a separate "seen" post.
        user_rows = df[df["username"] == username]
        seen_post_ids = set(user_rows["post_id"].dropna().tolist())
        print(f"User {username} has seen {len(seen_post_ids)} unique posts")
        return seen_post_ids
    except Exception as exc:
        # Catch Exception rather than using a bare ``except`` so that
        # KeyboardInterrupt / SystemExit still propagate; report the cause
        # instead of hiding it.
        print(f"Error reading annotations for user {username}: {exc}")
        return set()
203
+
204
+
205
  def get_random_sample(username):
206
+ """Get a random sample trying to avoid previously seen post_ids"""
207
+ # Get indices and post_ids already annotated by this user
208
  annotated_indices = get_annotated_indices(username)
209
+ annotated_post_ids = get_annotated_post_ids(username)
210
 
211
  # Get all valid indices that haven't been annotated
212
  all_indices = set(range(len(dataset)))
 
216
  # If user has annotated all items, allow repeats
217
  available_indices = list(all_indices)
218
 
219
+ # Try up to 5 times to get a sample with unseen post_id
220
+ max_attempts = 5
221
+ for _ in range(max_attempts):
222
+ idx = random.choice(available_indices)
223
+ sample = dataset[idx]
224
+ if sample["post_id"] not in annotated_post_ids:
225
+ break
226
+ # Remove this index from available indices for next attempt
227
+ available_indices.remove(idx)
228
+ if not available_indices:
229
+ # If no more indices available, use the last sampled one
230
+ break
231
 
232
  # Randomly decide which image goes to position A and B
233
  if random.choice([True, False]):
 
256
  "image_b": image_b,
257
  "model_a": model_a,
258
  "model_b": model_b,
259
+ "dataset_idx": idx,
260
  }
261
 
262