matsant01 committed on
Commit
af5e0d4
·
1 Parent(s): 56c5ad3

Major update of code. Adding new data with our generations

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. .gitignore +2 -0
  2. app.py +346 -371
  3. clean_preferences.py +104 -0
  4. config.py +64 -0
  5. data/Real-Cartoon/sample_0/alphanoise0.05_timesteps50_QTrue_KTrue_VTrue_taua0.4_taub0.8_guidance3.0.png +3 -0
  6. data/{sample_1/baseline.jpg → Real-Cartoon/sample_0/cp_bg_fg.jpg} +2 -2
  7. data/{sample_100 → Real-Cartoon/sample_0}/input_bg.jpg +2 -2
  8. data/{sample_154 → Real-Cartoon/sample_0}/input_fg.jpg +0 -0
  9. data/{sample_1/input_bg.jpg → Real-Cartoon/sample_0/kvedit.jpg} +2 -2
  10. data/Real-Cartoon/sample_0/prompt.txt +1 -0
  11. data/{sample_10 → Real-Cartoon/sample_0}/tf-icon.png +2 -2
  12. data/Real-Cartoon/sample_1/alphanoise0.05_timesteps50_QTrue_KTrue_VTrue_taua0.4_taub0.8_guidance3.0.png +3 -0
  13. data/{sample_10/baseline.jpg → Real-Cartoon/sample_1/cp_bg_fg.jpg} +2 -2
  14. data/{sample_101 → Real-Cartoon/sample_1}/input_bg.jpg +2 -2
  15. data/{sample_121 → Real-Cartoon/sample_1}/input_fg.jpg +0 -0
  16. data/Real-Cartoon/sample_1/kvedit.jpg +3 -0
  17. data/Real-Cartoon/sample_1/prompt.txt +1 -0
  18. data/{sample_100 → Real-Cartoon/sample_1}/tf-icon.png +2 -2
  19. data/Real-Cartoon/sample_10/alphanoise0.05_timesteps50_QTrue_KTrue_VTrue_taua0.4_taub0.8_guidance3.0.png +3 -0
  20. data/Real-Cartoon/sample_10/cp_bg_fg.jpg +3 -0
  21. data/{sample_102 → Real-Cartoon/sample_10}/input_bg.jpg +2 -2
  22. data/{sample_22 → Real-Cartoon/sample_10}/input_fg.jpg +0 -0
  23. data/Real-Cartoon/sample_10/kvedit.jpg +3 -0
  24. data/Real-Cartoon/sample_10/prompt.txt +1 -0
  25. data/{sample_101 → Real-Cartoon/sample_10}/tf-icon.png +2 -2
  26. data/Real-Cartoon/sample_11/alphanoise0.05_timesteps50_QTrue_KTrue_VTrue_taua0.4_taub0.8_guidance3.0.png +3 -0
  27. data/Real-Cartoon/sample_11/cp_bg_fg.jpg +3 -0
  28. data/Real-Cartoon/sample_11/input_bg.jpg +3 -0
  29. data/{sample_131 → Real-Cartoon/sample_11}/input_fg.jpg +0 -0
  30. data/Real-Cartoon/sample_11/kvedit.jpg +3 -0
  31. data/Real-Cartoon/sample_11/prompt.txt +1 -0
  32. data/{sample_1 → Real-Cartoon/sample_11}/tf-icon.png +2 -2
  33. data/Real-Cartoon/sample_12/alphanoise0.05_timesteps50_QTrue_KTrue_VTrue_taua0.4_taub0.8_guidance3.0.png +3 -0
  34. data/{sample_213/tf-icon.png → Real-Cartoon/sample_12/cp_bg_fg.jpg} +2 -2
  35. data/Real-Cartoon/sample_12/input_bg.jpg +3 -0
  36. data/{sample_18 → Real-Cartoon/sample_12}/input_fg.jpg +0 -0
  37. data/Real-Cartoon/sample_12/kvedit.jpg +3 -0
  38. data/Real-Cartoon/sample_12/prompt.txt +1 -0
  39. data/Real-Cartoon/sample_12/tf-icon.png +3 -0
  40. data/Real-Cartoon/sample_13/alphanoise0.05_timesteps50_QTrue_KTrue_VTrue_taua0.4_taub0.8_guidance3.0.png +3 -0
  41. data/Real-Cartoon/sample_13/cp_bg_fg.jpg +3 -0
  42. data/Real-Cartoon/sample_13/input_bg.jpg +3 -0
  43. data/{sample_160 → Real-Cartoon/sample_13}/input_fg.jpg +0 -0
  44. data/Real-Cartoon/sample_13/kvedit.jpg +3 -0
  45. data/Real-Cartoon/sample_13/prompt.txt +1 -0
  46. data/Real-Cartoon/sample_13/tf-icon.png +3 -0
  47. data/Real-Cartoon/sample_14/alphanoise0.05_timesteps50_QTrue_KTrue_VTrue_taua0.4_taub0.8_guidance3.0.png +3 -0
  48. data/Real-Cartoon/sample_14/cp_bg_fg.jpg +3 -0
  49. data/Real-Cartoon/sample_14/input_bg.jpg +3 -0
  50. data/{sample_1 → Real-Cartoon/sample_14}/input_fg.jpg +0 -0
.gitignore CHANGED
@@ -1,3 +1,5 @@
1
  benchmark_images_generations/
2
  code/
3
  results/
 
 
 
1
  benchmark_images_generations/
2
  code/
3
  results/
4
+ backup/
5
+ __pycache__/
app.py CHANGED
@@ -1,396 +1,371 @@
1
  import gradio as gr
 
2
  import os
3
  import random
4
- import csv
5
- from pathlib import Path
6
- from datetime import datetime, timedelta
7
- import tempfile
8
- from huggingface_hub import HfApi, hf_hub_download, login
9
- from huggingface_hub.utils import RepositoryNotFoundError, EntryNotFoundError
10
  from apscheduler.schedulers.background import BackgroundScheduler
11
- import atexit
12
- import threading
13
- import time
14
- import shutil
15
-
16
- # --- Configuration ---
17
- DATASET_REPO_ID = os.getenv("DATASET_REPO_ID", "matsant01/user-study-collected-preferences")
18
- HF_TOKEN = os.getenv("HF_TOKEN")
19
- RESULTS_FILENAME_IN_REPO = "preferences.csv"
20
- TEMP_DIR = tempfile.mkdtemp()
21
- LOCAL_RESULTS_FILE = Path(TEMP_DIR) / RESULTS_FILENAME_IN_REPO
22
- UPLOAD_INTERVAL_HOURS = 0.1
23
-
24
- DATA_DIR = Path("data")
25
- IMAGE_EXTENSIONS = [".png", ".jpg", ".jpeg", ".webp"]
26
-
27
- # --- Global State for Upload Logic ---
28
- hf_api = None
29
- scheduler = BackgroundScheduler(daemon=True)
30
- upload_lock = threading.Lock()
31
- new_preferences_recorded_since_last_upload = threading.Event()
32
-
33
- # --- Hugging Face Hub Login & Initialization ---
34
- def initialize_hub_and_results():
35
- global hf_api
36
- if HF_TOKEN:
37
- print("Logging into Hugging Face Hub...")
38
  try:
39
- login(token=HF_TOKEN)
40
- hf_api = HfApi()
41
- print(f"Attempting initial download of {RESULTS_FILENAME_IN_REPO} from {DATASET_REPO_ID}")
42
- hf_hub_download(
43
- repo_id=DATASET_REPO_ID,
44
- filename=RESULTS_FILENAME_IN_REPO,
45
- repo_type="dataset",
46
- token=HF_TOKEN,
47
- local_dir=TEMP_DIR,
48
- local_dir_use_symlinks=False
49
- )
50
- print(f"Successfully downloaded existing {RESULTS_FILENAME_IN_REPO} to {LOCAL_RESULTS_FILE}")
51
- except EntryNotFoundError:
52
- print(f"{RESULTS_FILENAME_IN_REPO} not found in repo. Will create locally.")
53
- except RepositoryNotFoundError:
54
- print(f"Error: Dataset repository {DATASET_REPO_ID} not found or token lacks permissions.")
55
- print("Results saving will be disabled.")
56
- hf_api = None
57
  except Exception as e:
58
- print(f"Error during initial download/login: {e}")
59
- print("Proceeding without initial download. File will be created locally.")
60
  else:
61
- print("Warning: HF_TOKEN secret not found. Results will not be saved to the Hub.")
62
- hf_api = None
63
-
64
- # --- Data Loading ---
65
-
66
- def find_image(folder_path: Path, base_name: str) -> Path | None:
67
- for ext in IMAGE_EXTENSIONS:
68
- file_path = folder_path / f"{base_name}{ext}"
69
- if file_path.exists():
70
- return file_path
71
- return None
72
-
73
- def get_sample_ids() -> list[str]:
74
- sample_ids = []
75
- if DATA_DIR.is_dir():
76
- for item in DATA_DIR.iterdir():
77
- if item.is_dir():
78
- prompt_file = item / "prompt.txt"
79
- input_bg = find_image(item, "input_bg")
80
- input_fg = find_image(item, "input_fg")
81
- output_baseline = find_image(item, "baseline")
82
- output_tficon = find_image(item, "tf-icon")
83
- if prompt_file.exists() and input_bg and input_fg and output_baseline and output_tficon:
84
- sample_ids.append(item.name)
85
- return sample_ids
86
-
87
- def load_sample_data(sample_id: str) -> dict | None:
88
- sample_path = DATA_DIR / sample_id
89
- if not sample_path.is_dir():
90
- return None
91
-
92
- prompt_file = sample_path / "prompt.txt"
93
- input_bg_path = find_image(sample_path, "input_bg")
94
- input_fg_path = find_image(sample_path, "input_fg")
95
- output_baseline_path = find_image(sample_path, "baseline")
96
- output_tficon_path = find_image(sample_path, "tf-icon")
97
-
98
- if not all([prompt_file.exists(), input_bg_path, input_fg_path, output_baseline_path, output_tficon_path]):
99
- print(f"Warning: Missing files in sample {sample_id}")
100
- return None
101
-
102
- try:
103
- prompt = prompt_file.read_text().strip()
104
- except Exception as e:
105
- print(f"Error reading prompt for {sample_id}: {e}")
106
- return None
107
-
108
- return {
109
- "id": sample_id,
110
- "prompt": prompt,
111
- "input_bg": str(input_bg_path),
112
- "input_fg": str(input_fg_path),
113
- "output_baseline": str(output_baseline_path),
114
- "output_tficon": str(output_tficon_path),
115
- }
116
-
117
- # --- State and UI Logic ---
118
-
119
- INITIAL_SAMPLE_IDS = get_sample_ids()
120
-
121
- def get_next_sample(available_ids: list[str]) -> tuple[dict | None, list[str]]:
122
- if not available_ids:
123
- return None, []
124
- chosen_id = random.choice(available_ids)
125
- remaining_ids = [id for id in available_ids if id != chosen_id]
126
- sample_data = load_sample_data(chosen_id)
127
- return sample_data, remaining_ids
128
-
129
- def display_new_sample(state: dict, available_ids: list[str]):
130
- sample_data, remaining_ids = get_next_sample(available_ids)
131
-
132
- if not sample_data:
133
- return {
134
- prompt_display: gr.update(value="**Prompt:** No more samples available. Thank you!"),
135
- input_bg_display: gr.update(value=None, visible=False),
136
- input_fg_display: gr.update(value=None, visible=False),
137
- output_a_display: gr.update(value=None, visible=False),
138
- output_b_display: gr.update(value=None, visible=False),
139
- choice_button_a: gr.update(visible=False),
140
- choice_button_b: gr.update(visible=False),
141
- next_button: gr.update(visible=False),
142
- status_display: gr.update(value="**Status:** Completed!"),
143
- app_state: state,
144
- available_samples_state: remaining_ids
145
- }
146
-
147
- outputs = [
148
- {"model_name": "baseline", "path": sample_data["output_baseline"]},
149
- {"model_name": "tf-icon", "path": sample_data["output_tficon"]},
150
- ]
151
- random.shuffle(outputs)
152
- output_a = outputs[0]
153
- output_b = outputs[1]
154
-
155
- state = {
156
- "current_sample_id": sample_data["id"],
157
- "output_a_model_name": output_a["model_name"],
158
- "output_b_model_name": output_b["model_name"],
159
- }
160
-
161
- return {
162
- prompt_display: gr.update(value=f"**Prompt:** {sample_data['prompt']}"),
163
- input_bg_display: gr.update(value=sample_data["input_bg"], visible=True),
164
- input_fg_display: gr.update(value=sample_data["input_fg"], visible=True),
165
- output_a_display: gr.update(value=output_a["path"], visible=True),
166
- output_b_display: gr.update(value=output_b["path"], visible=True),
167
- choice_button_a: gr.update(visible=True, interactive=True),
168
- choice_button_b: gr.update(visible=True, interactive=True),
169
- next_button: gr.update(visible=False),
170
- status_display: gr.update(value="**Status:** Please choose the image you prefer."),
171
- app_state: state,
172
- available_samples_state: remaining_ids
173
- }
174
-
175
- def record_preference(choice: str, state: dict, request: gr.Request):
176
- if not request:
177
- print("Error: Request object is None. Cannot get session ID.")
178
- session_id = "unknown_session"
179
  else:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
180
  try:
181
- session_id = request.client.host
182
- except AttributeError:
183
- print("Error: request.client is None or has no 'host' attribute.")
184
- session_id = "unknown_client"
185
-
186
- if not state or "current_sample_id" not in state:
187
- print("Warning: State missing, cannot record preference.")
188
- return {
189
- choice_button_a: gr.update(interactive=False),
190
- choice_button_b: gr.update(interactive=False),
191
- next_button: gr.update(visible=True, interactive=True),
192
- status_display: gr.update(value="**Status:** Error: Session state lost. Click Next Sample."),
193
- app_state: state
194
- }
195
-
196
- chosen_model_name = state["output_a_model_name"] if choice == "A" else state["output_b_model_name"]
197
- baseline_display = "A" if state["output_a_model_name"] == "baseline" else "B"
198
- tficon_display = "B" if state["output_a_model_name"] == "baseline" else "A"
199
-
200
- new_row = {
201
- "timestamp": datetime.now().isoformat(),
202
- "session_id": session_id,
203
- "sample_id": state["current_sample_id"],
204
- "baseline_displayed_as": baseline_display,
205
- "tficon_displayed_as": tficon_display,
206
- "chosen_display": choice,
207
- "chosen_model_name": chosen_model_name
208
- }
209
- header = list(new_row.keys())
210
-
211
- try:
212
- with upload_lock:
213
- file_exists = LOCAL_RESULTS_FILE.exists()
214
- mode = 'a' if file_exists else 'w'
215
- with open(LOCAL_RESULTS_FILE, mode, newline='', encoding='utf-8') as f:
216
- writer = csv.DictWriter(f, fieldnames=header)
217
- if not file_exists or os.path.getsize(LOCAL_RESULTS_FILE) == 0:
218
- writer.writeheader()
219
- print(f"Created or wrote header to {LOCAL_RESULTS_FILE}")
220
- writer.writerow(new_row)
221
- print(f"Appended preference for {state['current_sample_id']} to local file.")
222
- new_preferences_recorded_since_last_upload.set()
223
-
224
- except Exception as e:
225
- print(f"Error writing local results file {LOCAL_RESULTS_FILE}: {e}")
226
- return {
227
- choice_button_a: gr.update(interactive=False),
228
- choice_button_b: gr.update(interactive=False),
229
- next_button: gr.update(visible=True, interactive=True),
230
- status_display: gr.update(value=f"**Status:** Error saving preference locally: {e}. Click Next."),
231
- app_state: state
232
- }
233
-
234
- return {
235
- choice_button_a: gr.update(interactive=False),
236
- choice_button_b: gr.update(interactive=False),
237
- next_button: gr.update(visible=True, interactive=True),
238
- status_display: gr.update(value=f"**Status:** Preference recorded (Chose {choice}). Click Next Sample."),
239
- app_state: state
240
- }
241
-
242
- def upload_preferences_to_hub():
243
- print("Periodic upload check triggered.")
244
- if not hf_api:
245
- print("Upload check skipped: Hugging Face API not available.")
246
- return
247
-
248
- if not new_preferences_recorded_since_last_upload.is_set():
249
- print("Upload check skipped: No new preferences recorded since last upload.")
250
- return
251
-
252
- with upload_lock:
253
- if not new_preferences_recorded_since_last_upload.is_set():
254
- print("Upload check skipped (race condition avoided): No new preferences.")
255
- return
256
-
257
- if not LOCAL_RESULTS_FILE.exists() or os.path.getsize(LOCAL_RESULTS_FILE) == 0:
258
- print("Upload check skipped: Local results file is missing or empty.")
259
- new_preferences_recorded_since_last_upload.clear()
260
- return
261
-
262
- try:
263
- print(f"Attempting to upload {LOCAL_RESULTS_FILE} to {DATASET_REPO_ID}/{RESULTS_FILENAME_IN_REPO}")
264
- start_time = time.time()
265
- hf_api.upload_file(
266
- path_or_fileobj=str(LOCAL_RESULTS_FILE),
267
- path_in_repo=RESULTS_FILENAME_IN_REPO,
268
- repo_id=DATASET_REPO_ID,
269
- repo_type="dataset",
270
- commit_message=f"Periodic upload of preferences - {datetime.now().isoformat()}"
271
- )
272
- end_time = time.time()
273
- print(f"Successfully uploaded preferences. Took {end_time - start_time:.2f} seconds.")
274
- new_preferences_recorded_since_last_upload.clear()
275
  except Exception as e:
276
- print(f"Error uploading results file: {e}")
277
-
278
- def handle_choice_a(state: dict, request: gr.Request):
279
- return record_preference("A", state, request)
280
-
281
- def handle_choice_b(state: dict, request: gr.Request):
282
- return record_preference("B", state, request)
283
-
284
- with gr.Blocks(title="Image Composition User Study") as demo:
285
- gr.Markdown("# Image Composition User Study")
286
- gr.Markdown(
287
- "> Please look at the input images and the prompt below. "
288
- "Then, compare the two output images (Output A and Output B) and click the button below the one you prefer."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
289
  )
290
 
291
- app_state = gr.State({})
292
- available_samples_state = gr.State(INITIAL_SAMPLE_IDS)
293
-
294
- status_display = gr.Markdown("**Status:** Loading first sample...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
295
 
296
- gr.Markdown("## Inputs")
297
- with gr.Row():
298
- prompt_display = gr.Markdown("**Prompt:** Loading...")
299
  with gr.Row():
300
- with gr.Column():
301
- gr.Markdown("<div style='text-align: center;'>Input Background</div>")
302
- input_bg_display = gr.Image(type="filepath", height=250, width=250, interactive=False, show_label=False)
303
- with gr.Column():
304
- gr.Markdown("<div style='text-align: center;'>Input Foreground</div>")
305
- input_fg_display = gr.Image(type="filepath", height=250, width=250, interactive=False, show_label=False)
 
 
 
 
306
 
307
  gr.Markdown("---")
308
- gr.Markdown("## Choose your preferred output")
309
 
 
 
310
  with gr.Row():
311
- with gr.Column():
312
- output_a_display = gr.Image(label="Output A", type="filepath", height=400, width=400, interactive=False)
313
- choice_button_a = gr.Button("Choose Output A", variant="primary")
314
- with gr.Column():
315
- output_b_display = gr.Image(label="Output B", type="filepath", height=400, width=400, interactive=False)
316
- choice_button_b = gr.Button("Choose Output B", variant="primary")
317
-
318
- next_button = gr.Button("πŸ” Next Sample πŸ”", visible=False)
319
-
320
- demo.load(
321
- fn=display_new_sample,
322
- inputs=[app_state, available_samples_state],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
323
  outputs=[
324
- prompt_display, input_bg_display, input_fg_display,
325
- output_a_display, output_b_display,
326
- choice_button_a, choice_button_b, next_button, status_display,
327
- app_state, available_samples_state
 
328
  ]
329
  )
330
 
331
- choice_button_a.click(
332
- fn=handle_choice_a,
333
- inputs=[app_state],
334
- outputs=[choice_button_a, choice_button_b, next_button, status_display, app_state],
335
- api_name=False,
336
- )
337
 
338
- choice_button_b.click(
339
- fn=handle_choice_b,
340
- inputs=[app_state],
341
- outputs=[choice_button_a, choice_button_b, next_button, status_display, app_state],
342
- api_name=False,
343
- )
344
-
345
- next_button.click(
346
- fn=display_new_sample,
347
- inputs=[app_state, available_samples_state],
348
- outputs=[
349
- prompt_display, input_bg_display, input_fg_display,
350
- output_a_display, output_b_display,
351
- choice_button_a, choice_button_b, next_button, status_display,
352
- app_state, available_samples_state
353
- ],
354
- api_name=False,
355
- )
356
-
357
- def cleanup_temp_dir():
358
- if Path(TEMP_DIR).exists():
359
- print(f"Cleaning up temporary directory: {TEMP_DIR}")
360
- shutil.rmtree(TEMP_DIR, ignore_errors=True)
361
-
362
- def shutdown_hook():
363
- print("Application shutting down. Performing final upload check...")
364
- upload_preferences_to_hub()
365
- if scheduler.running:
366
- print("Shutting down scheduler...")
367
- scheduler.shutdown(wait=False)
368
- cleanup_temp_dir()
369
- print("Shutdown complete.")
370
-
371
- atexit.register(shutdown_hook)
 
 
 
 
 
 
 
 
 
372
 
373
  if __name__ == "__main__":
374
- initialize_hub_and_results()
375
-
376
- if not INITIAL_SAMPLE_IDS:
377
- print("Error: No valid samples found in the 'data' directory.")
378
- print("Please ensure the 'data' directory exists and contains subdirectories")
379
- print("named like 'sample_id', each with 'prompt.txt', 'input_bg.*',")
380
- print("'input_fg.*', 'baseline.*', and 'tf-icon.*' files.")
381
- elif not DATASET_REPO_ID:
382
- print("Error: DATASET_REPO_ID environment variable is not set or is set to the default placeholder.")
383
- print("Please set the DATASET_REPO_ID environment variable or update the script.")
384
- elif hf_api:
385
- print(f"Starting periodic upload scheduler (every {UPLOAD_INTERVAL_HOURS} hours)...")
386
- scheduler.add_job(upload_preferences_to_hub, 'interval', hours=UPLOAD_INTERVAL_HOURS)
387
- scheduler.start()
388
- print(f"Found {len(INITIAL_SAMPLE_IDS)} samples.")
389
- print(f"Configured to save results periodically to Hugging Face Dataset: {DATASET_REPO_ID}")
390
- print("Starting Gradio app...")
391
- demo.launch(server_name="0.0.0.0")
392
- else:
393
- print("Warning: Running without Hugging Face Hub integration (HF_TOKEN or DATASET_REPO_ID missing/invalid).")
394
- print(f"Found {len(INITIAL_SAMPLE_IDS)} samples.")
395
- print("Starting Gradio app...")
396
- demo.launch(server_name="0.0.0.0")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ import pandas as pd
3
  import os
4
  import random
5
+ from datetime import datetime
 
 
 
 
 
6
  from apscheduler.schedulers.background import BackgroundScheduler
7
+ from PIL import Image
8
+
9
+ import config
10
+ import utils
11
+
12
+ # --- Global Variables & Initial Setup ---
13
+ # Attempt to log in to Hugging Face Hub at startup
14
+ utils.login_hugging_face()
15
+
16
+ # Load preferences: Try from Hub, then local, then empty
17
+ preferences_df = utils.load_preferences_from_hf_hub(config.HF_DATASET_REPO_ID, config.RESULTS_CSV_FILE)
18
+ if preferences_df is None:
19
+ if os.path.exists(config.RESULTS_CSV_FILE):
20
+ print(f"Loading preferences from local file: {config.RESULTS_CSV_FILE}")
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  try:
22
+ preferences_df = pd.read_csv(config.RESULTS_CSV_FILE)
23
+ except pd.errors.EmptyDataError:
24
+ print(f"Local preferences file {config.RESULTS_CSV_FILE} is empty. Starting fresh.")
25
+ preferences_df = pd.DataFrame(columns=config.CSV_HEADERS)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  except Exception as e:
27
+ print(f"Error loading local {config.RESULTS_CSV_FILE}: {e}. Starting fresh.")
28
+ preferences_df = pd.DataFrame(columns=config.CSV_HEADERS)
29
  else:
30
+ print("No existing preferences found on Hub or locally. Starting with an empty table.")
31
+ preferences_df = pd.DataFrame(columns=config.CSV_HEADERS)
32
+
33
+ # Scan for available data
34
+ ALL_SAMPLES_BY_DOMAIN = utils.scan_data_directory(config.DATA_FOLDER)
35
+ if not ALL_SAMPLES_BY_DOMAIN:
36
+ print(f"CRITICAL: No data found in {config.DATA_FOLDER}. The app might not function correctly.")
37
+ # Potentially raise an error or display a message in the UI if no data
38
+
39
+ # --- Scheduler for Periodic Uploads ---
40
+ def scheduled_upload_job():
41
+ global preferences_df
42
+ print(f"Running scheduled job: Saving and uploading preferences at {datetime.now()}")
43
+ if preferences_df is not None and not preferences_df.empty:
44
+ utils.save_preferences_to_hf_hub(preferences_df, config.HF_DATASET_REPO_ID, config.RESULTS_CSV_FILE, commit_message="Periodic background update")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  else:
46
+ print("Scheduled job: Preferences DataFrame is empty. Nothing to upload.")
47
+
48
+ scheduler = BackgroundScheduler()
49
+ scheduler.add_job(scheduled_upload_job, 'interval', hours=config.PUSH_INTERVAL_HOURS)
50
+ scheduler.start()
51
+ print(f"Scheduler started. Will attempt to upload preferences every {config.PUSH_INTERVAL_HOURS} hour(s).")
52
+
53
+
54
+ # --- Core Gradio App Functions ---
55
+ def start_new_session():
56
+ """Initializes a new user session."""
57
+ session_id = utils.generate_session_id()
58
+ sample_queue = utils.prepare_session_samples(ALL_SAMPLES_BY_DOMAIN, config.SAMPLES_PER_DOMAIN)
59
+ current_sample_index = 0
60
+ if not sample_queue:
61
+ no_samples_msg = f"# πŸ˜₯ No Samples Available!\n\n### Please check the data folder configuration or try again later."
62
+ return session_id, sample_queue, current_sample_index, no_samples_msg, None, None, None, [], [], True
63
+
64
+ print(f"New session started: {session_id}, with {len(sample_queue)} samples.")
65
+ domain_prompt_md, bg, fg, s_data, out_imgs, disp_info, end_flag = load_and_display_sample(sample_queue, current_sample_index)
66
+ return session_id, sample_queue, current_sample_index, domain_prompt_md, bg, fg, s_data, out_imgs, disp_info, end_flag
67
+
68
+
69
+ def load_and_display_sample(sample_queue, current_sample_index):
70
+ """Loads and prepares a single sample for display."""
71
+ if not sample_queue or current_sample_index >= len(sample_queue):
72
+ end_session_msg = f"# πŸŽ‰ All Rated! πŸŽ‰\n\n### All samples for this session have been rated. Thank you!"
73
+ return end_session_msg, None, None, None, [], [], True # End of session
74
+
75
+ domain, sample_id = sample_queue[current_sample_index]
76
+ sample_data = utils.load_sample_data(domain, sample_id)
77
+
78
+ if sample_data is None:
79
+ print(f"Error loading sample {domain}/{sample_id}. Skipping.")
80
+ error_msg = f"## ⚠️ Error Loading Sample\n\nCould not load data for {domain}/{sample_id}. Skipping to the next one."
81
+ return error_msg, None, None, None, [], [], False
82
+
83
+ prompt_text = sample_data["prompt"]
84
+ bg_img_path = sample_data["background_img_path"]
85
+ fg_img_path = sample_data["foreground_img_path"]
86
+
87
+ # Load input bg/fg images without forcing them to be square
88
+ # The gr.Image component will handle scaling to the specified height while preserving aspect ratio.
89
+ bg_image_to_display = Image.open(bg_img_path)
90
+ fg_image_to_display = Image.open(fg_img_path)
91
+
92
+ output_model_keys = list(sample_data["output_image_paths"].keys())
93
+ random.shuffle(output_model_keys)
94
+
95
+ displayed_models_info = []
96
+ output_images_for_display = []
97
+
98
+ # square_size is still used for output option images
99
+ square_size = (config.IMAGE_DISPLAY_SIZE[0], config.IMAGE_DISPLAY_SIZE[0])
100
+
101
+ for model_key in output_model_keys:
102
+ img_path = sample_data["output_image_paths"][model_key]
103
  try:
104
+ img = Image.open(img_path).resize(square_size) # Output images remain square
105
+ output_images_for_display.append(img)
106
+ displayed_models_info.append((model_key, img_path))
107
+ except FileNotFoundError:
108
+ print(f"Image not found: {img_path} for model {model_key}. Skipping this option.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
  except Exception as e:
110
+ print(f"Error loading or resizing image {img_path}: {e}. Skipping this option.")
111
+
112
+ blank_image = Image.new('RGB', square_size, (200, 200, 200))
113
+ while len(output_images_for_display) < 4:
114
+ output_images_for_display.append(blank_image)
115
+ displayed_models_info.append(("BLANK_SLOT", "N/A"))
116
+
117
+ domain_prompt_markdown = f"""
118
+ <div style="padding:15px 20px 20px 20px;border-left:3px black;background-color:#4B5966;border-radius: 10px;color:black;">
119
+
120
+ ### Domain: {domain}
121
+
122
+ </div>
123
+ <br>
124
+ <div style="padding:15px 20px 20px 20px;border-left:3px black;background-color:#4B5966;border-radius: 10px;color:black;">
125
+
126
+ ## Prompt
127
+
128
+ ### _"{prompt_text}"_
129
+
130
+ </div>
131
+ """
132
+
133
+ return (
134
+ domain_prompt_markdown,
135
+ bg_image_to_display, # Pass the PIL image directly
136
+ fg_image_to_display, # Pass the PIL image directly
137
+ sample_data,
138
+ output_images_for_display[:4],
139
+ displayed_models_info[:4],
140
+ False
141
  )
142
 
143
+ def process_vote(choice_index, session_id, sample_queue, current_sample_index, current_sample_data, displayed_models_info_for_sample):
144
+ global preferences_df
145
+
146
+ if current_sample_data is None or not displayed_models_info_for_sample or choice_index >= len(displayed_models_info_for_sample):
147
+ print("Error: Invalid data for processing vote. Skipping.")
148
+ current_sample_index += 1
149
+ if current_sample_index >= len(sample_queue):
150
+ error_end_msg = f"# ⚠️ Error Processing Vote ⚠️\n\n### An issue occurred. The session has ended."
151
+ return preferences_df, current_sample_index, error_end_msg, None, None, None, [], [], True
152
+ else:
153
+ next_prompt_md, next_bg, next_fg, next_s_data, next_out_imgs, next_disp_info, next_hide = load_and_display_sample(sample_queue, current_sample_index)
154
+ return preferences_df, current_sample_index, next_prompt_md, next_bg, next_fg, next_s_data, next_out_imgs, next_disp_info, next_hide
155
+
156
+ domain, sample_id = sample_queue[current_sample_index]
157
+ preferred_model_key, _ = displayed_models_info_for_sample[choice_index]
158
+
159
+ if preferred_model_key == "BLANK_SLOT":
160
+ print("User clicked on a blank slot. Vote not recorded. Please select a valid image.")
161
+ _prompt_md, _bg, _fg, _s_data, _out_imgs, _disp_info, _hide = load_and_display_sample(sample_queue, current_sample_index)
162
+ return preferences_df, current_sample_index, _prompt_md, _bg, _fg, _s_data, _out_imgs, _disp_info, _hide
163
+
164
+ print(f"Session {session_id}: Voted for model '{config.MODEL_DISPLAY_NAMES.get(preferred_model_key, preferred_model_key)}' (key: {preferred_model_key}) for sample {domain}/{sample_id}")
165
+
166
+ preferences_df = utils.record_preference(
167
+ df=preferences_df,
168
+ session_id=session_id,
169
+ domain=domain,
170
+ sample_id=sample_id,
171
+ prompt=current_sample_data["prompt"],
172
+ bg_path=current_sample_data["background_img_path"],
173
+ fg_path=current_sample_data["foreground_img_path"],
174
+ displayed_models_info=displayed_models_info_for_sample,
175
+ preferred_model_key=preferred_model_key
176
+ )
177
+
178
+ try:
179
+ preferences_df.to_csv(config.RESULTS_CSV_FILE, index=False)
180
+ print(f"Preferences saved locally to {config.RESULTS_CSV_FILE}")
181
+ except Exception as e:
182
+ print(f"Error saving preferences locally: {e}")
183
+
184
+ current_sample_index += 1
185
+ if current_sample_index >= len(sample_queue):
186
+ utils.save_preferences_to_hf_hub(preferences_df, config.HF_DATASET_REPO_ID, config.RESULTS_CSV_FILE, commit_message="Session end update")
187
+ final_msg = f"# πŸŽ‰ Session Complete! πŸŽ‰\n\n### All samples have been rated. Thank you for your participation!"
188
+ return preferences_df, current_sample_index, final_msg, None, None, None, [], [], True
189
+
190
+ next_prompt_md, next_bg, next_fg, next_s_data, next_out_imgs, next_disp_info, next_hide = load_and_display_sample(sample_queue, current_sample_index)
191
+ return preferences_df, current_sample_index, next_prompt_md, next_bg, next_fg, next_s_data, next_out_imgs, next_disp_info, next_hide
192
+
193
+
194
# --- Gradio UI Definition ---
# Number of candidate images (and matching vote buttons) shown for every sample.
_NUM_OUTPUT_SLOTS = 4
# config documents IMAGE_DISPLAY_SIZE as (width, height); unpack it once so each
# dimension is passed to the matching gr.Image argument.
_DISPLAY_WIDTH, _DISPLAY_HEIGHT = config.IMAGE_DISPLAY_SIZE

custom_css = """
.custom-vote-button {
    background-color: #FFA500 !important; /* Light Orange for normal state */
    border-color: #FFA500 !important; /* Light Orange for normal state */
    color: white !important;
}
.custom-vote-button:hover {
    background-color: #FF8C00 !important; /* Dark Orange for hover state */
    border-color: #FF8C00 !important; /* Dark Orange for hover state */
    color: white !important;
}
"""

with gr.Blocks(title=config.APP_TITLE, theme=gr.themes.Soft(primary_hue=gr.themes.colors.blue), css=custom_css) as demo:
    # Per-session state carried between event handlers.
    session_id_state = gr.State()
    sample_queue_state = gr.State([])
    current_sample_index_state = gr.State(0)
    current_sample_data_state = gr.State()
    displayed_models_info_state = gr.State([])
    preferences_df_state = gr.State(value=preferences_df)

    gr.Markdown(f"# {config.APP_TITLE}")
    gr.Markdown(config.APP_DESCRIPTION)

    with gr.Row():
        start_button = gr.Button("Start New Session / Load First Sample", variant="primary")

    with gr.Row(equal_height=False):
        with gr.Column(scale=1):
            domain_prompt_info_display = gr.Markdown(value="### Click 'Start New Session' to begin.")

        with gr.Column(scale=2):
            with gr.Row():
                input_bg_image_display = gr.Image(label="Input Background", type="pil", height=_DISPLAY_HEIGHT, interactive=False)
                input_fg_image_display = gr.Image(label="Input Foreground", type="pil", height=_DISPLAY_HEIGHT, interactive=False)

    gr.Markdown("---")
    gr.Markdown("## Choose your preferred composed image:")

    # One image + one vote button per slot; both lists are indexed by slot.
    output_image_displays = []
    vote_buttons = []
    with gr.Row():
        for slot in range(_NUM_OUTPUT_SLOTS):
            with gr.Column():
                img_display = gr.Image(label=f"Option {slot+1}", type="pil", height=_DISPLAY_HEIGHT, width=_DISPLAY_WIDTH, interactive=False)
                output_image_displays.append(img_display)
                vote_btn = gr.Button(f"Select Option {slot+1}", elem_id=f"vote_btn_{slot}", elem_classes=["custom-vote-button"])
                vote_buttons.append(vote_btn)

    end_of_session_msg_display = gr.Markdown("", visible=True)

    def _pad_to_slots(values, filler):
        """Return a new list holding ``values`` padded with ``filler`` up to the slot count.

        The input is copied rather than extended in place so the objects
        returned by the session helpers are never mutated here. Inputs longer
        than the slot count are left untruncated.
        """
        padded = list(values or [])
        padded.extend([filler] * (_NUM_OUTPUT_SLOTS - len(padded)))
        return padded

    def _build_ui_updates(state_updates, message, bg, fg, sample_data, out_imgs, disp_info, end):
        """Build the component -> value mapping shared by the start and vote handlers.

        Args:
            state_updates: Handler-specific entries (state components -> values)
                merged into the result as-is.
            message: Text for the sample; shown in the prompt panel while the
                session is running and in the end-of-session panel once ``end``
                is true.
            bg, fg: Values for the background / foreground input image components.
            sample_data: Data of the sample being displayed (stored in state).
            out_imgs: Output images, one per slot; padded with ``None``.
            disp_info: ``(model_key, ...)`` pairs describing which model occupies
                each slot; padded with ``("BLANK_SLOT", "N/A")``.
            end: True when there is no further sample to vote on.

        Returns:
            dict mapping Gradio components to their new values.
        """
        out_imgs = _pad_to_slots(out_imgs, None)
        disp_info = _pad_to_slots(disp_info, ("BLANK_SLOT", "N/A"))

        # Only slots that hold a real model output get an enabled vote button.
        # Computed once: it does not depend on the slot being updated.
        num_actual_outputs = 0
        if sample_data and "output_image_paths" in sample_data and sample_data["output_image_paths"]:
            num_actual_outputs = sum(
                1 for m_key, _ in disp_info if m_key != "BLANK_SLOT" and m_key is not None
            )

        updates = dict(state_updates)
        updates.update({
            domain_prompt_info_display: message if not end else "",
            input_bg_image_display: bg,
            input_fg_image_display: fg,
            current_sample_data_state: sample_data,
            displayed_models_info_state: disp_info,
            end_of_session_msg_display: message if end else "",
        })
        for slot, (img_display, vote_btn) in enumerate(zip(output_image_displays, vote_buttons)):
            updates[img_display] = out_imgs[slot]
            updates[vote_btn] = gr.Button(interactive=not end and slot < num_actual_outputs)
        return updates

    def handle_start_session():
        """Start a new session and return the UI updates for its first sample."""
        s_id, s_queue, s_idx, domain_prompt_or_end_msg, bg, fg, s_data, out_imgs, disp_info, end = start_new_session()
        return _build_ui_updates(
            {
                session_id_state: s_id,
                sample_queue_state: s_queue,
                current_sample_index_state: s_idx,
            },
            domain_prompt_or_end_msg, bg, fg, s_data, out_imgs, disp_info, end,
        )

    start_button.click(
        fn=handle_start_session,
        inputs=[],
        outputs=[
            session_id_state, sample_queue_state, current_sample_index_state,
            domain_prompt_info_display,
            input_bg_image_display, input_fg_image_display,
            current_sample_data_state, displayed_models_info_state, end_of_session_msg_display,
            *output_image_displays, *vote_buttons
        ]
    )

    def make_vote_fn(choice_idx):
        """Return the click handler for the vote button of slot ``choice_idx``.

        A factory is used so each handler captures its own slot index.
        """
        def vote_action(s_id, s_queue, s_idx, current_s_data, disp_info_for_sample, prefs_df_val):
            # NOTE(review): the module-level dataframe is overwritten with this
            # session's state copy before the vote is processed. Presumably
            # process_vote reads the global; confirm this cannot drop votes
            # recorded by other concurrent sessions.
            global preferences_df
            preferences_df = prefs_df_val

            new_prefs_df, new_s_idx, domain_prompt_or_end_msg, bg, fg, new_s_data, out_imgs, new_disp_info, end = process_vote(
                choice_idx, s_id, s_queue, s_idx, current_s_data, disp_info_for_sample
            )
            return _build_ui_updates(
                {
                    preferences_df_state: new_prefs_df,
                    current_sample_index_state: new_s_idx,
                },
                domain_prompt_or_end_msg, bg, fg, new_s_data, out_imgs, new_disp_info, end,
            )
        return vote_action

    for choice_idx, btn in enumerate(vote_buttons):
        btn.click(
            fn=make_vote_fn(choice_idx),
            inputs=[
                session_id_state, sample_queue_state, current_sample_index_state,
                current_sample_data_state, displayed_models_info_state, preferences_df_state
            ],
            outputs=[
                preferences_df_state, current_sample_index_state,
                domain_prompt_info_display,
                input_bg_image_display, input_fg_image_display,
                current_sample_data_state, displayed_models_info_state, end_of_session_msg_display,
                *output_image_displays, *vote_buttons
            ]
        )

    gr.Markdown(config.FOOTER_MESSAGE)
331
 
332
# Install the scheduler shutdown hook before starting the server. demo.launch()
# normally blocks until the app is stopped, so a hook registered after it is not
# in place while the app is running.
import atexit
atexit.register(lambda: scheduler.shutdown() if scheduler.running else None)

if __name__ == "__main__":
    if not os.path.exists(config.DATA_FOLDER):
        # No data folder on disk: generate solid-colour placeholder samples so
        # the UI can be exercised end to end.
        print(f"Creating dummy data folder: {config.DATA_FOLDER}")
        os.makedirs(config.DATA_FOLDER, exist_ok=True)

        dummy_domains = ["Real-Cartoon", "Real-Painting"]
        dummy_model_keys = list(config.MODEL_OUTPUT_IMAGE_NAMES.keys())
        # One distinct colour per model output (cycled if there are more models).
        colors = [(255,0,0), (0,255,0), (0,0,255), (255,255,0), (0,255,255)]

        for domain in dummy_domains:
            domain_path = os.path.join(config.DATA_FOLDER, domain)
            os.makedirs(domain_path, exist_ok=True)
            # Create a couple more samples than a session consumes per domain.
            for i in range(config.SAMPLES_PER_DOMAIN + 2):
                sample_id = f"sample_{i:03d}"
                sample_path = os.path.join(domain_path, sample_id)
                os.makedirs(sample_path, exist_ok=True)

                with open(os.path.join(sample_path, config.PROMPT_FILE_NAME), "w", encoding="utf-8") as f:
                    f.write(f"This is a dummy prompt for {domain} sample {sample_id}.")

                # Best effort: a failure on one sample is reported and the
                # remaining samples are still generated.
                try:
                    img_bg = Image.new('RGB', config.IMAGE_DISPLAY_SIZE, color='gray')
                    img_bg.save(os.path.join(sample_path, config.BACKGROUND_IMAGE_NAME))

                    img_fg = Image.new('RGB', config.IMAGE_DISPLAY_SIZE, color='lightgray')
                    img_fg.save(os.path.join(sample_path, config.FOREGROUND_IMAGE_NAME))

                    for idx, model_key in enumerate(dummy_model_keys):
                        model_img_name = config.MODEL_OUTPUT_IMAGE_NAMES[model_key]
                        img_model = Image.new('RGB', config.IMAGE_DISPLAY_SIZE, color=colors[idx % len(colors)])
                        img_model.save(os.path.join(sample_path, model_img_name))
                except Exception as e:
                    print(f"Error creating dummy image: {e}")
        print("Dummy data creation complete.")
        # Re-scan so the freshly created samples are picked up.
        ALL_SAMPLES_BY_DOMAIN = utils.scan_data_directory(config.DATA_FOLDER)

    demo.launch()
clean_preferences.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import datasets
3
+ import pandas as pd
4
+ from datetime import datetime
5
+
6
+ from config import BACKUP_FOLDER, HF_DATASET_REPO_ID, HF_TOKEN, RESULTS_CSV_FILE, CSV_HEADERS
7
+
8
def main():
    """
    Back up the collected preferences and reset the remote CSV to empty.

    Steps:
      1. Load the preferences CSV from the Hugging Face Hub dataset repo.
      2. Save it to a timestamped CSV inside BACKUP_FOLDER.
      3. Upload a header-only CSV (columns from CSV_HEADERS) over the remote file.

    The script is best effort: if loading or backing up fails, the problem is
    reported and the remote reset is still attempted. The closing message only
    claims that a backup exists when steps 1 and 2 both succeeded.
    """
    # Resolve the token once so download and upload authenticate the same way.
    token = os.getenv("HF_HUB_TOKEN", HF_TOKEN)

    print(f"Attempting to load dataset '{HF_DATASET_REPO_ID}' from Hugging Face Hub (file: {RESULTS_CSV_FILE})...")
    loaded_ok = True
    try:
        # 1. Get the dataset from HF Hub.
        # The token needs write permissions for the upload performed later.
        dataset = datasets.load_dataset(HF_DATASET_REPO_ID, data_files=RESULTS_CSV_FILE, token=token, split='train')
        print(f"Successfully loaded dataset. It has {len(dataset)} entries.")
        dataset_df = dataset.to_pandas()
    except Exception as e:
        loaded_ok = False
        print(f"Error loading dataset from Hugging Face Hub: {e}")
        print("This could be due to the dataset/file not existing, or token issues.")
        print("Attempting to proceed by creating an empty structure for backup and remote reset.")
        # Fall back to a header-only frame so the backup file still has the
        # expected structure.
        dataset_df = pd.DataFrame(columns=CSV_HEADERS)

    # 2. Save it locally to a backup folder with a timestamp.
    if not os.path.exists(BACKUP_FOLDER):
        os.makedirs(BACKUP_FOLDER, exist_ok=True)
        print(f"Created backup folder: {BACKUP_FOLDER}")

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    backup_filename = f"preferences_backup_{timestamp}.csv"
    backup_filepath = os.path.join(BACKUP_FOLDER, backup_filename)

    backup_ok = True
    try:
        dataset_df.to_csv(backup_filepath, index=False)
        print(f"Successfully backed up current preferences (or empty structure) to: {backup_filepath}")
    except Exception as e:
        backup_ok = False
        print(f"Error saving backup to {backup_filepath}: {e}")
        # Deliberately continue: the remote reset is still attempted.

    # 3. Create an empty CSV with the same structure (using config.CSV_HEADERS).
    print(f"Creating an empty dataset structure using predefined CSV_HEADERS: {CSV_HEADERS}")
    # Serialise in memory; upload_file accepts raw bytes, so no temporary file
    # has to be created and cleaned up.
    empty_csv_bytes = pd.DataFrame(columns=CSV_HEADERS).to_csv(index=False).encode("utf-8")

    # 4. Upload the empty CSV over the existing file on the HF Hub.
    print(f"Attempting to push the empty dataset to '{HF_DATASET_REPO_ID}' (file: {RESULTS_CSV_FILE}) on Hugging Face Hub...")
    try:
        from huggingface_hub import HfApi
        api = HfApi(token=token)

        api.upload_file(
            path_or_fileobj=empty_csv_bytes,
            path_in_repo=RESULTS_CSV_FILE,  # Path of the CSV file inside the repo
            repo_id=HF_DATASET_REPO_ID,
            repo_type="dataset",
            commit_message=f"Reset {RESULTS_CSV_FILE} to empty by script"
        )
    except Exception as e:
        print(f"Error pushing empty dataset to Hugging Face Hub: {e}")
        print("The remote dataset might not have been cleared. Please check the Hugging Face Hub.")
        return

    print(f"Successfully pushed empty dataset to replace {RESULTS_CSV_FILE} in Hugging Face Hub: {HF_DATASET_REPO_ID}")
    print("The remote dataset CSV should now be empty but retain its structure based on CSV_HEADERS.")
    if loaded_ok and backup_ok:
        print(f"IMPORTANT: The old data (if any) is backed up at {backup_filepath}")
    else:
        print("WARNING: The previous remote data was NOT backed up locally (loading or saving the backup failed).")


if __name__ == "__main__":
    main()
config.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Configuration for the Gradio User Study App

# --- File and Folder Names ---
DATA_FOLDER = "data"  # Main folder containing domain subfolders
BACKGROUND_IMAGE_NAME = "input_bg.jpg"  # Standard name for background input
FOREGROUND_IMAGE_NAME = "input_fg.jpg"  # Standard name for foreground input
PROMPT_FILE_NAME = "prompt.txt"  # Standard name for the prompt file
# Names for the output images from different models.
# These should be actual filenames present in each sample's folder.
MODEL_OUTPUT_IMAGE_NAMES = {
    "baseline": "cp_bg_fg.jpg",
    "kv-edit": "kvedit.jpg",
    "tf-icon": "tf-icon.png",
    "dit-editor": "alphanoise0.05_timesteps50_QTrue_KTrue_VTrue_taua0.4_taub0.8_guidance3.0.png",
}
# Names to display for each model in the UI (can be different from filenames).
# Keys must match those of MODEL_OUTPUT_IMAGE_NAMES.
MODEL_DISPLAY_NAMES = {
    "baseline": "Model A",
    "kv-edit": "Model B",
    "tf-icon": "Model C",
    "dit-editor": "Model D",
}

# --- Data Collection ---
RESULTS_CSV_FILE = "user_preferences.csv"
CSV_HEADERS = [
    "session_id",
    "timestamp",
    "domain",
    "sample_id",
    "prompt",
    "input_background",
    "input_foreground",
    "displayed_order_model_1",  # To store which model was shown in 1st position
    "displayed_order_model_2",  # To store which model was shown in 2nd position
    "displayed_order_model_3",  # To store which model was shown in 3rd position
    "displayed_order_model_4",  # To store which model was shown in 4th position
    "preferred_model_key",  # The key of the preferred model (e.g., "kv-edit")
    "preferred_model_filename",  # The filename of the preferred image
]
SAMPLES_PER_DOMAIN = 3  # Number of samples to show from each domain per user session

# --- Hugging Face Hub ---
HF_DATASET_REPO_ID = "matsant01/dit-editor-collected-preferences"  # Replace with your actual repo ID
HF_TOKEN = None  # Set this if your dataset is private, or use HF_HUB_TOKEN env var
PUSH_INTERVAL_HOURS = 1  # Interval in hours to push results to the Hub

# --- UI Configuration ---
IMAGE_DISPLAY_SIZE = (300, 300)  # (width, height) for displaying images
APP_TITLE = "Image Composition User Study"
APP_DESCRIPTION = """
Please look at the input foreground and background images, and the text prompt used for generation, then choose the composed image that you prefer the most.
You should consider:
* 📸 **subject consistency**: does the subject resemble the one in the foreground image? Or is it just a similar object/animal?
* 🖼️ **background preservation**: is the background image correctly preserved?
* 🎨 **style blending**: is the subject style correctly adapted to the one of the background?
"""
FOOTER_MESSAGE = "Thank you for participating!"

# --- Other ---
SESSION_ID_LENGTH = 16  # Length of the randomly generated session ID

# --- Paths ---
BACKUP_FOLDER = "backup"
data/Real-Cartoon/sample_0/alphanoise0.05_timesteps50_QTrue_KTrue_VTrue_taua0.4_taub0.8_guidance3.0.png ADDED

Git LFS Details

  • SHA256: 6ccf1e7287e72dac377184b6a428b11784f0483d06856ecaa093cc88e98491f6
  • Pointer size: 131 Bytes
  • Size of remote file: 275 kB
data/{sample_1/baseline.jpg → Real-Cartoon/sample_0/cp_bg_fg.jpg} RENAMED
File without changes
data/{sample_100 → Real-Cartoon/sample_0}/input_bg.jpg RENAMED
File without changes
data/{sample_154 → Real-Cartoon/sample_0}/input_fg.jpg RENAMED
File without changes
data/{sample_1/input_bg.jpg → Real-Cartoon/sample_0/kvedit.jpg} RENAMED
File without changes
data/Real-Cartoon/sample_0/prompt.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ a cartoon animation of a hamburger, a croissant, a piece of bread and a cup of coffee
data/{sample_10 → Real-Cartoon/sample_0}/tf-icon.png RENAMED
File without changes
data/Real-Cartoon/sample_1/alphanoise0.05_timesteps50_QTrue_KTrue_VTrue_taua0.4_taub0.8_guidance3.0.png ADDED

Git LFS Details

  • SHA256: 70a3f02be630c9cba8cfffa2a0117e6c347f5df1f709dea39eea7262a9edcd9c
  • Pointer size: 131 Bytes
  • Size of remote file: 268 kB
data/{sample_10/baseline.jpg → Real-Cartoon/sample_1/cp_bg_fg.jpg} RENAMED
File without changes
data/{sample_101 → Real-Cartoon/sample_1}/input_bg.jpg RENAMED
File without changes
data/{sample_121 → Real-Cartoon/sample_1}/input_fg.jpg RENAMED
File without changes
data/Real-Cartoon/sample_1/kvedit.jpg ADDED

Git LFS Details

  • SHA256: 881c9daec7cc3770ba8c874ba6380e47d1d0dc8b17f9888095223cb6ccdeeabc
  • Pointer size: 131 Bytes
  • Size of remote file: 101 kB
data/Real-Cartoon/sample_1/prompt.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ a cartoon animation of a muffin, a croissant, a piece of bread and a cup of coffee
data/{sample_100 → Real-Cartoon/sample_1}/tf-icon.png RENAMED
File without changes
data/Real-Cartoon/sample_10/alphanoise0.05_timesteps50_QTrue_KTrue_VTrue_taua0.4_taub0.8_guidance3.0.png ADDED

Git LFS Details

  • SHA256: fe95d5adabbc2cd11192bfaad2dcaa56feb55036da54e230d7e0745ad0a8f5a2
  • Pointer size: 131 Bytes
  • Size of remote file: 332 kB
data/Real-Cartoon/sample_10/cp_bg_fg.jpg ADDED

Git LFS Details

  • SHA256: 715b29a7bb61e4829211ff2c5234d91ff4cd42862d9e03c64e13677173ac6efb
  • Pointer size: 130 Bytes
  • Size of remote file: 41.9 kB
data/{sample_102 → Real-Cartoon/sample_10}/input_bg.jpg RENAMED
File without changes
data/{sample_22 → Real-Cartoon/sample_10}/input_fg.jpg RENAMED
File without changes
data/Real-Cartoon/sample_10/kvedit.jpg ADDED

Git LFS Details

  • SHA256: 08798cba07eb4267d3b0a88e42c0fdb1145212a02c293b5306e2af5ee5912f69
  • Pointer size: 131 Bytes
  • Size of remote file: 131 kB
data/Real-Cartoon/sample_10/prompt.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ a cartoon animation of a shopping mall in the distance
data/{sample_101 → Real-Cartoon/sample_10}/tf-icon.png RENAMED
File without changes
data/Real-Cartoon/sample_11/alphanoise0.05_timesteps50_QTrue_KTrue_VTrue_taua0.4_taub0.8_guidance3.0.png ADDED

Git LFS Details

  • SHA256: d0512571664a2a77575d4646698299df0d9f9d3bf3654c1154c6bdd64519e458
  • Pointer size: 131 Bytes
  • Size of remote file: 339 kB
data/Real-Cartoon/sample_11/cp_bg_fg.jpg ADDED

Git LFS Details

  • SHA256: 6a8aa5bdd635747405250c71b35fd0dd2eeccf942c65001e2b6714a48fe3e930
  • Pointer size: 130 Bytes
  • Size of remote file: 38.6 kB
data/Real-Cartoon/sample_11/input_bg.jpg ADDED

Git LFS Details

  • SHA256: 24c87e9fa07e8139aa9dc784d5950f7e8676b402d2fd7eb83ea2dbae2090ea11
  • Pointer size: 131 Bytes
  • Size of remote file: 326 kB
data/{sample_131 → Real-Cartoon/sample_11}/input_fg.jpg RENAMED
File without changes
data/Real-Cartoon/sample_11/kvedit.jpg ADDED

Git LFS Details

  • SHA256: fd7909e85a59db3a80d6f4e205bbcbebbe6900fc1c70eca384a7cadd4e3478bb
  • Pointer size: 131 Bytes
  • Size of remote file: 108 kB
data/Real-Cartoon/sample_11/prompt.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ a cartoon animation of a panda in the forest
data/{sample_1 → Real-Cartoon/sample_11}/tf-icon.png RENAMED
File without changes
data/Real-Cartoon/sample_12/alphanoise0.05_timesteps50_QTrue_KTrue_VTrue_taua0.4_taub0.8_guidance3.0.png ADDED

Git LFS Details

  • SHA256: cb5e77e7a70120af39ffa36810e4aadb50f78d58efc129862977d1d89ea5b2bb
  • Pointer size: 131 Bytes
  • Size of remote file: 334 kB
data/{sample_213/tf-icon.png → Real-Cartoon/sample_12/cp_bg_fg.jpg} RENAMED
File without changes
data/Real-Cartoon/sample_12/input_bg.jpg ADDED

Git LFS Details

  • SHA256: 7a10dd603795ec4b208aa53ac775a9581eff98e2d9ef295e89762c494a788599
  • Pointer size: 131 Bytes
  • Size of remote file: 329 kB
data/{sample_18 → Real-Cartoon/sample_12}/input_fg.jpg RENAMED
File without changes
data/Real-Cartoon/sample_12/kvedit.jpg ADDED

Git LFS Details

  • SHA256: 583fa5d458cbf0d694ace136fcfa3f02f4fe21db81474506ab4d31d212c83915
  • Pointer size: 131 Bytes
  • Size of remote file: 133 kB
data/Real-Cartoon/sample_12/prompt.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ a cartoon animation of buildings in the distance
data/Real-Cartoon/sample_12/tf-icon.png ADDED

Git LFS Details

  • SHA256: 62bf0070f63091e6d35abd95bab7a22ca4d8dcb2e1b80762687b3a300808ca8a
  • Pointer size: 131 Bytes
  • Size of remote file: 345 kB
data/Real-Cartoon/sample_13/alphanoise0.05_timesteps50_QTrue_KTrue_VTrue_taua0.4_taub0.8_guidance3.0.png ADDED

Git LFS Details

  • SHA256: 5d27e8dcce9baf4afc8f3359196dbb7a1c1f446af77ff223351a7108bb7f7d53
  • Pointer size: 131 Bytes
  • Size of remote file: 337 kB
data/Real-Cartoon/sample_13/cp_bg_fg.jpg ADDED

Git LFS Details

  • SHA256: b9ef185356c68207307926caeb03383c29d3b2d5bce96d0c605a24f56a957ad5
  • Pointer size: 130 Bytes
  • Size of remote file: 38.3 kB
data/Real-Cartoon/sample_13/input_bg.jpg ADDED

Git LFS Details

  • SHA256: 24c87e9fa07e8139aa9dc784d5950f7e8676b402d2fd7eb83ea2dbae2090ea11
  • Pointer size: 131 Bytes
  • Size of remote file: 326 kB
data/{sample_160 → Real-Cartoon/sample_13}/input_fg.jpg RENAMED
File without changes
data/Real-Cartoon/sample_13/kvedit.jpg ADDED

Git LFS Details

  • SHA256: 13617d96485ce3e840b61071ddfc6aff881a484425e5fa44083119032f959873
  • Pointer size: 131 Bytes
  • Size of remote file: 108 kB
data/Real-Cartoon/sample_13/prompt.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ a cartoon animation of a fox in the forest
data/Real-Cartoon/sample_13/tf-icon.png ADDED

Git LFS Details

  • SHA256: 3902e79a3bff78b035291b2beeec216a0579eaa46e4b1336df197638e5dcb0e0
  • Pointer size: 131 Bytes
  • Size of remote file: 362 kB
data/Real-Cartoon/sample_14/alphanoise0.05_timesteps50_QTrue_KTrue_VTrue_taua0.4_taub0.8_guidance3.0.png ADDED

Git LFS Details

  • SHA256: 02d94a6916c1f9fa2dd408683d8f90d9ad3a3e5027e10c624637ca19ad7fcd38
  • Pointer size: 131 Bytes
  • Size of remote file: 340 kB
data/Real-Cartoon/sample_14/cp_bg_fg.jpg ADDED

Git LFS Details

  • SHA256: 374443bff5a44823f9231b4995350e04b27f2610efb36890569a44f9e0a630a9
  • Pointer size: 130 Bytes
  • Size of remote file: 37.5 kB
data/Real-Cartoon/sample_14/input_bg.jpg ADDED

Git LFS Details

  • SHA256: 24c87e9fa07e8139aa9dc784d5950f7e8676b402d2fd7eb83ea2dbae2090ea11
  • Pointer size: 131 Bytes
  • Size of remote file: 326 kB
data/{sample_1 → Real-Cartoon/sample_14}/input_fg.jpg RENAMED
File without changes