taesiri committed on
Commit
52730a4
·
1 Parent(s): e127770
Files changed (1) hide show
  1. app.py +935 -0
app.py ADDED
@@ -0,0 +1,935 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import base64
3
+ import json
4
+ import os
5
+ import shutil
6
+ import uuid
7
+ import glob
8
+ from huggingface_hub import CommitScheduler, HfApi, snapshot_download
9
+ from pathlib import Path
10
+
11
# Hub API client built with the token from the HF_TOKEN environment variable.
# The os.environ[...] lookup raises KeyError at import time when it is unset.
api = HfApi(token=os.environ["HF_TOKEN"])
12
+
13
+
14
+ # Download existing data from hub
15
def sync_with_hub():
    """
    Synchronize local data with the hub by downloading the latest dataset.

    Steps:
      1. If ./data exists, mirror it to ./data_backup (replacing any older
         backup).
      2. Download a snapshot of the dataset repo into ./hub_data.
      3. Copy every directory found under the snapshot's data/ folder into
         ./data, skipping directories that already exist locally so local
         content is never overwritten.
      4. Remove ./hub_data, even when the download or the merge fails.

    Raises:
        Whatever snapshot_download or the shutil file operations raise; the
        temporary download directory is removed before the error propagates.
    """
    print("Starting sync with hub...")
    data_dir = Path("./data")
    download_dir = Path("hub_data")

    if data_dir.exists():
        # Backup existing data
        backup_dir = Path("./data_backup")
        if backup_dir.exists():
            shutil.rmtree(backup_dir)
        shutil.copytree(data_dir, backup_dir)

    try:
        # Download latest data from hub
        repo_path = snapshot_download(
            repo_id="taesiri/zb_dataset_storage2",
            repo_type="dataset",
            local_dir="hub_data",
        )

        # Merge hub data with local data
        hub_data_dir = Path(repo_path) / "data"
        if hub_data_dir.exists():
            data_dir.mkdir(exist_ok=True)
            for item in hub_data_dir.glob("*"):
                if not item.is_dir():
                    continue
                dest = data_dir / item.name
                if not dest.exists():  # Only copy if doesn't exist locally
                    shutil.copytree(item, dest)
    finally:
        # Clean up the downloaded repo on success and on failure alike, so a
        # failed sync does not leave a stale copy behind.
        if download_dir.exists():
            shutil.rmtree(download_dir)
    print("Finished syncing with hub!")
50
+
51
+
52
# Scheduler that pushes the local ./data folder to the "data" directory of the
# dataset repo. It is created at import time, before sync_with_hub() runs.
# NOTE(review): per the huggingface_hub docs `every` is expressed in minutes,
# so this would commit once a minute - confirm against the installed version.
scheduler = CommitScheduler(
    repo_id="taesiri/zb_dataset_storage2",
    repo_type="dataset",
    folder_path="./data",
    path_in_repo="data",
    every=1,
)
59
+
60
+
61
def load_existing_questions():
    """
    Load all existing questions from the data directory.

    Scans every sub-directory of ./data for a question.json file and builds a
    preview from its "question" field (truncated to 100 characters followed by
    "..." when longer).

    Returns:
        A list of (question_id, "question_id: preview") tuples sorted by the
        second element. The list is empty when ./data does not exist. Entries
        whose question.json cannot be read or parsed, or that have no usable
        "question" value, are skipped.
    """
    questions = []
    data_dir = "./data"
    if not os.path.exists(data_dir):
        return questions

    for question_dir in glob.glob(os.path.join(data_dir, "*")):
        json_path = os.path.join(question_dir, "question.json")
        if not (os.path.isdir(question_dir) and os.path.exists(json_path)):
            continue
        try:
            with open(json_path, "r", encoding="utf-8") as f:
                data = json.loads(f.read().strip())
            text = data["question"]
            preview = f"{text[:100]}..." if len(text) > 100 else text
        except (OSError, ValueError, KeyError, TypeError):
            # Unreadable file, invalid JSON/encoding, or unexpected structure:
            # skip this entry only, without hiding interpreter-level signals
            # such as KeyboardInterrupt.
            continue
        question_id = os.path.basename(question_dir)
        questions.append((question_id, f"{question_id}: {preview}"))

    return sorted(questions, key=lambda x: x[1])
89
+
90
+
91
def load_question_data(question_id):
    """
    Load a specific question's data.

    Args:
        question_id: Either a bare question folder name or a dropdown value of
            the form "<id>: <preview>"; only the part before the first colon
            is used.

    Returns:
        A list of 27 values in form order: five author fields, the categories
        as a comma-joined string, ten sub-question text/answer fields, the
        question, final answer, rationale text, image attribution, four
        question image paths, two rationale image paths and finally the
        question id. Every element is None when no id is given, the question
        file does not exist, or loading fails.
    """
    empty_form = [None] * 27
    if not question_id:
        return empty_form

    # Extract the ID part before the colon from the dropdown selection
    question_id = (
        question_id.split(":")[0].strip() if ":" in question_id else question_id
    )

    question_folder = os.path.join("./data", question_id)
    json_path = os.path.join(question_folder, "question.json")
    if not os.path.exists(json_path):
        print(f"Question file not found: {json_path}")
        return empty_form

    def load_image(image_path):
        # Stored references are relative; resolve them inside the question
        # folder and drop the ones whose file is missing.
        if not image_path:
            return None
        full_path = os.path.join(question_folder, os.path.basename(image_path))
        return full_path if os.path.exists(full_path) else None

    def slot_images(listed, prefix, slot_count):
        # Images are stored as "<prefix>_<slot>.png" but listed without gaps,
        # so a list position is not the slot number (images 1 and 3 are listed
        # as two entries). Prefer the slot encoded in the file name and only
        # fall back to list position when no entry follows that naming.
        listed = list(listed or [])
        by_name = {os.path.basename(path): path for path in listed if path}
        named = [
            by_name.get(f"{prefix}_{slot}.png") for slot in range(1, slot_count + 1)
        ]
        if any(named):
            return [load_image(path) for path in named]
        padded = listed[:slot_count] + [None] * slot_count
        return [load_image(path) for path in padded[:slot_count]]

    try:
        with open(json_path, "r", encoding="utf-8") as f:
            data = json.loads(f.read().strip())

        author = data["author_info"]

        # Convert authorship_interest to boolean if it's a string
        authorship = author.get("authorship_interest", False)
        if isinstance(authorship, str):
            authorship = authorship.lower() == "true"

        categories = data["question_categories"]
        if isinstance(categories, list):
            categories = ",".join(categories)

        subquestion_fields = []
        for number in range(1, 6):
            subquestion_fields.append(data.get(f"subquestions_{number}_text", "N/A"))
            subquestion_fields.append(data.get(f"subquestions_{number}_answer", "N/A"))

        return [
            author["name"],
            author["email_address"],
            author["institution"],
            author.get("openreview_profile", ""),
            authorship,
            categories,
            *subquestion_fields,
            data["question"],
            data["final_answer"],
            data.get("rationale_text", ""),
            data["image_attribution"],
            *slot_images(data.get("question_images", []), "question_image", 4),
            *slot_images(data.get("rationale_images", []), "rationale_image", 2),
            question_id,
        ]
    except Exception as e:
        # UI boundary: report the problem and hand back an empty form instead
        # of failing the event handler.
        print(f"Error loading question {question_id}: {str(e)}")
        return empty_form
166
+
167
+
168
def generate_json_files(
    name,
    email_address,
    institution,
    openreview_profile,
    authorship_interest,
    question_categories,
    subquestion_1_text,
    subquestion_1_answer,
    subquestion_2_text,
    subquestion_2_answer,
    subquestion_3_text,
    subquestion_3_answer,
    subquestion_4_text,
    subquestion_4_answer,
    subquestion_5_text,
    subquestion_5_answer,
    question,
    final_answer,
    rationale_text,
    image_attribution,
    image1,
    image2,
    image3,
    image4,
    rationale_image1,
    rationale_image2,
    existing_id=None,  # Folder name of a previously saved question to update
):
    """
    Save one submission as ./data/<id>/question.json plus its images.

    For each request:
    1) Create a unique folder under ./data/ (or reuse the folder named by
       ``existing_id`` when updating)
    2) Store the supplied images (question + rationale) in that folder as
       ``<label>.png``
    3) Write a single-line JSON file with the question data
    4) Return the path to the JSON file

    Args:
        name, email_address, institution, openreview_profile,
        authorship_interest: Author details, stored under "author_info".
        question_categories: Comma-separated categories; stored as a list of
            stripped strings (empty list when falsy).
        subquestion_N_text / subquestion_N_answer: Stored under the keys
            "subquestions_N_text" / "subquestions_N_answer".
        question, final_answer, rationale_text, image_attribution: Text fields.
        image1..image4, rationale_image1, rationale_image2: Each either a path
            to an existing file (copied into the folder), another non-None
            object (handed to gr.processing_utils.save_image), or None.
        existing_id: When truthy, the question with this id is updated in
            place instead of a new id being generated.

    Returns:
        Path of the written question.json.

    Notes:
        None text values are stored as empty strings. When updating, a slot
        for which no usable new image was supplied keeps the image already
        stored in the question folder for that slot.
    """

    def safe_str(val):
        # Unset form fields arrive as None; store them as empty strings.
        return val if val is not None else ""

    # Use existing ID if updating, otherwise generate new one
    request_id = existing_id if existing_id else str(uuid.uuid4())

    parent_data_folder = "./data"
    request_folder = os.path.join(parent_data_folder, request_id)
    os.makedirs(request_folder, exist_ok=True)

    # Convert question_categories to list
    question_categories = (
        [cat.strip() for cat in safe_str(question_categories).split(",")]
        if question_categories
        else []
    )

    all_images = [
        ("question_image_1", image1),
        ("question_image_2", image2),
        ("question_image_3", image3),
        ("question_image_4", image4),
        ("rationale_image_1", rationale_image1),
        ("rationale_image_2", rationale_image2),
    ]

    files_list = []
    for img_label, img_obj in all_images:
        stored_path = os.path.join(request_folder, f"{img_label}.png")
        saved = False
        if isinstance(img_obj, str):
            # Image supplied as a file path
            if os.path.exists(img_obj):
                # Copying a file onto itself raises, so skip the copy when
                # the source already is the stored file.
                if os.path.abspath(img_obj) != os.path.abspath(stored_path):
                    shutil.copy2(img_obj, stored_path)
                saved = True
        elif img_obj is not None:
            # Image supplied as an in-memory object (e.g. a numpy array)
            gr.processing_utils.save_image(img_obj, stored_path)
            saved = True

        # Updating without a usable replacement: keep the image already stored
        # for this slot. It must be looked up inside the question folder; the
        # "./<label>.png" reference in question.json is relative to that
        # folder, not to the working directory.
        if not saved and existing_id and os.path.exists(stored_path):
            saved = True

        if saved:
            files_list.append((img_label, stored_path))

    def stored_refs(label_prefix):
        # References are written relative to the question folder.
        return [
            os.path.join(".", os.path.basename(path))
            for label, path in files_list
            if label_prefix in label
        ]

    item_urls = {
        "custom_id": f"question___{request_id}",
        # Metadata at top level
        "author_info": {
            "name": safe_str(name),
            "email_address": safe_str(email_address),
            "institution": safe_str(institution),
            "openreview_profile": safe_str(openreview_profile),
            "authorship_interest": safe_str(authorship_interest),
        },
        "question_categories": question_categories,
        "image_attribution": safe_str(image_attribution),
        "question": safe_str(question),
        "question_images": stored_refs("question_image"),
        "final_answer": safe_str(final_answer),
        "rationale_text": safe_str(rationale_text),
        "rationale_images": stored_refs("rationale_image"),
    }

    subquestions = [
        (subquestion_1_text, subquestion_1_answer),
        (subquestion_2_text, subquestion_2_answer),
        (subquestion_3_text, subquestion_3_answer),
        (subquestion_4_text, subquestion_4_answer),
        (subquestion_5_text, subquestion_5_answer),
    ]
    for number, (text, answer) in enumerate(subquestions, start=1):
        item_urls[f"subquestions_{number}_text"] = safe_str(text)
        item_urls[f"subquestions_{number}_answer"] = safe_str(answer)

    # Write the record as a single JSON line in the request folder
    urls_jsonl_path = os.path.join(request_folder, "question.json")
    with open(urls_jsonl_path, "w", encoding="utf-8") as f:
        f.write(json.dumps(item_urls, ensure_ascii=False) + "\n")

    return urls_jsonl_path
442
+
443
+
444
+ # Build the Gradio app
445
+ with gr.Blocks() as demo:
446
gr.Markdown("# Dataset Builder")
# Holds the id of the question last filled in by the "Load Selected Question"
# handler. The submit handler receives it to choose between updating that
# question and creating a new one; the clear handler resets it to None.
loaded_question_id = gr.State()
449
+
450
+ with gr.Accordion("Instructions", open=True):
451
+ gr.HTML(
452
+ """
453
+ <h3>Instructions:</h3>
454
+ <p>Welcome to the Hugging Face space for collecting questions for new benchmark datasets.</p>
455
+
456
+ <table style="width:100%; border-collapse: collapse; margin: 10px 0;">
457
+ <tr>
458
+ <th style="width:50%; background-color: #3366f0; padding: 8px; text-align: left; border: 1px solid #ddd;">
459
+ Required Fields
460
+ </th>
461
+ <th style="width:50%; background-color: #3366f0; padding: 8px; text-align: left; border: 1px solid #ddd;">
462
+ Optional Fields
463
+ </th>
464
+ </tr>
465
+ <tr>
466
+ <td style="vertical-align: top; padding: 8px; border: 1px solid #ddd;">
467
+ <ul style="margin: 0;">
468
+ <li>Author Information</li>
469
+ <li>At least <b>one question image</b></li>
470
+ <li>The <b>question text</b></li>
471
+ <li>The <b>final answer</b></li>
472
+ <li><b>Sub-questions</b> with their answers (write 'N/A' if breaking into steps is not reasonable - please use sparingly)</li>
473
+ </ul>
474
+ </td>
475
+ <td style="vertical-align: top; padding: 8px; border: 1px solid #ddd;">
476
+ <ul style="margin: 0;">
477
+ <li>Up to three additional question images</li>
478
+ <li>Supporting images for your answer</li>
479
+ <li><b>Rationale text</b> to explain your reasoning</li>
480
+ </ul>
481
+ </td>
482
+ </tr>
483
+ </table>
484
+
485
+ <h3>Question Criteria:</h3>
486
+ <ul>
487
+ <li>Make questions as challenging as possible. At a minimum, obtaining the correct answer needs to be beyond the capabilities of state-of-the-art large multimodal models.</li>
488
+ <li>Structure your questions to require multiple steps/sub-questions to reach the final answer (e.g., identifying/counting specific objects in the image or requiring a particular piece of knowledge) — this will likely enable better differentiation of model performance.</li>
489
+ <li>Include images/questions that are not copyright-restricted.</li>
490
+ </ul>
491
+
492
+ <h3>Authorship Opportunity:</h3>
493
+ <p>Would you like to be included as an author on our paper? Authorship is offered to anyone submitting 5 or more difficult questions!</p>
494
+
495
+ <p>While not all fields are mandatory, providing additional context through optional fields will help create a more comprehensive dataset. After submitting a question, you can clear up the form to submit another one.</p>
496
+ """
497
+ )
498
# Author details. The clear handler passes these five inputs back unchanged,
# so they survive "Clear Form"; the submit handler rejects blank name, email
# address or institution.
gr.Markdown("## Author Information")
with gr.Row():
    name_input = gr.Textbox(label="Name", lines=1)
    email_address_input = gr.Textbox(label="Email Address", lines=1)
    institution_input = gr.Textbox(
        label="Institution or 'Independent'",
        lines=1,
        placeholder="e.g. MIT, Google, Independent, etc.",
    )
    openreview_profile_input = gr.Textbox(
        label="OpenReview Profile Name",
        lines=1,
        placeholder="Your OpenReview username or profile name",
    )

# Add authorship checkbox
authorship_input = gr.Checkbox(
    label="Would you like to be considered for authorship? (Requires submitting 5+ difficult questions)",
    value=False,
)

gr.Markdown("## Question Information")

# Attribution text for the images; required by the submit handler.
gr.Markdown("### Images Attribution")
image_attribution_input = gr.Textbox(
    label="Images Attribution",
    lines=1,
    placeholder="Include attribution information for the images used in this question (or 'Own' if you created/took them)",
)

# Question Images - Individual Tabs
# Only the first image is required by the submit handler.
with gr.Tabs():
    with gr.Tab("Image 1"):
        image1 = gr.Image(label="Question Image 1", type="filepath")
    with gr.Tab("Image 2 (Optional)"):
        image2 = gr.Image(label="Question Image 2", type="filepath")
    with gr.Tab("Image 3 (Optional)"):
        image3 = gr.Image(label="Question Image 3", type="filepath")
    with gr.Tab("Image 4 (Optional)"):
        image4 = gr.Image(label="Question Image 4", type="filepath")

question_input = gr.Textbox(
    label="Question", lines=15, placeholder="Type your question here..."
)

# Entered as one comma-separated string; generate_json_files splits it into a list.
question_categories_input = gr.Textbox(
    label="Question Categories",
    lines=1,
    placeholder="Comma-separated tags, e.g. math, geometry",
)

# Answer Section
gr.Markdown("## Answer ")

final_answer_input = gr.Textbox(
    label="Final Answer",
    lines=1,
    placeholder="Enter the short/concise final answer...",
)

# Not checked by the submit handler's validation.
rationale_text_input = gr.Textbox(
    label="Rationale Text",
    lines=5,
    placeholder="Enter the reasoning or explanation for the answer...",
)

# Rationale Images - Individual Tabs
with gr.Tabs():
    with gr.Tab("Rationale 1 (Optional)"):
        rationale_image1 = gr.Image(label="Rationale Image 1", type="filepath")
    with gr.Tab("Rationale 2 (Optional)"):
        rationale_image2 = gr.Image(label="Rationale Image 2", type="filepath")
571
+
572
+ # Subquestions Section
573
def _subquestion_row(number, ordinal):
    """Render one row with a sub-question textbox and its answer textbox.

    Both boxes start out as "N/A". Returns (text_box, answer_box).
    """
    with gr.Row():
        text_box = gr.Textbox(
            label=f"Subquestion {number} Text",
            lines=2,
            placeholder=f"{ordinal} sub-question...",
            value="N/A",
        )
        answer_box = gr.Textbox(
            label=f"Subquestion {number} Answer",
            lines=2,
            placeholder=f"Answer to sub-question {number}...",
            value="N/A",
        )
    return text_box, answer_box


gr.Markdown("## Subquestions")
subquestion_1_text_input, subquestion_1_answer_input = _subquestion_row(1, "First")
subquestion_2_text_input, subquestion_2_answer_input = _subquestion_row(2, "Second")
subquestion_3_text_input, subquestion_3_answer_input = _subquestion_row(3, "Third")
subquestion_4_text_input, subquestion_4_answer_input = _subquestion_row(4, "Fourth")
subquestion_5_text_input, subquestion_5_answer_input = _subquestion_row(5, "Fifth")

# Action buttons share one row.
with gr.Row():
    submit_button = gr.Button("Submit")
    clear_button = gr.Button("Clear Form")

# Hidden, read-only file outputs.
with gr.Row():
    output_file_urls = gr.File(
        label="Download URLs JSON",
        interactive=False,
        visible=False,
    )
    output_file_base64 = gr.File(
        label="Download Base64 JSON",
        interactive=False,
        visible=False,
    )
655
+
656
# Collapsed panel for picking a previously saved question and loading it into
# the form.
with gr.Accordion("Load Existing Question", open=False):
    gr.Markdown("## Load Existing Question")

    with gr.Row():
        # Choices are computed once here, when the UI is built; the refresh
        # button and the submit/clear handlers recompute them later.
        existing_questions = gr.Dropdown(
            label="Load Existing Question",
            choices=load_existing_questions(),
            type="value",
            allow_custom_value=False,
        )
        # Label emoji restored; it had been mis-decoded into unreadable text.
        refresh_button = gr.Button("🔄 Refresh")
        load_button = gr.Button("Load Selected Question")
668
+
669
def refresh_questions():
    """Return a dropdown update listing the questions currently found by load_existing_questions()."""
    latest_choices = load_existing_questions()
    return gr.Dropdown(choices=latest_choices)


# The refresh button takes no inputs and only replaces the dropdown's choices.
refresh_button.click(
    fn=refresh_questions,
    inputs=[],
    outputs=[existing_questions],
)
673
+
674
# Load button: fill the whole form, plus the hidden question-id state, from the
# selected dropdown entry. The order below must mirror the list returned by
# load_question_data.
loaded_form_targets = [
    # author details
    name_input,
    email_address_input,
    institution_input,
    openreview_profile_input,
    authorship_input,
    question_categories_input,
    # sub-questions (text, answer)
    subquestion_1_text_input, subquestion_1_answer_input,
    subquestion_2_text_input, subquestion_2_answer_input,
    subquestion_3_text_input, subquestion_3_answer_input,
    subquestion_4_text_input, subquestion_4_answer_input,
    subquestion_5_text_input, subquestion_5_answer_input,
    # question and answer text
    question_input,
    final_answer_input,
    rationale_text_input,
    image_attribution_input,
    # images
    image1, image2, image3, image4,
    rationale_image1, rationale_image2,
    # id of the loaded question
    loaded_question_id,
]
load_button.click(
    fn=load_question_data,
    inputs=[existing_questions],
    outputs=loaded_form_targets,
)
708
+
709
+ # Modify validate_and_generate to handle updates
710
def validate_and_generate(
    nm,
    em,
    inst,
    orp,
    auth,
    qcats,
    sq1t,
    sq1a,
    sq2t,
    sq2a,
    sq3t,
    sq3a,
    sq4t,
    sq4a,
    sq5t,
    sq5a,
    q,
    fa,
    rt,
    ia,
    i1,
    i2,
    i3,
    i4,
    ri1,
    ri2,
    stored_question_id,  # id held in the loaded-question state, or None
):
    """
    Validate the submitted form, then create or update the dataset item.

    The parameters are the form values in the order used by
    generate_json_files, followed by the id of the question currently loaded
    for editing (falsy when the submission is a new question).

    Returns:
        A pair of updates for (submit button, existing-questions dropdown).
        When required fields are missing, a warning listing them is shown,
        nothing is written and the button stays enabled. Otherwise the item
        is written, an info message is shown and the button is disabled.
        In both cases the dropdown choices are reloaded.
    """

    def is_blank(text):
        return not text or not text.strip()

    # (label shown to the user, whether the field is missing), in display order.
    required_checks = [
        ("Name", is_blank(nm)),
        ("Email Address", is_blank(em)),
        ("Institution", is_blank(inst)),
        ("Question", is_blank(q)),
        ("Final Answer", is_blank(fa)),
        ("First Question Image", not i1),
        ("Image Attribution", is_blank(ia)),
        ("First Sub-question and Answer", is_blank(sq1t) or is_blank(sq1a)),
        ("Second Sub-question and Answer", is_blank(sq2t) or is_blank(sq2a)),
        ("Third Sub-question and Answer", is_blank(sq3t) or is_blank(sq3a)),
        ("Fourth Sub-question and Answer", is_blank(sq4t) or is_blank(sq4a)),
        ("Fifth Sub-question and Answer", is_blank(sq5t) or is_blank(sq5a)),
    ]
    missing_fields = [label for label, missing in required_checks if missing]

    if missing_fields:
        # Emoji restored; it had been mis-decoded into unreadable characters.
        warning_msg = f"Required fields missing: {', '.join(missing_fields)} ⛔️"
        gr.Warning(warning_msg, duration=5)
        return gr.Button(interactive=True), gr.Dropdown(
            choices=load_existing_questions()
        )

    # Use the stored ID instead of extracting from dropdown
    existing_id = stored_question_id if stored_question_id else None

    # The returned JSON path is not needed here.
    generate_json_files(
        nm,
        em,
        inst,
        orp,
        auth,
        qcats,
        sq1t,
        sq1a,
        sq2t,
        sq2a,
        sq3t,
        sq3a,
        sq4t,
        sq4a,
        sq5t,
        sq5a,
        q,
        fa,
        rt,
        ia,
        i1,
        i2,
        i3,
        i4,
        ri1,
        ri2,
        existing_id,
    )

    action = "updated" if existing_id else "created"
    gr.Info(
        f"Dataset item {action} successfully! 🎉 Clear the form to submit a new one"
    )

    # Disable the button until the form is cleared.
    return gr.update(interactive=False), gr.Dropdown(
        choices=load_existing_questions()
    )
814
+
815
# Submit button: pass every form value, followed by the loaded-question id, to
# validate_and_generate. The order must match that function's parameter list.
submission_inputs = [
    # author details
    name_input,
    email_address_input,
    institution_input,
    openreview_profile_input,
    authorship_input,
    question_categories_input,
    # sub-questions (text, answer)
    subquestion_1_text_input, subquestion_1_answer_input,
    subquestion_2_text_input, subquestion_2_answer_input,
    subquestion_3_text_input, subquestion_3_answer_input,
    subquestion_4_text_input, subquestion_4_answer_input,
    subquestion_5_text_input, subquestion_5_answer_input,
    # question and answer text
    question_input,
    final_answer_input,
    rationale_text_input,
    image_attribution_input,
    # images
    image1, image2, image3, image4,
    rationale_image1, rationale_image2,
    # id of the question being edited, if any
    loaded_question_id,
]
submit_button.click(
    fn=validate_and_generate,
    inputs=submission_inputs,
    outputs=[submit_button, existing_questions],
)
849
+
850
+ # Fix the clear_form_fields function
851
def clear_form_fields(name, email, inst, openreview, authorship, *args):
    """
    Reset the form for a new submission while keeping the author details.

    Args:
        name, email, inst, openreview, authorship: Current author values,
            returned unchanged so they survive the reset.
        *args: Ignored.

    Returns:
        A list of 30 values: the five preserved author values, an empty
        categories field, ten "N/A" sub-question fields, four emptied text
        fields (question, final answer, rationale text, image attribution),
        six cleared images, a cleared file output, a re-enabled submit
        button, refreshed dropdown choices and a reset (None) question id.
    """
    outputs = [
        # Preserved author details
        name,
        email,
        inst,
        openreview,
        authorship,
        gr.update(value=""),  # Clear question categories
        # Reset the five sub-question text/answer pairs to N/A
        *(gr.update(value="N/A") for _ in range(10)),
        gr.update(value=""),  # Clear question
        gr.update(value=""),  # Clear final answer
        gr.update(value=""),  # Clear rationale text
        gr.update(value=""),  # Clear image attribution
        # Clear the four question images and two rationale images
        *([None] * 6),
        None,  # Clear output file urls
        gr.Button(interactive=True),  # Re-enable submit button
        gr.update(choices=load_existing_questions()),  # Update dropdown
        None,  # Forget the loaded question id
    ]
    # Emoji restored; it had been mis-decoded into unreadable characters.
    gr.Info("Form cleared! Ready for new submission 🔄")
    return outputs
886
+
887
# Clear button: only the author details are sent to clear_form_fields, which
# hands them back untouched. The outputs must follow the order of the list it
# returns.
preserved_author_inputs = [
    name_input,
    email_address_input,
    institution_input,
    openreview_profile_input,
    authorship_input,
]
cleared_targets = preserved_author_inputs + [
    question_categories_input,
    # sub-questions (text, answer)
    subquestion_1_text_input, subquestion_1_answer_input,
    subquestion_2_text_input, subquestion_2_answer_input,
    subquestion_3_text_input, subquestion_3_answer_input,
    subquestion_4_text_input, subquestion_4_answer_input,
    subquestion_5_text_input, subquestion_5_answer_input,
    # question and answer text
    question_input,
    final_answer_input,
    rationale_text_input,
    image_attribution_input,
    # images
    image1, image2, image3, image4,
    rationale_image1, rationale_image2,
    # file output, submit button, dropdown, loaded-question id
    output_file_urls,
    submit_button,
    existing_questions,
    loaded_question_id,
]
clear_button.click(
    fn=clear_form_fields,
    inputs=preserved_author_inputs,
    outputs=cleared_targets,
)
930
+
931
# Script entry point: merge hub data into ./data first, then start the UI.
if __name__ == "__main__":
    print("Initializing app...")
    # Copies question folders that exist on the hub but not locally.
    sync_with_hub()  # Sync before launching the app
    print("Starting Gradio interface...")
    demo.launch()