Yunhao Fang committed on
Commit
40160d0
·
1 Parent(s): 58bf508

initialize space.

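This space hosts a Gradio UI for annotating generated videos against the instruction-following, commonsense, and physical-law criteria defined in app.py. Based on the argparse flags in its __main__ block, the app is launched per domain/model pair, e.g. python app.py --domain robotics --src CogVideo-T2V, and per-labeler annotations are written as JSON files under annotations/<domain>_<src>/.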
Files changed (2)
  1. app.py +542 -0
  2. requirements.txt +10 -0
app.py ADDED
@@ -0,0 +1,542 @@
+ import gradio as gr
+ import os
+ import json
+ import threading
+ from pathlib import Path
+ from moviepy.editor import VideoFileClip
+ import hashlib
+ import random
+ import string
+ from PIL import Image
+
+ # List of physical law violations
+ PHYSICAL_LAWS = [
+     "Violation of Newton's Law: Objects move without any external force.",
+     "Violation of the Law of Conservation of Mass or Solid Constitutive Law: Objects deform or distort irregularly.",
+     "Violation of Fluid Constitutive Law: Liquids flow in an unnatural or irregular manner.",
+     "Violation of Non-physical Penetration: Objects unnaturally pass through each other.",
+     "Violation of Gravity: Objects behave inconsistently with gravity, such as floating in the air.",
+     "No violation!"
+ ]
+
+ # List of commonsense violations
+ COMMON_SENSE = [
+     "Poor Aesthetics: Visually unappealing or low-quality content.",
+     "Temporal Inconsistency: Flickering, choppiness, or sudden appearance/disappearance of irrelevant objects.",
+     "No violation!"
+ ]
+
+ # Example images for physical law violations
+ EXAMPLE_IMAGES = {
+     "newtons_law": "test_images/law_violation1.jpg",
+     "mass_conservation": "test_images/law_violation2.jpg",
+     "fluid": "test_images/law_violation3.jpg",
+     "penetration": "test_images/law_violation4.jpg",
+     "gravity": "test_images/law_violation5.jpg"
+ }
+
+ def string_to_md5(input_string, max_digits=12):
+     return hashlib.md5(input_string.encode()).hexdigest()[:max_digits]
+
+ def generate_random_id(length=6):
+     return ''.join(random.choices(string.ascii_lowercase + string.digits, k=length))
+
+ class VideoAnnotator:
+     def __init__(self, videos, annotation_base_dir, max_resolution=(640, 480)):
+         self.annotation_base_dir = Path(annotation_base_dir)
+         self.max_resolution = max_resolution
+         self.videos = videos
+         self.current_index = 0
+         self.file_locks = {}
+         self.current_labeler = None
+         self.current_labeler_file = None
+
+     def get_annotation_file_path(self, labeler_email):
+         md5_email = string_to_md5(labeler_email, max_digits=12)
+         # random_id = generate_random_id()
+         # file_name = f"md5-{md5_email}.{random_id}.json"
+         file_name = f"md5-{md5_email}.json"
+         return self.annotation_base_dir / file_name
+
+     def load_annotations(self, labeler_email):
+         file_path = self.get_annotation_file_path(labeler_email)
+         if file_path.exists():
+             with open(file_path, 'r') as f:
+                 return json.load(f)
+         return {}
+
+     def save_annotations(self, labeler_email, annotations):
+         file_path = self.get_annotation_file_path(labeler_email)
+         self.annotation_base_dir.mkdir(parents=True, exist_ok=True)
+
+         if file_path not in self.file_locks:
+             self.file_locks[file_path] = threading.Lock()
+
+         with self.file_locks[file_path]:
+             with open(file_path, 'w') as f:
+                 json.dump(annotations, f, indent=2)
+
+     def get_current_video(self):
+         if self.videos:
+             video_path = self.videos[self.current_index]
+             resized_path = self.resize_video_if_needed(video_path)
+             return str(resized_path.resolve())
+         return None
+
+     def resize_video_if_needed(self, video_path):
+         from moviepy.video.io.ffmpeg_writer import ffmpeg_write_video
+         clip = VideoFileClip(str(video_path))
+         width, height = clip.size
+
+         if width > self.max_resolution[0] or height > self.max_resolution[1]:
+             resized_clip = clip.resize(height=self.max_resolution[1])
+             cleaned_name = video_path.name.replace(" ", "_")
+             resized_path = video_path.with_name(f"resized_{cleaned_name}")
+             fps = clip.fps if clip.fps else 8.0
+             ffmpeg_write_video(resized_clip, str(resized_path), fps, codec="libx264")
+             return resized_path
+         return video_path
+
+     def update_annotation(self, video_name, labeler_email, instruction_check, law_annotations, commonsense):
+         video_name = postprocess_name_for_gradio(video_name)
+         annotations = self.load_annotations(labeler_email)
+         if instruction_check and video_name not in annotations:
+             annotations[video_name] = {
+                 "labeler": labeler_email,
+                 "law_details": law_annotations,
+                 "commonsense": commonsense,
+                 "instruction": instruction_check
+             }
+             self.save_annotations(labeler_email, annotations)
+
+     def next_video(self):
+         if self.videos:
+             self.current_index = min(self.current_index + 1, len(self.videos) - 1)
+         return self.get_current_video()
+
+     def prev_video(self):
+         if self.videos:
+             self.current_index = max(self.current_index - 1, 0)
+         return self.get_current_video()
+
+     def jump_to_video(self, index):
+         if self.videos:
+             self.current_index = max(0, min(index, len(self.videos) - 1))
+         return self.get_current_video()
+
+     def set_current_labeler(self, labeler_email):
+         self.current_labeler = labeler_email
+         self.current_labeler_file = self.get_annotation_file_path(labeler_email)
+
+ def postprocess_name_for_gradio(name):
+     return name.replace("–", "").replace("+", "").replace("-", "").replace("t2v", "").replace("(", "").replace(")", "").replace(",", "").replace("_", "").replace(".", "")
+
+ def get_cur_data(instruction_data, video_name):
+     video_name = postprocess_name_for_gradio(video_name)
+     if "resized_" in video_name:
+         clean_name = video_name.replace("resized_", "")
+         clean_name = "_".join(clean_name.split("_")[2:])
+     else:
+         clean_name = video_name
+     # print(clean_name, instruction_data.keys())
+     for k in instruction_data.keys():
+         if k in clean_name:
+             real_name = k
+     cur_data = instruction_data[real_name]
+     return cur_data
+
+ def create_interface(instruction_data, videos, annotation_base_dir):
+     annotator = VideoAnnotator(videos, annotation_base_dir)
+
+     def update_video():
+         video_path = annotator.get_current_video()
+         if video_path is None:
+             # One value per output component: video, labeler, count, instruction, text prompt, then checkboxes.
+             return (None, annotator.current_labeler or "", "0", None, "[system] Video not in benchmark", *[False for _ in PHYSICAL_LAWS], *[False for _ in COMMON_SENSE])
+         video_name = Path(video_path).name
+         cur_data = get_cur_data(instruction_data, video_name)
+         current_annotations = {}
+         if annotator.current_labeler:
+             annotations = annotator.load_annotations(annotator.current_labeler)
+             current_annotations = annotations.get(
+                 postprocess_name_for_gradio(video_name),
+                 {"labeler": annotator.current_labeler, "law_details": {law: False for law in PHYSICAL_LAWS}, "commonsense": {cs: False for cs in COMMON_SENSE}, "instruction": None}
+             )
+         else:
+             current_annotations = {"labeler": "", "law_details": {law: False for law in PHYSICAL_LAWS}, "commonsense": {cs: False for cs in COMMON_SENSE}, "instruction": None}
+
+         first_frame = cur_data["text_first_frame"]
+         num_annotations = str(len(annotations)) if 'annotations' in locals() else "0"
+         text_instruction = cur_data["text_instruction"]
+
+         # Flatten the outputs
+         outputs = [
+             video_path,
+             current_annotations["labeler"] or "",
+             num_annotations,
+             current_annotations["instruction"],
+             text_instruction
+         ]
+         # Add individual law checkbox values
+         outputs.extend([current_annotations["law_details"].get(law, False) for law in PHYSICAL_LAWS])
+         # Add individual commonsense checkbox values
+         outputs.extend([current_annotations["commonsense"].get(cs, False) for cs in COMMON_SENSE])
+         return outputs
+
+
+     def save_current_annotation(video_path, labeler_email, instruction_check, law_values, commonsense_values, skipped: bool = False):
+         if not skipped:
+             if video_path is None:
+                 return "No video loaded to save annotations."
+             if not labeler_email:
+                 return "Please enter a valid labeler email before saving annotations."
+         video_name = Path(video_path).name
+         law_annotations = {law: bool(value) for law, value in zip(PHYSICAL_LAWS, law_values)}
+         commonsense_annotations = {cs: bool(value) for cs, value in zip(COMMON_SENSE, commonsense_values)}
+         annotator.set_current_labeler(labeler_email)
+         annotator.update_annotation(video_name, labeler_email, instruction_check, law_annotations, commonsense_annotations)
+         return f"Annotation saved successfully for {labeler_email}!"
+
+
+     def load_anns_callback(labeler_email):
+         """
+         Load annotations for the given labeler email and jump to the next unlabeled video.
+         Returns the updated interface state.
+         """
+         if not labeler_email:
+             return update_video()
+
+         # Set the current labeler
+         annotator.set_current_labeler(labeler_email)
+
+         # Load existing annotations
+         annotations = annotator.load_annotations(labeler_email)
+
+         # Find the first video that hasn't been annotated
+         next_unannotated_index = None
+         for i, video in enumerate(annotator.videos):
+             video_name = postprocess_name_for_gradio("resized_" + Path(video).name)
+             if video_name not in annotations:
+                 next_unannotated_index = i
+                 break
+
+         # If we found an unannotated video, jump to it
+         if next_unannotated_index is not None:
+             annotator.jump_to_video(next_unannotated_index)
+             video_path = annotator.get_current_video()
+             video_name = Path(video_path).name
+             cur_data = get_cur_data(instruction_data, video_name)
+
+             # Prepare default state for the new video
+             return [
+                 video_path,                        # video
+                 labeler_email,                     # labeler
+                 str(len(annotations)),             # num_annotations
+                 None,                              # instruction_check (default value)
+                 cur_data["text_instruction"],      # text_instruction
+                 *[False for _ in PHYSICAL_LAWS],   # law checkboxes
+                 *[False for _ in COMMON_SENSE]     # commonsense checkboxes
+             ]
+         else:
+             # If all videos are annotated, stay at the current video but update the interface
+             current_video = annotator.get_current_video()
+             if current_video:
+                 video_name = Path(current_video).name
+                 current_annotations = annotations.get(
+                     postprocess_name_for_gradio(video_name),
+                     {
+                         "labeler": labeler_email,
+                         "law_details": {law: False for law in PHYSICAL_LAWS},
+                         "commonsense": {cs: False for cs in COMMON_SENSE},
+                         "instruction": "3"
+                     }
+                 )
+                 cur_data = get_cur_data(instruction_data, video_name)
+
+                 return [
+                     current_video,
+                     labeler_email,
+                     str(len(annotations)),
+                     current_annotations["instruction"],
+                     cur_data["text_instruction"],
+                     *[current_annotations["law_details"].get(law, False) for law in PHYSICAL_LAWS],
+                     *[current_annotations["commonsense"].get(cs, False) for cs in COMMON_SENSE]
+                 ]
+             else:
+                 # Fallback for empty video list
+                 return [
+                     None,
+                     labeler_email,
+                     "0",
+                     None,
+                     "[system] No videos available",
+                     *[False for _ in PHYSICAL_LAWS],
+                     *[False for _ in COMMON_SENSE]
+                 ]
+
+     def check_inputs(labeler_email, instruction_check):
+         """Helper function to check input validity"""
+         if not labeler_email:
+             return False, "Please enter your email before proceeding."
+         if not instruction_check:
+             return False, "Please select whether the video follows the instruction before proceeding."
+         return True, ""
+
+     def confirm_callback(video_path, labeler_email, instruction_check, *checkbox_values):
+         # Placeholder: the Confirm button is not wired to a handler yet.
+         pass
+
+     def skip_callback(video_path, labeler_email, instruction_check, *checkbox_values):
+         ## save annotations with a flag skipped
+         num_laws = len(PHYSICAL_LAWS)
+         law_values = checkbox_values[:num_laws]
+         commonsense_values = checkbox_values[num_laws:]
+         save_current_annotation(video_path, labeler_email, instruction_check, law_values, commonsense_values, skipped=True)
+         annotator.next_video()
+         return update_video()
+
+     def next_video_callback(video_path, labeler_email, instruction_check, *checkbox_values):
+         # First check inputs
+         is_valid, message = check_inputs(labeler_email, instruction_check)
+         if not is_valid:
+             # Return current state with error message
+             gr.Warning(message)
+             return update_video()
+         # Split checkbox values into law and commonsense values
+         num_laws = len(PHYSICAL_LAWS)
+         law_values = checkbox_values[:num_laws]
+         commonsense_values = checkbox_values[num_laws:]
+
+         save_current_annotation(video_path, labeler_email, instruction_check, law_values, commonsense_values)
+         annotator.next_video()
+         return update_video()
+
+     def prev_video_callback(video_path, labeler_email, instruction_check, *checkbox_values):
+         # First check inputs
+         is_valid, message = check_inputs(labeler_email, instruction_check)
+         if not is_valid:
+             # Return current state with error message
+             gr.Warning(message)
+             return update_video()
+         # Split checkbox values into law and commonsense values
+         num_laws = len(PHYSICAL_LAWS)
+         law_values = checkbox_values[:num_laws]
+         commonsense_values = checkbox_values[num_laws:]
+
+         save_current_annotation(video_path, labeler_email, instruction_check, law_values, commonsense_values)
+         annotator.prev_video()
+         return update_video()
+
+     with gr.Blocks() as interface:
+         # gr.Markdown("# Video Annotation Interface")
+
+         with gr.Row():
+             with gr.Column(scale=1):
+                 video = gr.Video(label="Current Video", format="mp4", height=450, width=800)
+                 with gr.Row():
+                     with gr.Column(scale=2):
+                         labeler = gr.Textbox(
+                             label="Labeler ID (your email)",
+                             placeholder="Enter your email",
+                             interactive=True,
+                         )
+                     with gr.Column(scale=1):
+                         num_annotations = gr.Textbox(
+                             label="Annotations Count",
+                             placeholder="0",
+                             interactive=False,
+                         )
+                 text_instruction = gr.Textbox(label="Text prompt", interactive=False)
+                 instruction_check = gr.Radio(
+                     label="Task1: Does this video follow the instruction?",
+                     choices=[
+                         "0: Not at all!!!",
+                         "1: Correct object, wrong motion (or vice versa).",
+                         "2: Follow instruction, fail task.",
+                         "3: Follow instruction, complete task."
+                     ],
+                     type="value",
+                     value="3"
+                 )
+                 with gr.Row():
+                     with gr.Column(scale=1):
+                         skip_btn = gr.Button("Skip! Video Corrupted")
+                     with gr.Column(scale=1):
+                         confirm_btn = gr.Button("Confirm!")
+                 with gr.Row():
+                     with gr.Column(scale=1):
+                         prev_btn = gr.Button("Previous Video")
+                     with gr.Column(scale=1):
+                         next_btn = gr.Button("Next Video")
+                 load_btn = gr.Button("Load Annotations")
+
+             with gr.Column(scale=1):
+                 gr.Markdown("Task2: [Based on your first impression] Select the major <span style='color: blue;'>commonsense violations</span> in the video: <span style='color: red;'>[multiple (0-2) choices]</span>")
+                 commonsense_checkboxes = []
+                 for cs in COMMON_SENSE:
+                     commonsense_checkboxes.append(gr.Checkbox(label=cs))
+
+                 gr.Markdown("Task3: Please select all physics laws the video <span style='color: blue;'>violates</span>: <span style='color: red;'>[multiple (0-5) choices]</span>")
+                 law_checkboxes = []
+                 for i, law in enumerate(PHYSICAL_LAWS):
+                     checkbox = gr.Checkbox(label=law, interactive=True)
+                     law_checkboxes.append(checkbox)
+                     # if i < len(PHYSICAL_LAWS) - 1:
+                     #     image_path = os.path.join(os.path.abspath(__file__).rsplit("/", 1)[0], list(EXAMPLE_IMAGES.values())[i])
+                     if i != len(PHYSICAL_LAWS) - 1:
+                         image_path = list(EXAMPLE_IMAGES.values())[i]
+
+                         image = Image.open(image_path).convert("RGB")
+                         gr.Image(value=image, label=f"Example {i+1}", show_label=True, height=68, width=700)
+
+         # Create a flat list of all inputs
+         all_inputs = [video, labeler, instruction_check] + law_checkboxes + commonsense_checkboxes
+         # Create a flat list of all outputs
+         all_outputs = [video, labeler, num_annotations, instruction_check, text_instruction] + law_checkboxes + commonsense_checkboxes
+
+         # Set up event handlers with flattened inputs and outputs
+         skip_btn.click(
+             skip_callback,
+             inputs=all_inputs,
+             outputs=all_outputs
+         )
+
+         load_btn.click(
+             load_anns_callback,
+             inputs=[labeler],
+             outputs=all_outputs
+         )
+
+         next_btn.click(
+             next_video_callback,
+             inputs=all_inputs,
+             outputs=all_outputs
+         )
+
+         prev_btn.click(
+             prev_video_callback,
+             inputs=all_inputs,
+             outputs=all_outputs
+         )
+
+         interface.load(
+             fn=update_video,
+             inputs=None,
+             outputs=all_outputs
+         )
+
+     return interface
+
+ if __name__ == "__main__":
+     import argparse
+     parser = argparse.ArgumentParser(description="Annotation")
+     parser.add_argument("--domain", type=str, help="benchmark domain: robotics, humans, general, av, or game")
+     parser.add_argument("--src", type=str, help="video source model, e.g. CogVideo-I2V, Open-Sora-T2V, Pandora")
+
+     # Parse the arguments
+     args = parser.parse_args()
+
+     domains = ["robotics", "humans", "general", "av", "game"]
+     src = ["CogVideo-I2V", "CogVideo-T2V", "Open-Sora-I2V", "Open-Sora-T2V", "Pandora", "TurboT2V", "Open-Sora-Plan-I2V", "Open-Sora-Plan-T2V"]
+
+     assert args.domain in domains, f"{args.domain} not in available domains."
+     assert args.src in src, f"{args.src} not in available model srcs."
+
+     instruction_base_path = "domains"
+     src_video_map = {
+         "CogVideo-I2V": "/home/yunhaof/workspace/datasets/outputs_v2",
+         "CogVideo-T2V": "/home/yunhaof/workspace/datasets/outputs_v2",
+         "Pandora": "/lustre/fsw/portfolios/nvr/users/dachengl/VILA-EWM/outputs",
+         "Open-Sora-I2V": "/lustre/fsw/portfolios/nvr/users/dachengl/Open-Sora/outputs",
+         "Open-Sora-T2V": "/lustre/fsw/portfolios/nvr/users/dachengl/Open-Sora/outputs",
+         "TurboT2V": "",
+         "Open-Sora-Plan-I2V": "/home/yunhaof/workspace/projects/Open-Sora-Plan/ewm_benchmark/gradio_videos",
+         "Open-Sora-Plan-T2V": "/home/yunhaof/workspace/projects/Open-Sora-Plan/ewm_benchmark/gradio_videos"
+     }
+
+     # Adhoc solution to naming mismatch
+     domain_name_map = {
+         "humans": "humans",
+         "game": "game",
+         "general": "general",
+         "av": "av",
+         "robotics": "robotics"
+     }
+     cur_domain = domain_name_map[args.domain]
+
+     # video_folder = "/lustre/fsw/portfolios/nvr/users/dachengl/CogVideo/outputs"
+     video_folder = Path(src_video_map[args.src])
+     # print("Processing the 100 videos for the current annotation.")
+     # I2V sources keep clips without "t2v" in the name, T2V sources keep clips with it, and Pandora keeps both.
+     i2v_srcs = {"CogVideo-I2V", "Open-Sora-I2V", "Open-Sora-Plan-I2V"}
+     t2v_srcs = {"CogVideo-T2V", "Open-Sora-T2V", "Open-Sora-Plan-T2V", "TurboT2V"}
+     videos = []
+     for v in video_folder.glob("*.mp4"):
+         if "resized_" in v.stem or f"{cur_domain}_" not in v.stem:
+             continue
+         if args.src in i2v_srcs and "t2v" in v.stem:
+             continue
+         if args.src in t2v_srcs and "t2v" not in v.stem:
+             continue
+         videos.append(v)
+
+     videos = sorted(videos)
+     print(f"Number of videos: {len(videos)}")
+
+     instruction_file = f"{instruction_base_path}/{args.domain}/dataset_v2/instruction_ewm.json"
+     annotation_base = "annotations"
+     os.makedirs(annotation_base, exist_ok=True)
+     annotation_dir = os.path.join(annotation_base, f"{args.domain}_{args.src}")
+
+     instruction_data = {}
+     with open(instruction_file, "r") as f:
+         instructions = json.load(f)
+     for instruction in instructions:
+         file_name = os.path.basename(instruction["video_path"])
+         # gradio will eliminate "-"
+         file_name = postprocess_name_for_gradio(file_name)  # .replace("-", "").replace("_t2v","")
+         instruction_data[file_name] = instruction
+
+     # Check that every video can be matched to an instruction entry, with or without the resized_ prefix.
+     for _video in videos:
+         try:
+             _ = get_cur_data(instruction_data, postprocess_name_for_gradio(Path(_video).name))  # .replace("-", "").replace("_t2v","")
+         except Exception:
+             print(f"Parsing name {_video} failed; check the corresponding name in instruction_ewm.json")
+             raise
+         try:
+             _ = get_cur_data(instruction_data, "resized_" + postprocess_name_for_gradio(Path(_video).name))  # .replace("-", "").replace("_t2v","")
+         except Exception:
+             print(f"Parsing name resized_{_video} failed; check the corresponding name in instruction_ewm.json")
+             raise
+
+     iface = create_interface(instruction_data, videos, annotation_dir)
+     iface.launch(share=True, allowed_paths=[src_video_map[args.src]])
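For reference, a minimal sketch of the per-labeler record that save_annotations writes to disk; the field names mirror update_annotation and get_annotation_file_path, while the email, video key, and selections below are hypothetical placeholders:

import hashlib
import json

# Stand-ins for the PHYSICAL_LAWS / COMMON_SENSE option lists defined in app.py.
PHYSICAL_LAWS = ["Violation of Gravity: Objects behave inconsistently with gravity, such as floating in the air.", "No violation!"]
COMMON_SENSE = ["Temporal Inconsistency: Flickering, choppiness, or sudden appearance/disappearance of irrelevant objects.", "No violation!"]

# Hypothetical labeler email and video key (keys are video names after postprocess_name_for_gradio).
labeler_email = "annotator@example.com"
video_key = "resizedroboticsexamplemp4"

record = {
    video_key: {
        "labeler": labeler_email,
        "law_details": {law: False for law in PHYSICAL_LAWS},    # one bool per physical-law option
        "commonsense": {cs: False for cs in COMMON_SENSE},       # one bool per commonsense option
        "instruction": "3: Follow instruction, complete task.",  # selected radio value
    }
}

# The file name mirrors get_annotation_file_path: md5 of the email, truncated to 12 hex digits.
file_name = "md5-" + hashlib.md5(labeler_email.encode()).hexdigest()[:12] + ".json"
print(file_name)
print(json.dumps(record, indent=2))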
requirements.txt ADDED
@@ -0,0 +1,10 @@
+ gradio
+ moviepy
+ Pillow
+ matplotlib