'''
This script prepares the raw Bridge V2 dataset: it walks the raw data tree and copies
each usable trajectory (its images0 folder plus policy_out.pkl and lang.txt) into flat
train / test folders named by an incrementing index.
'''
import os
import shutil
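
# Layout assumed by the traversal below (inferred from this script, not from an
# official Bridge V2 spec; actual dumps may differ slightly):
#   <dataset_path>/<scene>/<task>/<order>/<timestamp>/raw/traj_group0/<traj>/
#       images0/im_0.jpg, im_1.jpg, ...   # frames from the single camera view we keep
#       policy_out.pkl                    # policy outputs (contents not inspected here)
#       lang.txt                          # language annotation for the trajectory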

def read_bridge_v2(dataset_path, train_store_path, test_store_path, test_dataset_lists, copyfile=True):
    # copyfile is True most of the time; set it to False to only collect paths without copying
    start_idx = 0
    target_lists = []
    prefix_len = len(dataset_path) + 1

    # Iterate over all the scene folders inside dataset_path
    for scene_name in sorted(os.listdir(dataset_path)):
        print("We are reading scene ", scene_name)
        scene_dir = os.path.join(dataset_path, scene_name)
        for task_name in sorted(os.listdir(scene_dir)):
            task_dir = os.path.join(scene_dir, task_name)
            for order_name in sorted(os.listdir(task_dir)):
                order_dir = os.path.join(task_dir, order_name)
                for time_clock in sorted(os.listdir(order_dir)):
                    if time_clock == "lmdb":
                        continue  # Skip the lmdb folder
                    time_dir = os.path.join(order_dir, time_clock, "raw", "traj_group0")
                    if not os.path.exists(time_dir):
                        print("time_dir does not exist: ", time_dir)
                        continue
                    for traj_name in sorted(os.listdir(time_dir)):
                        traj_path = os.path.join(time_dir, traj_name)
                        if not os.path.isdir(traj_path):
                            print("traj_path is not a directory: ", traj_path)
                            continue

                        # Require policy_out.pkl; it is copied along in case it holds valuable information
                        policy_out_file_path = os.path.join(traj_path, "policy_out.pkl")
                        if not os.path.exists(policy_out_file_path):
                            continue

                        # Require the lang.txt file
                        lang_txt_file_path = os.path.join(traj_path, "lang.txt")
                        if not os.path.exists(lang_txt_file_path):
                            continue

                        for img_name in sorted(os.listdir(traj_path)):
                            if img_name != "images0":  # Only consider one camera angle
                                continue
                            img_folder_path = os.path.join(traj_path, img_name)
                            if not os.path.isdir(img_folder_path):
                                print("img_folder_path is not a directory: ", img_folder_path)
                                continue
                            ############################################ Main Process ####################################################
                            # # First sanity check (make sure the source jpg frames are sequential)
                            # length = len(os.listdir(img_folder_path))
                            # status = True
                            # for check_idx in range(length):
                            #     if not os.path.exists(os.path.join(img_folder_path, 'im_' + str(check_idx) + '.jpg')):  # Frames should exist sequentially
                            #         status = False
                            #         break

                            # Record this source folder
                            target_lists.append(img_folder_path)

                            if copyfile:
                                print("img_folder_path[prefix_len:] is ", img_folder_path[prefix_len:])
                                if img_folder_path[prefix_len:] in test_dataset_lists:
                                    # Store to the test set
                                    target_dir = os.path.join(test_store_path, str(start_idx))
                                else:
                                    # Store to the training set
                                    target_dir = os.path.join(train_store_path, str(start_idx))

                                # Now we can copy the folder to our destination
                                print("Copy " + str(img_folder_path) + " to " + str(target_dir))
                                shutil.copytree(img_folder_path, target_dir)

                                # Sanity check: frames must exist sequentially as im_0.jpg, im_1.jpg, ...
                                length = len(os.listdir(target_dir))
                                status = True
                                for check_idx in range(length):
                                    if not os.path.exists(os.path.join(target_dir, 'im_' + str(check_idx) + '.jpg')):
                                        status = False
                                        break
                                if not status:
                                    # The frames are not sequential; remove the copy and move on without updating start_idx
                                    print("This folder cannot pass the sanity check. We will remove it!")
                                    shutil.rmtree(target_dir)
                                    continue

                                # Copy the other auxiliary files
                                shutil.copy(policy_out_file_path, os.path.join(target_dir, "policy_out.pkl"))
                                shutil.copy(lang_txt_file_path, os.path.join(target_dir, "lang.txt"))

                                # Update the idx
                                start_idx += 1
                                print("We have ", start_idx)

    # Return the list of source image-folder paths
    return target_lists
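
# Note on test_path_v2.txt: membership is tested against img_folder_path[prefix_len:],
# so each line is expected to be a path relative to dataset_path ending in the images0
# folder, e.g. (hypothetical entry):
#   <scene>/<task>/<order>/<timestamp>/raw/traj_group0/traj0/images0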

if __name__ == "__main__":
    dataset_path = "/nfs/turbo/jjparkcv-turbo-large/boyangwa/raw/bridge_data_v2"
    train_store_path = "../sanity_check/bridge_v2_raw"
    test_store_path = "../sanity_check/bridge_v2_test_raw"
    test_dataset_predefined_path = "test_path_v2.txt"

    # Recreate the output dirs from scratch
    if os.path.exists(train_store_path):
        shutil.rmtree(train_store_path)
    os.makedirs(train_store_path)
    if os.path.exists(test_store_path):
        shutil.rmtree(test_store_path)
    os.makedirs(test_store_path)

    # Read the predefined test dataset paths (one path per line)
    test_dataset_lists = []
    with open(test_dataset_predefined_path, "r") as read_file:
        for line in read_file:
            test_dataset_lists.append(line.rstrip("\n"))
    print("test_dataset_lists is ", test_dataset_lists)

    read_bridge_v2(dataset_path, train_store_path, test_store_path, test_dataset_lists)
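
# Minimal usage sketch (hypothetical paths): with copyfile=False the function only
# walks the tree and returns the list of images0 folders, without copying anything.
#   paths = read_bridge_v2("/data/bridge_data_v2_raw", "./train_raw", "./test_raw", [], copyfile=False)
#   print(len(paths), "trajectory image folders found")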