File size: 6,051 Bytes
59b2a81
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
'''
    This script prepares the Bridge v1 dataset: it walks the raw dataset tree
    and collects (optionally copying) trajectory image folders into
    sequentially numbered train/test directories.
'''
import os, sys, shutil


def read_bridge_v1(dataset_path, train_store_path, test_store_path, test_dataset_lists, copyfile=True):
    """Collect (and optionally copy) Bridge-v1 image folders into train/test stores.

    Walks ``dataset_path`` laid out as
    ``scene/task/timestamp/raw/traj_group0/traj*/images0`` and, for every
    trajectory that has both ``policy_out.pkl`` and ``lang.txt``, records its
    ``images0`` folder (only that one camera angle is considered).  When
    ``copyfile`` is True, each folder is copied to a sequentially numbered
    directory under ``train_store_path`` — or under ``test_store_path`` if its
    dataset-relative path appears in ``test_dataset_lists`` — together with the
    two auxiliary files.

    Args:
        dataset_path: Root of the raw Bridge v1 dataset.
            NOTE(review): the ``+ 1`` below assumes exactly one path separator
            follows this prefix, i.e. no trailing separator — confirm callers.
        train_store_path: Destination root for training trajectories.
        test_store_path: Destination root for held-out test trajectories.
        test_dataset_lists: Dataset-relative image-folder paths belonging to
            the test split.
        copyfile: When True, copy folders to the target destination; when
            False, only collect and return the source paths.

    Returns:
        List of source ``images0`` folder paths that were visited.
        NOTE(review): folders whose copies later fail the sanity check are
        still included in this list — confirm callers expect that.
    """
    start_idx = 0
    target_lists = []
    prefix_len = len(dataset_path) + 1  # strip "<dataset_path>/" to get the dataset-relative path

    # Iterate all the folders inside
    for scene_name in sorted(os.listdir(dataset_path)):
        print("We are reading scene ", scene_name)
        scene_dir = os.path.join(dataset_path, scene_name)
        for task_name in sorted(os.listdir(scene_dir)):
            task_dir = os.path.join(scene_dir, task_name)

            for time_clock in sorted(os.listdir(task_dir)):
                if time_clock == "lmdb":
                    continue    # Skip lmdb folder

                time_dir = os.path.join(task_dir, time_clock, "raw", "traj_group0")
                if not os.path.exists(time_dir):
                    continue

                for traj_name in sorted(os.listdir(time_dir)):
                    traj_path = os.path.join(time_dir, traj_name)
                    if not os.path.isdir(traj_path):
                        continue

                    # Both auxiliary files are required; skip trajectories missing either.
                    policy_out_file_path = os.path.join(traj_path, "policy_out.pkl")
                    if not os.path.exists(policy_out_file_path):
                        continue

                    lang_txt_file_path = os.path.join(traj_path, "lang.txt")
                    if not os.path.exists(lang_txt_file_path):
                        continue

                    for img_name in sorted(os.listdir(traj_path)):
                        if img_name != "images0":       # Only consider one camera angle
                            continue

                        img_folder_path = os.path.join(traj_path, img_name)
                        if not os.path.isdir(img_folder_path):
                            continue

                        ############################################ Main Process ####################################################

                        # Record the source folder, then (optionally) copy it to our destination.
                        target_lists.append(img_folder_path)
                        if copyfile:
                            print("img_folder_path[prefix_len:] is ", img_folder_path[prefix_len:])
                            if img_folder_path[prefix_len:] in test_dataset_lists:
                                # Store to test set
                                target_dir = os.path.join(test_store_path, str(start_idx))
                            else:
                                # This is training set
                                target_dir = os.path.join(train_store_path, str(start_idx))

                            print("Copy " + str(img_folder_path) + " to " + str(target_dir))
                            shutil.copytree(img_folder_path, target_dir)

                            # Sanity check: frames must exist sequentially as im_0.jpg ... im_{N-1}.jpg
                            length = len(os.listdir(target_dir))
                            status = all(
                                os.path.exists(os.path.join(target_dir, 'im_' + str(check_idx) + '.jpg'))
                                for check_idx in range(length)
                            )

                            if not status:
                                # If they didn't have sequential files we need, we will remove and begin again without updating start_idx
                                print("This file cannot pass the sanity check. We will remove it!")
                                shutil.rmtree(target_dir)
                                continue

                            # Move other auxiliary files
                            shutil.copy(policy_out_file_path, os.path.join(target_dir, "policy_out.pkl"))
                            shutil.copy(lang_txt_file_path, os.path.join(target_dir, "lang.txt"))

                        ################################################################################################################

                        # Update the idx
                        start_idx += 1

    print("We have ", start_idx, " number of cases")

    # Return a list of file path
    return target_lists



if __name__ == "__main__":
    dataset_path = "/Path/to/Bridge/raw/bridge_data_v1/berkeley"   # Until Bridge v1 - berkeley section
    train_store_path = "/Path/to/Bridge/train/bridge_v1_raw"
    test_store_path = "/Path/to/Bridge/train/bridge_v1_test_raw"
    test_dataset_predefined_path = "test_path.txt"      # This will be provided by us


    # Recreate the destination folders so each run starts from an empty store.
    for store_path in (train_store_path, test_store_path):
        if os.path.exists(store_path):
            shutil.rmtree(store_path)
        os.makedirs(store_path)


    # Read the predefined test-split paths (one dataset-relative path per line).
    # Use rstrip("\n") rather than line[:-1]: slicing would chop the last
    # character of the final line when the file lacks a trailing newline.
    test_dataset_lists = []
    with open(test_dataset_predefined_path, "r") as read_file:
        for line in read_file:
            test_dataset_lists.append(line.rstrip("\n"))
    print("test_dataset_lists is ", test_dataset_lists)


    read_bridge_v1(dataset_path, train_store_path, test_store_path, test_dataset_lists)