Upload train.py
Browse files
train.py
CHANGED
@@ -166,11 +166,11 @@ def add_image_path(example):
|
|
166 |
folder_name=example["folder_name"]
|
167 |
#image_name = example['image_id'] + '.jpg'
|
168 |
#image_path = os.path.join(r"D:\dump384_224x224_384\384", image_name)
|
169 |
-
image_path = [os.path.join(rf"
|
170 |
example['image_path'] = image_path
|
171 |
return example
|
172 |
|
173 |
-
ds = dataset.map(add_image_path, batched=True, batch_size=
|
174 |
print(ds)
|
175 |
|
176 |
ds = ds.train_test_split(test_size=0.02)
|
@@ -180,6 +180,8 @@ processed_dataset = ds.map(
|
|
180 |
function=preprocess_fn,
|
181 |
batched=True,
|
182 |
fn_kwargs={"max_target_length": 128},
|
|
|
|
|
183 |
#remove_columns=ds['train'].column_names
|
184 |
)
|
185 |
|
|
|
166 |
folder_name=example["folder_name"]
|
167 |
#image_name = example['image_id'] + '.jpg'
|
168 |
#image_path = os.path.join(r"D:\dump384_224x224_384\384", image_name)
|
169 |
+
image_path = [os.path.join(rf"/home/user/dump_small/{folder_name[i]}", image_name[i]) for i in range(len(image_name))]
|
170 |
example['image_path'] = image_path
|
171 |
return example
|
172 |
|
173 |
+
ds = dataset.map(add_image_path, batched=True, batch_size=8192)["train"]
|
174 |
print(ds)
|
175 |
|
176 |
ds = ds.train_test_split(test_size=0.02)
|
|
|
180 |
function=preprocess_fn,
|
181 |
batched=True,
|
182 |
fn_kwargs={"max_target_length": 128},
|
183 |
+
batch_size=8192,
|
184 |
+
num_proc=16,
|
185 |
#remove_columns=ds['train'].column_names
|
186 |
)
|
187 |
|