Thouph committed on
Commit
9d65247
·
1 Parent(s): 9228b7a

Upload train.py

Browse files
Files changed (1) hide show
  1. train.py +4 -2
train.py CHANGED
@@ -166,11 +166,11 @@ def add_image_path(example):
166
  folder_name=example["folder_name"]
167
  #image_name = example['image_id'] + '.jpg'
168
  #image_path = os.path.join(r"D:\dump384_224x224_384\384", image_name)
169
- image_path = [os.path.join(rf"~/dump_small/{folder_name[i]}", image_name[i]) for i in range(len(image_name))]
170
  example['image_path'] = image_path
171
  return example
172
 
173
- ds = dataset.map(add_image_path, batched=True, batch_size=1024)["train"]
174
  print(ds)
175
 
176
  ds = ds.train_test_split(test_size=0.02)
@@ -180,6 +180,8 @@ processed_dataset = ds.map(
180
  function=preprocess_fn,
181
  batched=True,
182
  fn_kwargs={"max_target_length": 128},
 
 
183
  #remove_columns=ds['train'].column_names
184
  )
185
 
 
166
  folder_name=example["folder_name"]
167
  #image_name = example['image_id'] + '.jpg'
168
  #image_path = os.path.join(r"D:\dump384_224x224_384\384", image_name)
169
+ image_path = [os.path.join(rf"/home/user/dump_small/{folder_name[i]}", image_name[i]) for i in range(len(image_name))]
170
  example['image_path'] = image_path
171
  return example
172
 
173
+ ds = dataset.map(add_image_path, batched=True, batch_size=8192)["train"]
174
  print(ds)
175
 
176
  ds = ds.train_test_split(test_size=0.02)
 
180
  function=preprocess_fn,
181
  batched=True,
182
  fn_kwargs={"max_target_length": 128},
183
+ batch_size=8192,
184
+ num_proc=16,
185
  #remove_columns=ds['train'].column_names
186
  )
187