Upload train.py
train.py CHANGED
@@ -64,17 +64,19 @@ def feature_extraction_fn(image_paths, check_image=True):
 
     return encoder_inputs.pixel_values
 
+def transform(example_batch):
+    # Take a list of PIL images and turn them to pixel values
+    inputs = feature_extractor([x for x in example_batch['image']], return_tensors='pt')
 
-
-
-
-    captions = examples['tags']
+    # Don't forget to include the labels!
+    inputs['labels'] = example_batch['labels']
+    return inputs
 
+def preprocess_fn(example_batch):
+    """Run tokenization + image feature extraction"""
     model_inputs = {}
-
-    model_inputs['labels'] = tokenization_fn(
-    model_inputs['pixel_values'] = feature_extraction_fn(image_paths, check_image=check_image)
-
+    model_inputs['pixel_values'] = feature_extraction_fn([x for x in example_batch['image_path']])
+    model_inputs['labels'] = tokenization_fn([x for x in example_batch['tags']], 128)
     return model_inputs
 
 def postprocess_text(preds, labels):
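Note: `tokenization_fn` is defined earlier in train.py and is not part of this hunk. Judging from the call site above, `tokenization_fn(captions, 128)`, it presumably looks something like the sketch below; the `gpt2` tokenizer choice is an assumption for illustration, not taken from this commit.

```python
from transformers import AutoTokenizer

# Assumption: the decoder's tokenizer; the actual checkpoint is not shown in this diff.
tokenizer = AutoTokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token  # GPT-2 ships without a pad token

def tokenization_fn(captions, max_target_length):
    """Tokenize caption strings into fixed-length lists of label ids."""
    labels = tokenizer(
        captions,
        padding="max_length",
        max_length=max_target_length,
        truncation=True,
    ).input_ids
    return labels
```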
@@ -164,8 +166,6 @@ print(dataset)
 def add_image_path(example):
     image_name = [i + '.jpg' for i in example["image_id"]]
     folder_name=example["folder_name"]
-    #image_name = example['image_id'] + '.jpg'
-    #image_path = os.path.join(r"D:\dump384_224x224_384\384", image_name)
     image_path = [os.path.join(rf"/home/user/dump_small/{folder_name[i]}", image_name[i]) for i in range(len(image_name))]
     example['image_path'] = image_path
     return example
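Since `add_image_path` is mapped with `batched=True`, each column arrives as a list covering the whole batch. A standalone check of the helper (the ids and folder names below are made up):

```python
import os

def add_image_path(example):
    # Batched map: every column is a list, one entry per row in the batch.
    image_name = [i + '.jpg' for i in example["image_id"]]
    folder_name = example["folder_name"]
    image_path = [os.path.join(f"/home/user/dump_small/{folder_name[i]}", image_name[i])
                  for i in range(len(image_name))]
    example['image_path'] = image_path
    return example

batch = {"image_id": ["0001", "0002"], "folder_name": ["cats", "dogs"]}
print(add_image_path(batch)["image_path"])
# ['/home/user/dump_small/cats/0001.jpg', '/home/user/dump_small/dogs/0002.jpg']
```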
@@ -174,36 +174,35 @@ ds = dataset.map(add_image_path, batched=True, batch_size=8192)["train"]
 print(ds)
 
 ds = ds.train_test_split(test_size=0.02)
+print(ds['train'][0:2])
+ds.set_transform(preprocess_fn)
+print(ds['train'][0:2])
 
-print(ds['train'][0])
-processed_dataset = ds.map(
-    function=preprocess_fn,
-    batched=True,
-    fn_kwargs={"max_target_length": 128},
-    batch_size=8192,
-    num_proc=16,
-    #remove_columns=ds['train'].column_names
-)
 
 training_args = Seq2SeqTrainingArguments(
     predict_with_generate=True,
     evaluation_strategy="steps",
     eval_steps=100,
     gradient_accumulation_steps=4,
-    per_device_train_batch_size=
+    per_device_train_batch_size=128,
     weight_decay=0.1,
-    max_steps=
+    max_steps=10000,
     warmup_steps=1000,
     logging_strategy="steps",
-    save_steps=
+    save_steps=5000,
     fp16=True,
     tpu_num_cores=8,
-    per_device_eval_batch_size=
+    per_device_eval_batch_size=128,
     output_dir="image-captioning-output",
     learning_rate=5e-4,
     lr_scheduler_type="cosine",
 )
 
+def collate_fn(batch):
+    return {
+        'pixel_values': torch.stack([x['pixel_values'] for x in batch]),
+        'labels': torch.tensor([x['labels'] for x in batch])
+    }
 
 metric = evaluate.load("rouge")
 ignore_pad_token_for_loss = True
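Unlike the removed `ds.map(...)`, which preprocessed every row eagerly (across `num_proc=16` workers) and cached the result, `set_transform` registers `preprocess_fn` to run on the fly each time rows are read; that is why the second `print(ds['train'][0:2])` shows transformed outputs while the first still shows the raw columns. A minimal, self-contained illustration of that behavior (toy data, not from train.py):

```python
from datasets import Dataset

def double(batch):
    # Like preprocess_fn: receives a dict of columns for the requested rows.
    return {"x2": [v * 2 for v in batch["x"]]}

d = Dataset.from_dict({"x": [1, 2, 3]})
d.set_transform(double)  # registered only; nothing is computed or cached
print(d[0:2])            # double() runs here -> {'x2': [2, 4]}
```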
@@ -214,9 +213,9 @@ trainer = Seq2SeqTrainer(
     tokenizer=feature_extractor,
     args=training_args,
     compute_metrics=compute_metrics,
-    train_dataset=
-    eval_dataset=
-    data_collator=
+    train_dataset=ds['train'],
+    eval_dataset=ds['test'],
+    data_collator=collate_fn,
 )
 
 
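For reference, during training the `Trainer` hands `data_collator` a list of individual dataset items. A self-contained check of the stacking behavior of the `collate_fn` added above (the 3x224x224 shape is an assumption about the feature extractor's output, not something fixed by this diff):

```python
import torch

def collate_fn(batch):  # same shape as the collator added in this commit
    return {
        'pixel_values': torch.stack([x['pixel_values'] for x in batch]),
        'labels': torch.tensor([x['labels'] for x in batch]),
    }

items = [{'pixel_values': torch.zeros(3, 224, 224), 'labels': [0, 1, 2]}
         for _ in range(4)]
out = collate_fn(items)
print(out['pixel_values'].shape)  # torch.Size([4, 3, 224, 224])
print(out['labels'].shape)        # torch.Size([4, 3])
```

Note that `collate_fn` relies on `torch`, so train.py needs a `torch` import if it does not already have one.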