dsgt-kaggle-clef
/

dsgt-snakeclef

Safetensors

Model card Files Files and versions

xet

Community

Anthony Miyaguchi committed on May 24, 2024

Commit

c10f559

1 Parent(s): 43c4ba2

test for random sizes in images

Browse files

Files changed (2) hide show

generate_dummy_testset.py +4 -1
script.py +15 -21

generate_dummy_testset.py CHANGED Viewed

@@ -14,8 +14,11 @@ if __name__ == "__main__":
     with tempfile.TemporaryDirectory() as tmpdir:
         tmp_path = Path(tmpdir)
         for row in metadata.itertuples():
             img = PIL.Image.fromarray(
-                np.random.randint(0, 255, (224, 224, 3), dtype=np.uint8)
             )
             img.save(tmp_path / row.filename)

     with tempfile.TemporaryDirectory() as tmpdir:
         tmp_path = Path(tmpdir)
         for row in metadata.itertuples():
+            # random dimensions
+            x = np.random.randint(100, 300)
+            y = np.random.randint(100, 300)
             img = PIL.Image.fromarray(
+                np.random.randint(0, 255, (x, y, 3), dtype=np.uint8)
             )
             img.save(tmp_path / row.filename)

script.py CHANGED Viewed

@@ -13,10 +13,12 @@ from transformers import AutoImageProcessor, AutoModel
 class ImageDataset(Dataset):
-    def __init__(self, metadata_path, images_root_path):
         self.metadata_path = metadata_path
         self.metadata = pd.read_csv(metadata_path)
         self.images_root_path = images_root_path
     def __len__(self):
         return len(self.metadata)
@@ -24,9 +26,18 @@ class ImageDataset(Dataset):
     def __getitem__(self, idx):
         row = self.metadata.iloc[idx]
         image_path = Path(self.images_root_path) / row.filename
-        img = Image.open(image_path).convert("RGB")
-        img = torch.from_numpy(np.array(img))
-        return {"features": img, "observation_id": row.observation_id}
 class LinearClassifier(nn.Module):
@@ -40,21 +51,6 @@ class LinearClassifier(nn.Module):
         return torch.log_softmax(self.model(x), dim=1)
-class TransformDino:
-    def __init__(self, model_name="./dinov2"):
-        self.processor = AutoImageProcessor.from_pretrained(model_name)
-        self.model = AutoModel.from_pretrained(model_name)
-    def forward(self, batch):
-        model_inputs = self.processor(images=batch["features"], return_tensors="pt")
-        with torch.no_grad():
-            outputs = self.model(**model_inputs)
-            last_hidden_states = outputs.last_hidden_state
-        # extract the cls token
-        batch["features"] = last_hidden_states[:, 0]
-        return batch
 def make_submission(
     test_metadata,
     model_path,
@@ -66,13 +62,11 @@ def make_submission(
     model = LinearClassifier(hparams["num_features"], hparams["num_classes"])
     model.load_state_dict(checkpoint["state_dict"])
-    transform = TransformDino()
     dataloader = DataLoader(
         ImageDataset(test_metadata, images_root_path), batch_size=32, num_workers=4
     )
     rows = []
     for batch in dataloader:
-        batch = transform.forward(batch)
         observation_ids = batch["observation_id"]
         logits = model(batch["features"])
         class_ids = torch.argmax(logits, dim=1)

 class ImageDataset(Dataset):
+    def __init__(self, metadata_path, images_root_path, model_name="./dinov2"):
         self.metadata_path = metadata_path
         self.metadata = pd.read_csv(metadata_path)
         self.images_root_path = images_root_path
+        self.processor = AutoImageProcessor.from_pretrained(model_name)
+        self.model = AutoModel.from_pretrained(model_name)
     def __len__(self):
         return len(self.metadata)
     def __getitem__(self, idx):
         row = self.metadata.iloc[idx]
         image_path = Path(self.images_root_path) / row.filename
+        model_inputs = self.processor(
+            images=Image.open(image_path), return_tensors="pt"
+        )
+        with torch.no_grad():
+            outputs = self.model(**model_inputs)
+            last_hidden_states = outputs.last_hidden_state
+        # extract the cls token
+        return {
+            "features": last_hidden_states[0, 0],
+            "observation_id": row.observation_id,
+        }
 class LinearClassifier(nn.Module):
         return torch.log_softmax(self.model(x), dim=1)
 def make_submission(
     test_metadata,
     model_path,
     model = LinearClassifier(hparams["num_features"], hparams["num_classes"])
     model.load_state_dict(checkpoint["state_dict"])
     dataloader = DataLoader(
         ImageDataset(test_metadata, images_root_path), batch_size=32, num_workers=4
     )
     rows = []
     for batch in dataloader:
         observation_ids = batch["observation_id"]
         logits = model(batch["features"])
         class_ids = torch.argmax(logits, dim=1)