Spaces:
Sleeping
Sleeping
File size: 3,466 Bytes
a578142 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 |
import os
import glob
import pandas as pd
import numpy as np
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
import albumentations as A
from albumentations.pytorch import ToTensorV2
from PIL import Image
from sklearn.model_selection import train_test_split
from config.configure import mask_images_path
from src import logger
def get_dataframe(path: str) -> pd.DataFrame:
    """
    Create a DataFrame containing image paths, mask paths, and labels.

    Args:
        path (str): Glob pattern matching the mask image files. The image
            path for each mask is derived by removing every "_mask"
            substring from the mask path.

    Returns:
        pd.DataFrame: One row per matched mask with the columns
        ``image_path``, ``mask_path`` and ``label``. ``label`` is 1 when
        the sum of the mask's pixel values is greater than zero, else 0.
        The frame is empty (but has the three columns) when nothing matches.
    """
    # glob returns matches in arbitrary, filesystem-dependent order; sorting
    # makes the resulting row order reproducible across runs and machines.
    mask_paths = sorted(glob.glob(path))
    image_paths = [mask_path.replace("_mask", '') for mask_path in mask_paths]

    def _has_foreground(mask_path: str) -> int:
        """Return 1 if the mask at *mask_path* has a positive pixel sum, else 0."""
        # The context manager releases the file handle as soon as the pixels
        # have been read, instead of leaving that to garbage collection while
        # the whole dataset is scanned.
        with Image.open(mask_path) as mask_image:
            return 1 if np.array(mask_image).sum() > 0 else 0

    mask_labels = [_has_foreground(mask_path) for mask_path in mask_paths]

    return pd.DataFrame({
        'image_path': image_paths,
        'mask_path': mask_paths,
        'label': mask_labels,
    })
class MRIDataset(Dataset):
    """Dataset that loads (image, mask) pairs from file paths on demand."""

    def __init__(self, paths, transform=None):
        """
        Custom dataset for MRI images.

        Args:
            paths (pd.DataFrame): DataFrame whose rows unpack into exactly
                two values, the image path followed by the mask path.
            transform: Data augmentation and transformation pipeline called
                as ``transform(image=..., mask=...)`` and returning a mapping
                with ``'image'`` and ``'mask'`` entries. When falsy (e.g.
                ``None``), a bare ``ToTensorV2()`` is applied instead.
        """
        self.paths = paths
        self.transform = transform

    def __len__(self):
        """Return the number of rows in ``paths``."""
        return len(self.paths)

    def __getitem__(self, idx):
        """
        Load, scale and transform the sample at positional index ``idx``.

        Returns:
            tuple: ``(image, mask)`` taken from the transform output; the
            mask has a new leading dimension added via ``unsqueeze(0)``.
        """
        image_path, mask_path = self.paths.iloc[idx]

        # Read the pixels inside a context manager so the file handles are
        # released immediately; this method runs once per sample and would
        # otherwise rely on garbage collection to close them.
        with Image.open(image_path) as image_file:
            image = np.array(image_file).astype(np.float32) / 255.
        with Image.open(mask_path) as mask_file:
            mask = np.array(mask_file).astype(np.float32) / 255.

        # Fall back to plain tensor conversion when no pipeline was supplied.
        pipeline = self.transform if self.transform else ToTensorV2()
        transformed = pipeline(image=image, mask=mask)
        return transformed['image'], transformed['mask'].unsqueeze(0)
def data_loaders(batch_size, num_workers, train_split=False,
                 random_state=42) -> "DataLoader | tuple[DataLoader, DataLoader]":
    """
    Build the data loaders for the MRI segmentation dataset.

    The rows returned by ``get_dataframe(mask_images_path)`` are split 70/30
    into train and held-out parts; the held-out part is split again 80/20
    into validation and test. The test set keeps only rows whose label is 1.

    Args:
        batch_size (int): Batch size used for every loader.
        num_workers (int): Number of worker processes used for every loader.
        train_split (bool): When True return ``(train_loader, val_loader)``;
            otherwise return only the test loader.
        random_state: Seed forwarded to both ``train_test_split`` calls. A
            fixed seed keeps the partitions identical across calls, so the
            test loader from one call never contains samples that a
            previous call placed in the training set. Pass ``None`` to get
            a fresh random split on every call.

    Returns:
        ``(train_loader, val_loader)`` if ``train_split`` is True, else
        ``test_loader``.

    Raises:
        ValueError: If no mask images are found.
    """
    logger.info("Preprocessing Data")
    df = get_dataframe(mask_images_path)
    if df.empty:
        raise ValueError(
            f"No mask images found for pattern: {mask_images_path!r}"
        )

    # A seeded split is only reproducible if the input rows arrive in the
    # same order every time, so fix the order before splitting.
    df = df.sort_values('mask_path').reset_index(drop=True)

    train_transforms = A.Compose([
        A.Resize(224, 224, p=1.0),
        A.RandomBrightnessContrast(p=0.2),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        ToTensorV2(),
    ])

    # Validation and test data are only resized, never augmented.
    val_transforms = A.Compose([
        A.Resize(224, 224, p=1.0),
        ToTensorV2(),
    ])

    # Splitting the dataset: 70% train, then 80/20 of the rest for val/test.
    train_x, val_x, train_y, val_y = train_test_split(
        df.drop('label', axis=1), df.label,
        test_size=0.3, random_state=random_state,
    )
    val_x, test_x, val_y, test_y = train_test_split(
        val_x, val_y, test_size=0.2, random_state=random_state,
    )

    if train_split:
        train_data = MRIDataset(train_x, train_transforms)
        val_data = MRIDataset(val_x, val_transforms)
        train_loader = DataLoader(train_data, batch_size=batch_size,
                                  shuffle=True, num_workers=num_workers)
        val_loader = DataLoader(val_data, batch_size=batch_size,
                                shuffle=True, num_workers=num_workers)
        return train_loader, val_loader

    # The test set keeps only samples whose mask is non-empty (label == 1).
    test_data = MRIDataset(test_x[test_y == 1], val_transforms)
    # Honour the caller's batch_size / num_workers instead of fixed values.
    test_loader = DataLoader(test_data, batch_size=batch_size,
                             shuffle=True, num_workers=num_workers)
    return test_loader