import os import glob import pandas as pd import numpy as np import torch from torch import nn from torch.utils.data import Dataset, DataLoader import albumentations as A from albumentations.pytorch import ToTensorV2 from PIL import Image from sklearn.model_selection import train_test_split from config.configure import mask_images_path from src import logger def get_dataframe(path: str) -> pd.DataFrame: """ Create a DataFrame containing image paths, mask paths, and labels. Args: path (str): path [mask_images] Returns: pd.DataFrame: DataFrame with image paths, mask paths, and labels. """ image_masks = glob.glob(path) image_paths = [file_path.replace("_mask", '') for file_path in image_masks] def labels(mask_path): label = [] for mask in mask_path: img = Image.open(mask) label.append(1) if np.array(img).sum() > 0 else label.append(0) return label mask_labels = labels(image_masks) df = pd.DataFrame({ 'image_path': image_paths, 'mask_path': image_masks, 'label': mask_labels }) return df class MRIDataset(Dataset): def __init__(self, paths, transform): """ Custom dataset for MRI images. Args: paths (pd.DataFrame): DataFrame containing mask paths. transform: Data augmentation and transformation pipeline. """ self.paths = paths self.transform = transform def __len__(self): return len(self.paths) def __getitem__(self, idx): image_path, mask_path = self.paths.iloc[idx] image = Image.open(image_path) mask = Image.open(mask_path) image = np.array(image).astype(np.float32) / 255. mask = np.array(mask).astype(np.float32) / 255. if self.transform: transformed = self.transform(image=image, mask=mask) return transformed['image'], transformed['mask'].unsqueeze(0) else: transformed = ToTensorV2()(image=image, mask=mask) return transformed['image'], transformed['mask'].unsqueeze(0) def data_loaders(batch_size,num_workers, train_split=False) -> DataLoader: logger.info(f"Preprocessing Data") df = get_dataframe(mask_images_path) train_transforms = A.Compose([ A.Resize(224, 224, p=1.0), A.RandomBrightnessContrast(p=0.2), A.HorizontalFlip(p=0.5), A.VerticalFlip(p=0.5), ToTensorV2(), ]) # Only reshape val and test data val_transforms = A.Compose([ A.Resize(224, 224, p=1.0), ToTensorV2(), ]) # splitting the dataset train_x, val_x, train_y, val_y = train_test_split(df.drop('label',axis=1), df.label,test_size=0.3) val_x , test_x, val_y, test_y = train_test_split(val_x, val_y, test_size = 0.2) train_data = MRIDataset(train_x, train_transforms) val_data = MRIDataset(val_x, val_transforms) test_data = MRIDataset(test_x[test_y == 1], val_transforms) # train_loader = DataLoader(train_data, batch_size=32, shuffle=True) if train_split: train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True, num_workers=num_workers) val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=True, num_workers=num_workers) return train_loader, val_loader else: test_loader = DataLoader(test_data, batch_size=32, shuffle=True) return test_loader