Spaces:
Sleeping
Sleeping
import os | |
import glob | |
import pandas as pd | |
import numpy as np | |
import torch | |
from torch import nn | |
from torch.utils.data import Dataset, DataLoader | |
import albumentations as A | |
from albumentations.pytorch import ToTensorV2 | |
from PIL import Image | |
from sklearn.model_selection import train_test_split | |
from config.configure import mask_images_path | |
from src import logger | |
def get_dataframe(path: str) -> pd.DataFrame:
    """
    Create a DataFrame containing image paths, mask paths, and labels.

    Args:
        path (str): glob pattern that matches the mask image files.

    Returns:
        pd.DataFrame: one row per matched mask file with the columns
            ``image_path`` (the mask path with every "_mask" occurrence
            removed), ``mask_path`` (the matched file) and ``label``
            (1 when the mask's pixel values sum to more than 0, else 0).
            The frame is empty when nothing matches ``path``.
    """
    image_masks = glob.glob(path)
    image_paths = [file_path.replace("_mask", '') for file_path in image_masks]

    def label_for(mask_file: str) -> int:
        # Read the mask inside a context manager so the underlying file
        # handle is released right after the pixels are loaded instead of
        # whenever the Image object happens to be garbage-collected.
        with Image.open(mask_file) as img:
            return int(np.array(img).sum() > 0)

    mask_labels = [label_for(mask_file) for mask_file in image_masks]

    return pd.DataFrame({
        'image_path': image_paths,
        'mask_path': image_masks,
        'label': mask_labels,
    })
class MRIDataset(Dataset):
    """Dataset that loads (image, mask) pairs from file paths on demand."""

    def __init__(self, paths, transform):
        """
        Custom dataset for MRI images.

        Args:
            paths (pd.DataFrame): DataFrame whose rows unpack into exactly
                two values: the image path followed by the mask path.
            transform: Data augmentation and transformation pipeline. It is
                called with ``image=`` and ``mask=`` keyword arguments and
                must return a mapping with 'image' and 'mask' entries. When
                falsy, a bare ``ToTensorV2()`` is applied instead.
        """
        self.paths = paths
        self.transform = transform

    def __len__(self):
        """Return the number of rows in ``paths``."""
        return len(self.paths)

    def __getitem__(self, idx):
        """
        Load and transform the pair stored at positional row ``idx``.

        Returns:
            tuple: the transformed image and the transformed mask with an
                extra leading dimension added via ``unsqueeze(0)``.
        """
        image_path, mask_path = self.paths.iloc[idx]

        # Context managers release each file handle as soon as the pixel
        # data has been copied into a NumPy array.
        with Image.open(image_path) as image_file:
            image = np.array(image_file).astype(np.float32) / 255.
        with Image.open(mask_path) as mask_file:
            mask = np.array(mask_file).astype(np.float32) / 255.

        # Same truthiness test as before: a missing (or empty) pipeline
        # falls back to plain tensor conversion.
        pipeline = self.transform if self.transform else ToTensorV2()
        transformed = pipeline(image=image, mask=mask)
        return transformed['image'], transformed['mask'].unsqueeze(0)
def data_loaders(
    batch_size: int,
    num_workers: int,
    train_split: bool = False,
    random_state: "int | None" = 42,
) -> "tuple[DataLoader, DataLoader] | DataLoader":
    """
    Build the data loaders for the MRI segmentation data.

    The frame returned by ``get_dataframe(mask_images_path)`` is split 70/30
    into train and hold-out rows; the hold-out rows are then split 80/20
    into validation and test rows.

    Args:
        batch_size (int): batch size used by every returned loader.
        num_workers (int): worker process count used by every returned loader.
        train_split (bool): when True return the train and validation
            loaders, otherwise return the test loader.
        random_state: seed forwarded to both ``train_test_split`` calls so
            that separate calls (e.g. one for training, one for testing)
            see the same partition. Pass None for an unseeded split.

    Returns:
        ``(train_loader, val_loader)`` when ``train_split`` is True, else a
        single ``test_loader`` that contains only the test rows whose label
        is 1.
    """
    logger.info("Preprocessing Data")
    df = get_dataframe(mask_images_path)

    # Only reshape val and test data
    val_transforms = A.Compose([
        A.Resize(224, 224, p=1.0),
        ToTensorV2(),
    ])

    # splitting the dataset; seeding keeps train/val/test disjoint across
    # calls, otherwise a later "test" call could draw rows that an earlier
    # call had placed in the training set.
    train_x, holdout_x, _train_y, holdout_y = train_test_split(
        df.drop('label', axis=1), df.label,
        test_size=0.3, random_state=random_state,
    )
    val_x, test_x, _val_y, test_y = train_test_split(
        holdout_x, holdout_y,
        test_size=0.2, random_state=random_state,
    )

    if train_split:
        train_transforms = A.Compose([
            A.Resize(224, 224, p=1.0),
            A.RandomBrightnessContrast(p=0.2),
            A.HorizontalFlip(p=0.5),
            A.VerticalFlip(p=0.5),
            ToTensorV2(),
        ])
        train_loader = DataLoader(
            MRIDataset(train_x, train_transforms),
            batch_size=batch_size, shuffle=True, num_workers=num_workers,
        )
        val_loader = DataLoader(
            MRIDataset(val_x, val_transforms),
            batch_size=batch_size, shuffle=True, num_workers=num_workers,
        )
        return train_loader, val_loader

    # Keep only the test rows labelled 1.
    test_data = MRIDataset(test_x[test_y == 1], val_transforms)
    # Honour the caller's batch_size / num_workers instead of a fixed 32.
    # Shuffling stays enabled so iteration order varies between passes.
    return DataLoader(
        test_data,
        batch_size=batch_size, shuffle=True, num_workers=num_workers,
    )