File size: 3,466 Bytes
a578142
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import os
import glob
import pandas as pd
import numpy as np
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
import albumentations as A
from albumentations.pytorch import ToTensorV2
from PIL import Image
from sklearn.model_selection import train_test_split
from config.configure import mask_images_path
from src import logger
def get_dataframe(path: str) -> pd.DataFrame:
    """
    Build a DataFrame of image paths, mask paths, and binary labels.

    The glob pattern is expected to match mask files whose names contain
    "_mask"; the matching image path is derived by stripping that marker.
    A mask with any non-zero pixel is labelled 1, otherwise 0.

    Args:
        path (str): glob pattern for the mask images [mask_images].

    Returns:
        pd.DataFrame: columns 'image_path', 'mask_path', 'label'.
    """
    image_masks = glob.glob(path)
    image_paths = [mask_file.replace("_mask", '') for mask_file in image_masks]

    def _label(mask_file: str) -> int:
        # Use a context manager so the file handle is closed promptly
        # instead of relying on PIL's lazy cleanup across a large glob.
        with Image.open(mask_file) as img:
            return int(np.array(img).sum() > 0)

    return pd.DataFrame({
        'image_path': image_paths,
        'mask_path': image_masks,
        'label': [_label(m) for m in image_masks],
    })

class MRIDataset(Dataset):
    """Dataset yielding (image, mask) tensor pairs for MRI segmentation."""

    def __init__(self, paths, transform):
        """
        Custom dataset for MRI images.

        Args:
            paths (pd.DataFrame): DataFrame with 'image_path' and
                'mask_path' columns (as produced by get_dataframe).
            transform: albumentations pipeline; may be None/falsy, in
                which case a bare tensor conversion is applied.
        """
        self.paths = paths
        self.transform = transform

    def __len__(self):
        return len(self.paths)

    def __getitem__(self, idx):
        # Select columns by name rather than positional unpacking so a
        # change in DataFrame column order cannot silently swap the pair.
        row = self.paths.iloc[idx]
        with Image.open(row['image_path']) as img:
            image = np.array(img).astype(np.float32) / 255.
        with Image.open(row['mask_path']) as msk:
            mask = np.array(msk).astype(np.float32) / 255.

        # Fall back to a plain tensor conversion when no pipeline is given.
        pipeline = self.transform if self.transform else ToTensorV2()
        transformed = pipeline(image=image, mask=mask)
        # The mask comes back (H, W); add the channel axis the model expects.
        return transformed['image'], transformed['mask'].unsqueeze(0)


def data_loaders(batch_size, num_workers, train_split=False):
    """
    Build DataLoaders over the MRI mask dataset.

    Args:
        batch_size (int): batch size for every loader.
        num_workers (int): worker processes per loader.
        train_split (bool): when True return (train_loader, val_loader);
            otherwise return only the test loader, restricted to
            tumour-positive cases (label == 1).

    Returns:
        tuple[DataLoader, DataLoader] if train_split else DataLoader.
    """
    logger.info("Preprocessing Data")
    df = get_dataframe(mask_images_path)

    train_transforms = A.Compose([
        A.Resize(224, 224, p=1.0),
        A.RandomBrightnessContrast(p=0.2),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        ToTensorV2(),
    ])

    # Only reshape val and test data -- no augmentation.
    val_transforms = A.Compose([
        A.Resize(224, 224, p=1.0),
        ToTensorV2(),
    ])

    # 70% train; the remaining 30% is split ~24% val / ~6% test.
    # NOTE(review): no random_state is set, so splits are not reproducible
    # across runs -- confirm whether that is intended.
    train_x, val_x, train_y, val_y = train_test_split(
        df.drop('label', axis=1), df.label, test_size=0.3)
    val_x, test_x, val_y, test_y = train_test_split(val_x, val_y, test_size=0.2)

    train_data = MRIDataset(train_x, train_transforms)
    val_data = MRIDataset(val_x, val_transforms)
    # Evaluate only on tumour-positive cases.
    test_data = MRIDataset(test_x[test_y == 1], val_transforms)

    if train_split:
        train_loader = DataLoader(train_data, batch_size=batch_size,
                                  shuffle=True, num_workers=num_workers)
        val_loader = DataLoader(val_data, batch_size=batch_size,
                                shuffle=True, num_workers=num_workers)
        return train_loader, val_loader

    # Honour the caller's batch_size/num_workers here too (previously the
    # test loader hard-coded batch_size=32 and ignored num_workers).
    return DataLoader(test_data, batch_size=batch_size, shuffle=True,
                      num_workers=num_workers)