File size: 4,067 Bytes
bd67cfe
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
import os
from typing import Tuple

import numpy as np
from PIL import Image

def validate_data_directory(root: str) -> None:
    """
    Validate the data directory.

    Checks that ``root`` exists, is a directory, contains at least one
    subdirectory, and that every subdirectory holds at least one ``.pgm``
    image.

    Parameters:
    - root (str): Path to the dataset.

    Raises:
    - FileNotFoundError: If root does not exist, contains no
      subdirectories, or a subdirectory contains no .pgm image.
    - NotADirectoryError: If root exists but is not a directory.
    """

    # Check 1: root exists and is a directory.  Without the isdir check,
    # a plain file would pass and os.listdir below would raise an
    # unexplained NotADirectoryError.
    if not os.path.exists(root):
        raise FileNotFoundError(f'{root} does not exist!')
    if not os.path.isdir(root):
        raise NotADirectoryError(f'{root} is not a directory!')

    # Check 2: data directory is not empty.
    subdirs = [d for d in os.listdir(root) if os.path.isdir(os.path.join(root, d))]
    if not subdirs:
        raise FileNotFoundError(f'{root} is empty!')

    # Check 3: each subdirectory contains at least one image.
    for subdir in subdirs:
        subdir_path = os.path.join(root, subdir)
        if not any(f.endswith('.pgm') for f in os.listdir(subdir_path)):
            raise FileNotFoundError(f'{subdir_path} does not contain any image!')

def load_data(root: str='data/CroppedYaleB', reduce: int=1, global_centering: bool=True, local_centering: bool=True) -> Tuple[np.ndarray, np.ndarray]:
    """
    Load ORL (or Extended YaleB) dataset into a numpy array.

    Each image is flattened into a column, so the returned array has
    shape (height*width, n_samples).

    Parameters:
    - root (str): Path to the dataset.
    - reduce (int): Scale factor for downscaling images; must be >= 1.
    - global_centering (bool): If True, subtract each column's (image's) mean.
    - local_centering (bool): If True, subtract each row's (pixel's) mean.

    Returns:
    - images (numpy.ndarray): Image data, shape (pixels, n_samples), float64.
    - labels (numpy.ndarray): Integer label per image (index of the sorted
      person directory).

    Raises:
    - ValueError: If reduce < 1, or no usable .pgm image was found.
    - FileNotFoundError / NotADirectoryError: If the data directory is invalid.
    """

    # Guard against reduce=0 (would be a raw ZeroDivisionError below) and
    # negative factors (would produce nonsense sizes).
    if reduce < 1:
        raise ValueError(f'reduce must be >= 1, got {reduce}')

    # Validate the data directory.
    validate_data_directory(root)

    images, labels = [], []

    for i, person in enumerate(sorted(os.listdir(root))):

        person_dir = os.path.join(root, person)
        if not os.path.isdir(person_dir):
            continue

        for fname in os.listdir(person_dir):

            # Remove background images in Extended YaleB dataset.
            if fname.endswith('Ambient.pgm'):
                continue

            if not fname.endswith('.pgm'):
                continue

            # Load image as greyscale.
            img = Image.open(os.path.join(person_dir, fname))
            img = img.convert('L') # grey image.

            # Downscale to reduce computation complexity.
            img = img.resize([s//reduce for s in img.size])

            # Flatten into a column vector and collect data and label.
            images.append(np.asarray(img).reshape((-1, 1)))
            labels.append(i)

    # All images may have been skipped (e.g. only Ambient.pgm files);
    # fail with a clear message instead of np.concatenate's cryptic error.
    if not images:
        raise ValueError(f'no usable .pgm images found under {root}')

    # Concatenate images as columns; float64 for numerical stability.
    images = np.concatenate(images, axis=1).astype(np.float64)
    labels = np.array(labels)

    # Global centering: subtract the mean over axis 0, i.e. each image's
    # own mean brightness.
    if global_centering:
        images -= images.mean(axis=0)

    # Local centering: subtract the mean over axis 1, i.e. each pixel's
    # mean across all images.
    if local_centering:
        images -= images.mean(axis=1).reshape(-1, 1)

    return images, labels


def get_image_size(root: str='code/dataCroppedYaleB') -> tuple:
    """
    Get the size of images in the dataset.

    Returns the size of the first valid .pgm image found (no downscaling
    is applied, unlike load_data).

    NOTE(review): the default root here ('code/dataCroppedYaleB') differs
    from load_data's default ('data/CroppedYaleB') — confirm which path
    is intended before relying on the default.

    Parameters:
    - root (str): Path to the dataset.

    Returns:
    - img_size (tuple): Size of the first image as (width, height).

    Raises:
    - FileNotFoundError: If the data directory is invalid or contains no
      usable .pgm image.
    """

    # Validate the data directory.
    validate_data_directory(root)

    for person in sorted(os.listdir(root)):

        person_dir = os.path.join(root, person)
        if not os.path.isdir(person_dir):
            continue

        for fname in os.listdir(person_dir):

            # Remove background images in Extended YaleB dataset.
            if fname.endswith('Ambient.pgm'):
                continue

            if not fname.endswith('.pgm'):
                continue

            # Load the first valid image and return its size immediately.
            # (The original applied a 1:1 resize here, which was a no-op
            # and has been removed.)
            img = Image.open(os.path.join(person_dir, fname))
            return img.convert('L').size  # (width, height)

    # Previously this fell off the end and returned None silently;
    # fail loudly instead so callers never unpack a None.
    raise FileNotFoundError(f'{root} does not contain any usable image!')