import os
from typing import Tuple

import numpy as np
from PIL import Image


def validate_data_directory(root: str) -> None:
    """
    Validate the data directory.

    Parameters:
    - root (str): Path to the dataset.
    """
    # Check 1: root exists.
    if not os.path.exists(root):
        raise FileNotFoundError(f'{root} does not exist!')
    # Check 2: data directory is not empty.
    subdirs = [d for d in os.listdir(root) if os.path.isdir(os.path.join(root, d))]
    if not subdirs:
        raise FileNotFoundError(f'{root} is empty!')
    # Check 3: each subdirectory contains at least one image.
    for subdir in subdirs:
        pgm_files = [f for f in os.listdir(os.path.join(root, subdir)) if f.endswith('.pgm')]
        if not pgm_files:
            raise FileNotFoundError(f'{os.path.join(root, subdir)} does not contain any image!')
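
# For example (hypothetical path): validate_data_directory('no/such/dir') raises
# FileNotFoundError, as does a root whose subdirectories contain no .pgm files.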


def load_data(root: str = 'data/CroppedYaleB', reduce: int = 1,
              global_centering: bool = True,
              local_centering: bool = True) -> Tuple[np.ndarray, np.ndarray]:
    """
    Load the ORL (or Extended YaleB) dataset into a numpy array.

    Parameters:
    - root (str): Path to the dataset.
    - reduce (int): Scale factor for downscaling images.
    - global_centering (bool): If True, apply global centering.
    - local_centering (bool): If True, apply local centering.

    Returns:
    - images (numpy.ndarray): Image data, one image per column, shape (n_pixels, n_images).
    - labels (numpy.ndarray): Image labels.
    """
    # Validate the data directory.
    validate_data_directory(root)
    images, labels = [], []
    for i, person in enumerate(sorted(os.listdir(root))):
        if not os.path.isdir(os.path.join(root, person)):
            continue
        for fname in os.listdir(os.path.join(root, person)):
            # Skip background images in the Extended YaleB dataset.
            if fname.endswith('Ambient.pgm'):
                continue
            if not fname.endswith('.pgm'):
                continue
            # Load image as greyscale.
            img = Image.open(os.path.join(root, person, fname))
            img = img.convert('L')
            # Downscale to reduce computational complexity.
            img = img.resize((img.size[0] // reduce, img.size[1] // reduce))
            # Flatten the image into a column vector.
            img = np.asarray(img).reshape((-1, 1))
            # Collect data and label.
            images.append(img)
            labels.append(i)
    # Stack images column-wise and convert labels to an array.
    images = np.concatenate(images, axis=1)
    labels = np.array(labels)
    # Convert to float64 for numerical stability.
    images = images.astype(np.float64)
    # Global centering: subtract the per-pixel mean taken across all images
    # (i.e. the dataset's mean image). Note the original code used axis=0 here,
    # which actually computes each image's own mean; with images stored as
    # (n_pixels, n_images), the mean image lives along axis=1.
    if global_centering:
        images -= images.mean(axis=1).reshape(-1, 1)
    # Local centering: subtract each image's own mean intensity.
    if local_centering:
        images -= images.mean(axis=0)
    return images, labels
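
# A minimal sanity check (a sketch, assuming a dataset exists at the default
# path): with images stored as (n_pixels, n_images),
#
#   X, _ = load_data(global_centering=True, local_centering=False)
#   np.allclose(X.mean(axis=1), 0)  # True: each pixel is zero-mean across images
#
#   X, _ = load_data(global_centering=False, local_centering=True)
#   np.allclose(X.mean(axis=0), 0)  # True: each image is zero-mean across pixels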


def get_image_size(root: str = 'data/CroppedYaleB') -> Tuple[int, int]:
    """
    Get the size of images in the dataset.

    Parameters:
    - root (str): Path to the dataset.

    Returns:
    - img_size (tuple): Size of each image as (width, height), before any downscaling.
    """
    # Validate the data directory.
    validate_data_directory(root)
    for person in sorted(os.listdir(root)):
        if not os.path.isdir(os.path.join(root, person)):
            continue
        for fname in os.listdir(os.path.join(root, person)):
            # Skip background images in the Extended YaleB dataset.
            if fname.endswith('Ambient.pgm'):
                continue
            if not fname.endswith('.pgm'):
                continue
            # Load the first image and return its size immediately.
            img = Image.open(os.path.join(root, person, fname))
            img = img.convert('L')  # grey image.
            return img.size  # (width, height)
    # No usable image found (e.g. only Ambient.pgm files present).
    raise FileNotFoundError(f'{root} does not contain any image!')
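
# A minimal usage sketch (assumption: the Extended YaleB dataset has been
# downloaded to the default 'data/CroppedYaleB' path; adjust `root` otherwise).
# get_image_size() reports the original size, while load_data() downscales by
# `reduce`, so the column length of X here is (width // 2) * (height // 2).
if __name__ == '__main__':
    X, y = load_data(reduce=2)
    width, height = get_image_size()
    print(f'data matrix: {X.shape} (pixels x images)')
    print(f'labels: {y.shape[0]} images from {len(np.unique(y))} subjects')
    print(f'original image size: {width} x {height}')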