import os
from typing import Tuple
import numpy as np
from PIL import Image
def validate_data_directory(root: str) -> None:
    """
    Validate the data directory.

    Parameters:
    - root (str): Path to the dataset.
    """
    # Check 1: root exists.
    if not os.path.exists(root):
        raise FileNotFoundError(f'{root} does not exist!')
    # Check 2: the data directory is not empty.
    subdirs = [d for d in os.listdir(root) if os.path.isdir(os.path.join(root, d))]
    if not subdirs:
        raise FileNotFoundError(f'{root} is empty!')
    # Check 3: each subdirectory contains at least one image.
    for subdir in subdirs:
        pgm_files = [f for f in os.listdir(os.path.join(root, subdir)) if f.endswith('.pgm')]
        if not pgm_files:
            raise FileNotFoundError(f'{os.path.join(root, subdir)} does not contain any image!')

def load_data(root: str = 'data/CroppedYaleB', reduce: int = 1, global_centering: bool = True, local_centering: bool = True) -> Tuple[np.ndarray, np.ndarray]:
    """
    Load the ORL (or Extended YaleB) dataset into numpy arrays.

    Parameters:
    - root (str): Path to the dataset.
    - reduce (int): Downscaling factor; image width and height are divided by this value.
    - global_centering (bool): If True, apply global centering.
    - local_centering (bool): If True, apply local centering.

    Returns:
    - images (numpy.ndarray): Image data of shape (n_pixels, n_images); each column is one flattened image.
    - labels (numpy.ndarray): Integer label of each image.
    """
    # Validate the data directory.
    validate_data_directory(root)
    images, labels = [], []
    # Enumerate only subdirectories so labels stay contiguous even if stray files exist in root.
    subdirs = sorted(d for d in os.listdir(root) if os.path.isdir(os.path.join(root, d)))
    for i, person in enumerate(subdirs):
        for fname in os.listdir(os.path.join(root, person)):
            # Skip ambient (background) images in the Extended YaleB dataset.
            if fname.endswith('Ambient.pgm'):
                continue
            if not fname.endswith('.pgm'):
                continue
            # Load the image as greyscale.
            img = Image.open(os.path.join(root, person, fname))
            img = img.convert('L')
            # Downscale to reduce computational complexity.
            img = img.resize([s // reduce for s in img.size])
            # Flatten the image into a column vector.
            img = np.asarray(img).reshape((-1, 1))
            # Collect data and label.
            images.append(img)
            labels.append(i)
    # Concatenate all images and labels.
    images = np.concatenate(images, axis=1)
    labels = np.array(labels)
    # Convert to float64 for numerical stability.
    images = images.astype(np.float64)
    # Global centering: subtract the per-pixel mean over all images (the "mean face").
    # Note: images has shape (n_pixels, n_images), so the dataset-wide mean is along axis=1.
    if global_centering:
        images -= images.mean(axis=1).reshape(-1, 1)
    # Local centering: subtract each image's own mean intensity (column-wise mean, axis=0).
    if local_centering:
        images -= images.mean(axis=0)
    return images, labels

def get_image_size(root: str = 'data/CroppedYaleB') -> tuple:
    """
    Get the size of images in the dataset.

    Parameters:
    - root (str): Path to the dataset.

    Returns:
    - img_size (tuple): Size of each image as (width, height).
    """
    # Validate the data directory.
    validate_data_directory(root)
    for person in sorted(os.listdir(root)):
        if not os.path.isdir(os.path.join(root, person)):
            continue
        for fname in os.listdir(os.path.join(root, person)):
            # Skip ambient (background) images in the Extended YaleB dataset.
            if fname.endswith('Ambient.pgm'):
                continue
            if not fname.endswith('.pgm'):
                continue
            # Load the first valid image and return its size immediately.
            img = Image.open(os.path.join(root, person, fname)).convert('L')
            return img.size  # (width, height)
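

# A minimal usage sketch, assuming the Cropped Extended YaleB data lives under
# 'data/CroppedYaleB' (the default path above); run this module directly as a sanity check.
if __name__ == '__main__':
    X, y = load_data(root='data/CroppedYaleB', reduce=4)
    print(f'Image matrix shape (n_pixels, n_images): {X.shape}')
    print(f'Number of labels: {y.shape[0]}')
    # Each column of X is one flattened image; reshape it back using the original size.
    width, height = get_image_size('data/CroppedYaleB')
    first_face = X[:, 0].reshape(height // 4, width // 4)  # reduce=4 above divides both dimensions by 4.
    print(f'First image shape after downscaling: {first_face.shape}')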