import os
from typing import Tuple
import numpy as np
from PIL import Image
def validate_data_directory(root: str) -> None:
    """
    Validate the data directory.

    Parameters:
    - root (str): Path to the dataset.
    """
    # Check 1: root exists.
    if not os.path.exists(root):
        raise FileNotFoundError(f'{root} does not exist!')
    # Check 2: the data directory is not empty.
    subdirs = [d for d in os.listdir(root) if os.path.isdir(os.path.join(root, d))]
    if not subdirs:
        raise FileNotFoundError(f'{root} is empty!')
    # Check 3: each subdirectory contains at least one image.
    for subdir in subdirs:
        pgm_files = [f for f in os.listdir(os.path.join(root, subdir)) if f.endswith('.pgm')]
        if not pgm_files:
            raise FileNotFoundError(f'{os.path.join(root, subdir)} does not contain any image!')

def load_data(root: str = 'data/CroppedYaleB', reduce: int = 1, global_centering: bool = True, local_centering: bool = True) -> Tuple[np.ndarray, np.ndarray]:
    """
    Load the ORL (or Extended YaleB) dataset into numpy arrays.

    Parameters:
    - root (str): Path to the dataset.
    - reduce (int): Downscaling factor; image width and height are divided by this value.
    - global_centering (bool): If True, apply global centering.
    - local_centering (bool): If True, apply local centering.

    Returns:
    - images (numpy.ndarray): Image data of shape (n_pixels, n_images); each column is one flattened image.
    - labels (numpy.ndarray): Integer label of each image.
    """
    # Validate the data directory.
    validate_data_directory(root)
    images, labels = [], []
    # Enumerate only subdirectories so labels stay contiguous even if stray files exist in root.
    subdirs = sorted(d for d in os.listdir(root) if os.path.isdir(os.path.join(root, d)))
    for i, person in enumerate(subdirs):
        for fname in os.listdir(os.path.join(root, person)):
            # Skip ambient (background) images in the Extended YaleB dataset.
            if fname.endswith('Ambient.pgm'):
                continue
            if not fname.endswith('.pgm'):
                continue
            # Load the image as greyscale.
            img = Image.open(os.path.join(root, person, fname))
            img = img.convert('L')
            # Downscale to reduce computational complexity.
            img = img.resize([s // reduce for s in img.size])
            # Flatten the image into a column vector.
            img = np.asarray(img).reshape((-1, 1))
            # Collect data and label.
            images.append(img)
            labels.append(i)
    # Concatenate all images and labels.
    images = np.concatenate(images, axis=1)
    labels = np.array(labels)
    # Convert to float64 for numerical stability.
    images = images.astype(np.float64)
    # Global centering: subtract the per-pixel mean over all images (the "mean face").
    # Note: images has shape (n_pixels, n_images), so the dataset-wide mean is along axis=1.
    if global_centering:
        images -= images.mean(axis=1).reshape(-1, 1)
    # Local centering: subtract each image's own mean intensity (column-wise mean, axis=0).
    if local_centering:
        images -= images.mean(axis=0)
    return images, labels

def get_image_size(root: str = 'data/CroppedYaleB') -> tuple:
    """
    Get the size of images in the dataset.

    Parameters:
    - root (str): Path to the dataset.

    Returns:
    - img_size (tuple): Size of each image as (width, height).
    """
    # Validate the data directory.
    validate_data_directory(root)
    for person in sorted(os.listdir(root)):
        if not os.path.isdir(os.path.join(root, person)):
            continue
        for fname in os.listdir(os.path.join(root, person)):
            # Skip ambient (background) images in the Extended YaleB dataset.
            if fname.endswith('Ambient.pgm'):
                continue
            if not fname.endswith('.pgm'):
                continue
            # Load the first valid image and return its size immediately.
            img = Image.open(os.path.join(root, person, fname)).convert('L')
            return img.size  # (width, height)
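

# A minimal usage sketch, assuming the Cropped Extended YaleB data lives under
# 'data/CroppedYaleB' (the default path above); run this module directly as a sanity check.
if __name__ == '__main__':
    X, y = load_data(root='data/CroppedYaleB', reduce=4)
    print(f'Image matrix shape (n_pixels, n_images): {X.shape}')
    print(f'Number of labels: {y.shape[0]}')
    # Each column of X is one flattened image; reshape it back using the original size.
    width, height = get_image_size('data/CroppedYaleB')
    first_face = X[:, 0].reshape(height // 4, width // 4)  # reduce=4 above divides both dimensions by 4.
    print(f'First image shape after downscaling: {first_face.shape}')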