from typing import Union, Tuple import numpy as np class MinMaxScaler: """ This class scales and transforms features to [0, 1]. """ def fit(self, X: np.ndarray) -> None: """ Compute the minimum and the range of the data for later scaling. Parameters: - X: numpy array-like, shape (n_samples, n_features) The data used to compute the minimum and range used for later scaling. """ self.min_ = np.min(X, axis=0) self.range_ = np.max(X, axis=0) - self.min_ def transform(self, X: np.ndarray) -> np.ndarray: """ Scale the data using the values computed during the fit method. Parameters: - X: numpy array-like, shape (n_samples, n_features) Input data that needs to be scaled. Returns: - numpy array, shape (n_samples, n_features) Transformed data. """ return (X - self.min_) / self.range_ def fit_transform(self, X: np.ndarray) -> np.ndarray: """ Fit to the data and then transform it. Parameters: - X: numpy array-like, shape (n_samples, n_features) Input data that needs to be scaled and transformed. Returns: - numpy array, shape (n_samples, n_features) Transformed data. """ self.fit(X) return self.transform(X) class StandardScaler: """ This class standardizes features by removing the mean and scaling to unit variance. """ def fit(self, X: np.ndarray) -> None: """ Compute the mean and standard deviation of the data for later standardization. Parameters: - X: numpy array-like, shape (n_samples, n_features) The data used to compute the mean and standard deviation used for later standardization. """ self.mean_ = np.mean(X, axis=0) self.std_ = np.std(X, axis=0) def transform(self, X: np.ndarray) -> np.ndarray: """ Standardize the data using the values computed during the fit method. Parameters: - X: numpy array-like, shape (n_samples, n_features) Input data that needs to be standardized. Returns: - numpy array, shape (n_samples, n_features) Transformed data. """ return (X - self.mean_) / self.std_ def fit_transform(self, X: np.ndarray) -> np.ndarray: """ Fit to the data and then transform it. Parameters: - X: numpy array-like, shape (n_samples, n_features) Input data that needs to be standardized and transformed. Returns: - numpy array, shape (n_samples, n_features) Transformed data. """ self.fit(X) return self.transform(X) class NoiseAdder: """ This class adds noise to data. """ def __init__(self, random_state: Union[int, np.random.RandomState, None]=None) -> None: """ Initializes the NoiseAdder with a random state and noise parameters. Parameters: - random_state (int or RandomState instance or None): Controls the randomness. If int, is used as seed for RandomState. - noise_params (dict): Additional noise parameters. """ self.rng = np.random.RandomState(random_state) def add_uniform_noise(self, X_hat: np.ndarray, noise_level: int=0.1) -> Tuple[np.ndarray, np.ndarray]: """ Add uniform random noise to data. Parameters: - X_hat (numpy array): Original data. Returns: - Numpy array of uniform noise. - Numpy array with added uniform noise. """ a, b = 0, 1 # Generate noise X_noise = self.rng.uniform(a, b, size=X_hat.shape) * noise_level * (np.max(X_hat) - np.min(X_hat)) return X_noise, X_hat + X_noise def add_gaussian_noise(self, X_hat, noise_level=0.1): """ Add Gaussian noise to data. Parameters: - X_hat (numpy array): Original data. - mean (float): Mean of the Gaussian distribution. - std (float): Standard deviation of the Gaussian distribution. Returns: - Numpy array of Gaussian noise. - Numpy array with added Gaussian noise. """ mean, std = 0, 1 # Generate noise X_noise = self.rng.normal(mean, std, size=X_hat.shape) * noise_level * (np.max(X_hat) - np.min(X_hat)) return X_noise, X_hat + X_noise def add_laplacian_noise(self, X_hat, noise_level=0.1): """ Add Laplacian noise to data. Parameters: - X_hat (numpy array): Original data. - mu (float): Location parameter for the Laplacian distribution. - lambd (float): Scale (diversity) parameter for the Laplacian distribution. Returns: - Numpy array of Laplacian noise. - Numpy array with added Laplacian noise. """ # Initialize parameters mu, lambd = 0, 1 # Generate noise X_noise = self.rng.laplace(mu, lambd, size=X_hat.shape) * noise_level * np.max(X_hat) return X_noise, X_hat + X_noise def add_block_noise(self, X_hat: np.ndarray, img_width: int, block_size: int=10) -> Tuple[np.ndarray, np.ndarray]: """ Add block noise to multiple flattened image samples. Parameters: - X (numpy array): Array of shape (m, n) where m is flattened image length and n is number of samples - img_width (int): width of the original image - block_size (int): size of the block to occlude Returns: - Numpy array of noise added to each sample - Numpy array with added block noise for all samples """ # Initalize parameters X = X_hat.copy() m, n_samples = X.shape X_noise = np.zeros((m, n_samples), dtype=np.uint8) # For each sample in X for i in range(n_samples): sample = X[:, i] # Reshape the flattened array to 2D img_2d = sample.reshape(-1, img_width) height, width = img_2d.shape # Ensure the block size isn't larger than the image dimensions block_size = min(block_size, width, height) # Generate a random starting point for the block x_start = self.rng.randint(0, width - block_size) y_start = self.rng.randint(0, height - block_size) # Add block noise img_2d[y_start:y_start+block_size, x_start:x_start+block_size] = 255 # Store the noise block to noise array noise_2d = np.zeros((height, width), dtype=np.uint8) noise_2d[y_start:y_start+block_size, x_start:x_start+block_size] = 255 X_noise[:, i] = noise_2d.ravel() # Flatten the array back to 1D and store back in X X[:, i] = img_2d.ravel() return X_noise, X def add_salt_and_pepper_noise(self, X_hat, noise_level=0.02, salt_ratio=0.5) -> Tuple[np.ndarray, np.ndarray]: """ Add "salt and pepper" noise to data. Parameters: - X_hat (numpy array): Original data. - amount (float): Proportion of image pixels to be replaced. - salt_ratio (float): Proportion of replaced pixels that are "salt". Returns: - Numpy array of salt and pepper noise. - Numpy array with added salt and pepper noise. """ # Initialize parameters X = X_hat.copy() X_noise = np.zeros_like(X) # Get the total number of pixels that should be replaced by noise total_pixels = X.size num_noise_pixels = int(total_pixels * noise_level) # Separate the number of salt and pepper pixels based on the salt_ratio num_salt = int(num_noise_pixels * salt_ratio) num_pepper = num_noise_pixels - num_salt # Directly generate the noise coordinates without overlap noise_coords = self.rng.choice(total_pixels, num_noise_pixels, replace=False) salt_coords = noise_coords[:num_salt] pepper_coords = noise_coords[num_salt:] # Convert the 1D noise coordinates back to tuple of N-dim coordinates salt_coords = np.unravel_index(salt_coords, X.shape) pepper_coords = np.unravel_index(pepper_coords, X.shape) # Set salt and pepper pixels in the image max_pixel_val = np.max(X) X_noise[salt_coords] = max_pixel_val X_noise[pepper_coords] = 0 X[salt_coords] = max_pixel_val X[pepper_coords] = 0 return X_noise, X