XavierSpycy committed
Commit bd67cfe · 1 Parent(s): 7991934

First commit

This view is limited to 50 files because the commit contains too many changes.
Files changed (50)
  1. .gitattributes +1 -0
  2. algorithm/__init__.py +0 -0
  3. algorithm/__pycache__/__init__.cpython-312.pyc +0 -0
  4. algorithm/__pycache__/datasets.cpython-312.pyc +0 -0
  5. algorithm/__pycache__/nmf.cpython-312.pyc +0 -0
  6. algorithm/__pycache__/preprocess.cpython-312.pyc +0 -0
  7. algorithm/__pycache__/sample.cpython-312.pyc +0 -0
  8. algorithm/datasets.py +135 -0
  9. algorithm/nmf.py +752 -0
  10. algorithm/pipeline.py +371 -0
  11. algorithm/preprocess.py +234 -0
  12. algorithm/sample.py +37 -0
  13. algorithm/user_evaluate.py +32 -0
  14. algorithm/visualize.py +161 -0
  15. app.py +196 -0
  16. data/.DS_Store +0 -0
  17. data/CroppedYaleB/.DS_Store +0 -0
  18. data/CroppedYaleB/yaleB01/DEADJOE +3 -0
  19. data/CroppedYaleB/yaleB01/yaleB01_P00.info +23 -0
  20. data/CroppedYaleB/yaleB01/yaleB01_P00A+000E+00.pgm +3 -0
  21. data/CroppedYaleB/yaleB01/yaleB01_P00A+000E+20.pgm +3 -0
  22. data/CroppedYaleB/yaleB01/yaleB01_P00A+000E+45.pgm +3 -0
  23. data/CroppedYaleB/yaleB01/yaleB01_P00A+000E+90.pgm +3 -0
  24. data/CroppedYaleB/yaleB01/yaleB01_P00A+000E-20.pgm +3 -0
  25. data/CroppedYaleB/yaleB01/yaleB01_P00A+000E-35.pgm +3 -0
  26. data/CroppedYaleB/yaleB01/yaleB01_P00A+005E+10.pgm +3 -0
  27. data/CroppedYaleB/yaleB01/yaleB01_P00A+005E-10.pgm +3 -0
  28. data/CroppedYaleB/yaleB01/yaleB01_P00A+010E+00.pgm +3 -0
  29. data/CroppedYaleB/yaleB01/yaleB01_P00A+010E-20.pgm +3 -0
  30. data/CroppedYaleB/yaleB01/yaleB01_P00A+015E+20.pgm +3 -0
  31. data/CroppedYaleB/yaleB01/yaleB01_P00A+020E+10.pgm +3 -0
  32. data/CroppedYaleB/yaleB01/yaleB01_P00A+020E-10.pgm +3 -0
  33. data/CroppedYaleB/yaleB01/yaleB01_P00A+020E-40.pgm +3 -0
  34. data/CroppedYaleB/yaleB01/yaleB01_P00A+025E+00.pgm +3 -0
  35. data/CroppedYaleB/yaleB01/yaleB01_P00A+035E+15.pgm +3 -0
  36. data/CroppedYaleB/yaleB01/yaleB01_P00A+035E+40.pgm +3 -0
  37. data/CroppedYaleB/yaleB01/yaleB01_P00A+035E+65.pgm +3 -0
  38. data/CroppedYaleB/yaleB01/yaleB01_P00A+035E-20.pgm +3 -0
  39. data/CroppedYaleB/yaleB01/yaleB01_P00A+050E+00.pgm +3 -0
  40. data/CroppedYaleB/yaleB01/yaleB01_P00A+050E-40.pgm +3 -0
  41. data/CroppedYaleB/yaleB01/yaleB01_P00A+060E+20.pgm +3 -0
  42. data/CroppedYaleB/yaleB01/yaleB01_P00A+060E-20.pgm +3 -0
  43. data/CroppedYaleB/yaleB01/yaleB01_P00A+070E+00.pgm +3 -0
  44. data/CroppedYaleB/yaleB01/yaleB01_P00A+070E+45.pgm +3 -0
  45. data/CroppedYaleB/yaleB01/yaleB01_P00A+070E-35.pgm +3 -0
  46. data/CroppedYaleB/yaleB01/yaleB01_P00A+085E+20.pgm +3 -0
  47. data/CroppedYaleB/yaleB01/yaleB01_P00A+085E-20.pgm +3 -0
  48. data/CroppedYaleB/yaleB01/yaleB01_P00A+095E+00.pgm +3 -0
  49. data/CroppedYaleB/yaleB01/yaleB01_P00A+110E+15.pgm +3 -0
  50. data/CroppedYaleB/yaleB01/yaleB01_P00A+110E+40.pgm +3 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.pgm filter=lfs diff=lfs merge=lfs -text
algorithm/__init__.py ADDED
File without changes
algorithm/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (148 Bytes).
algorithm/__pycache__/datasets.cpython-312.pyc ADDED
Binary file (5.53 kB).
algorithm/__pycache__/nmf.cpython-312.pyc ADDED
Binary file (53.1 kB).
algorithm/__pycache__/preprocess.cpython-312.pyc ADDED
Binary file (11.5 kB).
algorithm/__pycache__/sample.cpython-312.pyc ADDED
Binary file (1.77 kB).
algorithm/datasets.py ADDED
@@ -0,0 +1,135 @@
import os
from typing import Tuple

import numpy as np
from PIL import Image

def validate_data_directory(root: str) -> None:
    """
    Validate the data directory.

    Parameters:
    - root (str): Path to the dataset.
    """
    # Check 1: root exists.
    if not os.path.exists(root):
        raise FileNotFoundError(f'{root} does not exist!')

    # Check 2: data directory is not empty.
    subdirs = [d for d in os.listdir(root) if os.path.isdir(os.path.join(root, d))]
    if not subdirs:
        raise FileNotFoundError(f'{root} is empty!')

    # Check 3: each subdirectory contains at least one image.
    for subdir in subdirs:
        pgm_files = [f for f in os.listdir(os.path.join(root, subdir)) if f.endswith('.pgm')]
        if not pgm_files:
            raise FileNotFoundError(f'{os.path.join(root, subdir)} does not contain any image!')


def load_data(root: str='data/CroppedYaleB', reduce: int=1, global_centering: bool=True, local_centering: bool=True) -> Tuple[np.ndarray, np.ndarray]:
    """
    Load ORL (or Extended YaleB) dataset into a numpy array.

    Parameters:
    - root (str): Path to the dataset.
    - reduce (int): Scale factor for downscaling images.
    - global_centering (bool): If True, apply global centering.
    - local_centering (bool): If True, apply local centering.

    Returns:
    - images (numpy.ndarray): Image data.
    - labels (numpy.ndarray): Image labels.
    """
    # Validate the data directory.
    validate_data_directory(root)

    images, labels = [], []

    for i, person in enumerate(sorted(os.listdir(root))):

        if not os.path.isdir(os.path.join(root, person)):
            continue

        for fname in os.listdir(os.path.join(root, person)):

            # Remove background images in Extended YaleB dataset.
            if fname.endswith('Ambient.pgm'):
                continue

            if not fname.endswith('.pgm'):
                continue

            # Load image.
            img = Image.open(os.path.join(root, person, fname))
            img = img.convert('L')  # grey image.

            # Reduce computation complexity.
            img = img.resize([s//reduce for s in img.size])

            # Convert image to numpy array.
            img = np.asarray(img).reshape((-1, 1))

            # Collect data and label.
            images.append(img)
            labels.append(i)

    # Concatenate all images and labels.
    images = np.concatenate(images, axis=1)
    labels = np.array(labels)

    # Convert to float64 for numerical stability.
    images = images.astype(np.float64)

    # Global centering.
    if global_centering:
        images -= images.mean(axis=0)

    # Local centering.
    if local_centering:
        images -= images.mean(axis=1).reshape(-1, 1)

    return images, labels

def get_image_size(root: str='data/CroppedYaleB') -> tuple:
    """
    Get the size of images in the dataset.

    Parameters:
    - root (str): Path to the dataset.

    Returns:
    - img_size (tuple): Size of each image as (width, height).
    """
    # Validate the data directory.
    validate_data_directory(root)

    for person in sorted(os.listdir(root)):

        if not os.path.isdir(os.path.join(root, person)):
            continue

        for fname in os.listdir(os.path.join(root, person)):

            # Remove background images in Extended YaleB dataset.
            if fname.endswith('Ambient.pgm'):
                continue

            if not fname.endswith('.pgm'):
                continue

            # Load the first valid image and return its original size immediately.
            img = Image.open(os.path.join(root, person, fname))
            img = img.convert('L')  # Grey image.
            return img.size  # (width, height)
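A minimal usage sketch for the loaders above (illustrative only, not part of this commit; it assumes the repository root as the working directory and the data/CroppedYaleB layout shown in the file list):

# Hypothetical example; load_data stacks each image as one column of X.
from algorithm.datasets import load_data, get_image_size

X, y = load_data(root='data/CroppedYaleB', reduce=3,
                 global_centering=False, local_centering=False)
width, height = get_image_size('data/CroppedYaleB')   # original (width, height)
print(X.shape, y.shape, (width, height))              # X is (n_pixels, n_samples)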
algorithm/nmf.py ADDED
@@ -0,0 +1,752 @@
import os
import time
from abc import ABC, abstractmethod
from collections import Counter
from typing import Union, Dict, Tuple, Generator

import numpy as np
from tqdm import tqdm
from scipy.linalg import pinv
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans, BisectingKMeans
from sklearn.metrics import mean_squared_error, accuracy_score, normalized_mutual_info_score

class BasicNMF(ABC):
    name = 'Basic'
    """
    A basic framework for Non-negative Matrix Factorization (NMF) algorithms.
    """
    def __init__(self) -> None:
        """
        Initialize the basic NMF algorithm.
        """
        self.loss_list = []

    def __PCA(self, X: np.ndarray, n_components: int) -> np.ndarray:
        """
        Principal Component Analysis (PCA) for dimensionality reduction.

        Parameters:
        X (numpy.ndarray): Input dataset of shape (n_samples, n_features).
        n_components (int): Number of principal components to retain.

        Returns:
        transformed_data (numpy.ndarray): Dataset transformed into principal component space.
        """
        if n_components > X.shape[1]:
            raise ValueError("n_components must be less than or equal to the number of features")

        # Center the data
        X_centered = X - np.mean(X, axis=0)
        # Calculate the covariance matrix and its eigenvalues and eigenvectors
        cov_mat = np.cov(X_centered, rowvar=False)
        eigenvalues, eigenvectors = np.linalg.eigh(cov_mat)
        # Sort the eigenvalues and eigenvectors in descending order
        sorted_indices = eigenvalues.argsort()[::-1]
        eigenvectors = eigenvectors[:, sorted_indices]
        # Projection matrix using the first n_components eigenvectors
        projection_matrix = eigenvectors[:, :n_components]
        # Project the data onto the new feature space
        transformed_data = np.dot(X_centered, projection_matrix)
        return transformed_data

    def __FastICA(self, X: np.ndarray, max_iter: int=200, random_state: Union[int, np.random.RandomState, None]=None) -> np.ndarray:
        """
        Implementation of the FastICA algorithm to separate the independent sources
        from mixed signals in the input data.

        Parameters:
        X (numpy.ndarray): Input dataset of shape (n_samples, n_features).
        max_iter (int, optional): The maximum number of iterations for the convergence of the estimation. Default is 200.

        Return:
        S (numpy.ndarray): Matrix of shape (n_samples, n_features) representing the estimated independent sources.
        """
        # Set the random state
        rng = np.random.RandomState(random_state)
        # Center the data by removing the mean
        X = X - np.mean(X, axis=1, keepdims=True)
        n = X.shape[0]
        # Compute the independent components iteratively
        W = np.zeros((n, n))
        for i in range(n):
            w = rng.rand(n)
            for j in range(max_iter):  # max iterations for convergence
                w_new = (X * np.dot(w, X)).mean(axis=1) - 2 * w
                w_new /= np.sqrt((w_new ** 2).sum())
                # Convergence check based on the weight vector's direction
                if np.abs(np.abs((w_new * w).sum()) - 1) < 1e-04:
                    break
                w = w_new
            W[i, :] = w
            X -= np.outer(w, np.dot(w, X))
        # Compute the estimated independent sources
        S = np.dot(W, X)
        return S

    def __NICA(self, X: np.ndarray, r: int, random_state: Union[int, np.random.RandomState, None]=None) -> Tuple[np.ndarray, np.ndarray]:
        """
        Implementation of a non-negative Independent Component Analysis (NICA).
        The process involves obtaining a non-negative basic matrix and a
        non-negative coefficient matrix from the input data.

        Parameters:
        - X (numpy.ndarray): The input data matrix of shape (n_features, n_samples)
          where n_samples is the number of samples, and n_features is the number of features.
        - r (int): The number of components to be retained after applying PCA.

        Returns:
        - W_0 (numpy.ndarray): The non-negative dictionary matrix.
        - H_0 (numpy.ndarray): The non-negative representation matrix.
        """
        # Set A as a pseudoinverse of X
        A = pinv(X.T)
        # Apply PCA on the matrix A to generate the basic matrix W
        W = self.__PCA(A, n_components=r)
        # Whiten the basic matrix W obtained above by using the eigenvalue decomposition of the covariance matrix of W.
        eigenvalues, eigenvectors = np.linalg.eigh(np.cov(W, rowvar=False))
        # Preallocate memory for whitened matrix
        W_whitened = np.empty_like(W)
        np.dot(W, eigenvectors, out=W_whitened)
        W_whitened /= np.sqrt(eigenvalues + 1e-5)
        # Implement the ICA algorithm on the whitened matrix W and obtain the independent basic matrix W_0
        # Assuming FastICA() returns the transformed matrix
        W_0 = self.__FastICA(W_whitened, random_state=random_state)
        # Preallocate memory for H_0 and calculate it
        H_0 = np.empty((W_0.shape[1], X.shape[1]))
        np.dot(W_0.T, X, out=H_0)
        # Take the absolute value in-place
        np.abs(W_0, out=W_0)
        np.abs(H_0, out=H_0)
        return W_0, H_0

    def Kmeans(self, X: np.ndarray, n_components: int, random_state: Union[int, np.random.RandomState, None]=None) -> Tuple[np.ndarray, np.ndarray]:
        """
        Initialize D and R matrices using the K-means algorithm.

        Parameters:
        - X (numpy.ndarray): Input data matrix of shape (n_features, n_samples).
        - n_components (int): The number of components for matrix factorization.
        - random_state (int, np.random.RandomState, None): Random state for reproducibility.

        Returns:
        - D (numpy.ndarray): The initial dictionary matrix.
        - R (numpy.ndarray): The initial representation matrix.
        """
        # Initialize
        kmeans = KMeans(n_clusters=n_components, n_init='auto', random_state=random_state)
        kmeans.fit(X.T)
        D = kmeans.cluster_centers_.T
        labels = kmeans.labels_
        G = np.zeros((len(labels), n_components))
        for i, label in enumerate(labels):
            G[i, label] = 1
        G = G / np.sqrt(np.sum(G, axis=0, keepdims=True))
        G += 0.2
        R = G.T
        return D, R

    def matrix_init(self, X: np.ndarray, n_components: int,
                    random_state: Union[int, np.random.RandomState, None]=None) -> Tuple[np.ndarray, np.ndarray]:
        """
        Initialize D and R matrices using the NICA algorithm.

        Parameters:
        - X (numpy.ndarray): Input data matrix of shape (n_features, n_samples).
        - n_components (int): The number of components for matrix factorization.
        - random_state (int, np.random.RandomState, None): Random state for reproducibility.

        Returns:
        - D (numpy.ndarray): The non-negative dictionary matrix.
        - R (numpy.ndarray): The non-negative representation matrix.
        """
        # Initialize
        D, R = self.__NICA(X, n_components, random_state=random_state)
        return D, R

    def fit(self, X: np.ndarray, n_components: int, max_iter: int=500,
            random_state: Union[int, np.random.RandomState, None]=None,
            verbose: bool=True, imshow: bool=False, warm_start: bool=False, **kwargs) -> None:
        """
        Non-negative Matrix Factorization (NMF) algorithm using the L2-norm for the convergence criterion.

        Parameters:
        - X (numpy.ndarray): Input data matrix of shape (n_features, n_samples).
        - n_components (int): The number of components for matrix factorization.
        - max_iter (int, optional): Maximum number of iterations. Default is 500.
        - verbose (bool, optional): Whether to show the progress bar.
        - random_state (int, np.random.RandomState, None, optional): Random state for reproducibility. Default is None.
        - imshow (bool, optional): Whether to plot the convergence trend. Default is False.
        - warm_start (bool, optional): Whether to continue from the previous state. Default is False.
        - kwargs: Additional keyword arguments for the update rule.
        """
        # Record start time
        start_time = time.time()
        # Initialize D and R matrices using the NICA algorithm by default
        if not warm_start or (warm_start and not hasattr(self, 'D') and not hasattr(self, 'R')):
            self.D, self.R = self.matrix_init(X, n_components, random_state)
        else:
            if verbose:
                print('Warm start enabled. Continuing from previous state.')

        # Compute initialization time
        init_time = time.time() - start_time
        # Copy D and R matrices for convergence check
        self.D_prev, self.R_prev = self.D.copy(), self.R.copy()
        if verbose:
            print(f'Initialization done. Time elapsed: {init_time:.2f} seconds.')
        # Iteratively update D and R matrices until convergence
        for _ in self.conditional_tqdm(range(max_iter), verbose=verbose):
            # Update D and R matrices
            flag = self.update(X, **kwargs)
            # Check convergence
            if flag:
                if verbose:
                    print('Converged at iteration', _)
                break
        if imshow:
            self.plot()

    @abstractmethod
    def update(self, X: np.ndarray, **kwargs: Dict[str, float]) -> bool:
        """
        Update rule for D and R matrices using a specific NMF algorithm, which must be implemented in the derived class.

        Parameters:
        - X (numpy.ndarray): Input data matrix of shape (n_features, n_samples).
        - kwargs: Additional keyword arguments for the update rule.

        Returns:
        - flag (bool): Whether the algorithm has converged.
        """
        # Calculate L2-norm based errors for convergence
        e_D = np.sqrt(np.sum((self.D - self.D_prev) ** 2, axis=(0, 1))) / self.D.size
        e_R = np.sqrt(np.sum((self.R - self.R_prev) ** 2, axis=(0, 1))) / self.R.size
        return (e_D < 1e-6 and e_R < 1e-6)

    def plot(self) -> None:
        """
        Plot the convergence trend of the cost function.
        """
        plt.plot(self.loss_list)
        plt.xlabel('Iteration')
        plt.ylabel('Cost function')
        plt.grid()
        plt.show()

    def conditional_tqdm(self, iterable, verbose: bool=True) -> Generator[int, None, None]:
        """
        Determine whether to use tqdm or not based on the verbose flag.

        Parameters:
        - iterable (range): Range of values to iterate over.
        - verbose (bool, optional): Whether to print the progress bar. Default is True.

        Returns:
        - item (int): Current iteration.
        """
        if verbose:
            for item in tqdm(iterable):
                yield item
        else:
            for item in iterable:
                yield item

    def normalize(self, epsilon: float=1e-7) -> None:
        """
        Normalize columns of D and rows of R.

        Parameter:
        - epsilon (float, optional): Small constant added to the denominator to prevent division by zero. Default is 1e-7.
        """
        # Normalize columns of D and rows of R
        norms = np.sqrt(np.sum(self.D**2, axis=0))
        self.D /= norms[np.newaxis, :] + epsilon
        self.R *= norms[:, np.newaxis]

    def evaluate(self, X_clean: np.ndarray, Y_true: np.ndarray, random_state: Union[int, np.random.RandomState, None]=None) -> Tuple[float, float, float]:
        """
        Evaluate the specific NMF algorithm on the specific dataset.

        Parameters:
        - X_clean (numpy.ndarray): The original clean data matrix of shape (n_features, n_samples).
        - Y_true (numpy.ndarray): The true labels corresponding to each sample in X of shape (n_samples,).
        - random_state (int, np.random.RandomState, None, optional): Random state for reproducibility. Default is None.

        Returns:
        - rmse (float): The root mean squared error of the reconstructed matrix and the original matrix.
        - acc (float): The accuracy score of the predicted labels based on the clustering results on the reconstructed matrix.
        - nmi (float): The normalized mutual information score of the predicted labels based on the clustering results on the reconstructed matrix.
        """
        Y_label = self.__labeling(self.R.T, Y_true, random_state=random_state)
        rmse = np.sqrt(mean_squared_error(X_clean, np.dot(self.D, self.R)))
        acc = accuracy_score(Y_true, Y_label)
        nmi = normalized_mutual_info_score(Y_true, Y_label)
        return rmse, acc, nmi

    def __labeling(self, X: np.ndarray, Y: np.ndarray, random_state: Union[int, np.random.RandomState, None]=None) -> np.ndarray:
        """
        Label data based on clusters obtained from KMeans clustering,
        by assigning the most frequent label in each cluster.

        Parameters:
        - X (numpy.ndarray): Input feature matrix of shape (n_samples, n_features).
        - Y (numpy.ndarray): True labels corresponding to each sample in X of shape (n_samples,).

        Returns:
        - Y_pred (numpy.ndarray): Predicted labels for each sample based on the clustering results.

        Note:
        This function works best when the input data is somewhat separated into distinct
        clusters that align with the true labels.
        """
        cluster = BisectingKMeans(len(set(Y)), random_state=random_state).fit(X)
        Y_pred = np.zeros(Y.shape)
        for i in set(cluster.labels_):
            ind = cluster.labels_ == i
            Y_pred[ind] = Counter(Y[ind]).most_common(1)[0][0]  # assign label.
        return Y_pred

    def vectorized_armijo_rule(self, f, grad_f, X, alpha, c=1e-4, tau=0.5):
        """
        Vectorized Armijo rule to find the step size for each element in the matrix.

        Parameters:
        - f: The objective function, which should accept a matrix and return a scalar.
        - grad_f: The gradient of the objective function, which returns a matrix.
        - X: Current point, a matrix.
        - alpha: Initial step size, a scalar or a matrix.
        - c: A constant in (0, 1), typically a small value (default is 1e-4).
        - tau: Reduction factor for step size, typically in (0, 1) (default is 0.5).

        Returns:
        - alpha: Step sizes that satisfy the Armijo condition for each element.
        """
        # Compute the initial objective function value
        f_x = f(X)
        # Compute the initial gradient and its norm squared
        grad_f_x = grad_f(X)
        norm_grad_f_x_squared = np.square(np.linalg.norm(grad_f_x, axis=(0, 1), keepdims=True))

        # Compute the sufficient decrease condition for the entire matrix
        sufficient_decrease = f_x - c * alpha * norm_grad_f_x_squared

        counter = 0
        # Check the condition and shrink the step size, capping the number of reductions at 10
        while np.any(f(X - alpha * grad_f_x) > sufficient_decrease) and counter < 10:
            # Reduce alpha for elements not satisfying the condition
            alpha *= tau
            counter += 1
        return alpha

    @classmethod
    def from_pretrained(cls, file_path: str, **kwargs: Dict[str, float]) -> 'BasicNMF':
        """
        Load the model parameters from a file.

        Parameters:
        - file_path (str): The path to the file where the model parameters are saved.

        Returns:
        - instance (BasicNMF): An instance of the BasicNMF class with the loaded parameters.
        """
        import pickle
        with open(file_path, 'rb') as file:
            params = pickle.load(file)
        instance = cls(**kwargs)
        instance.__dict__.update(params)
        return instance

    def save(self, file_path: str) -> None:
        """
        Save the model parameters to a file.

        Parameters:
        - file_path (str): The path to the file where the model parameters will be saved.
        """
        import pickle
        with open(file_path, 'wb') as file:
            pickle.dump(self.__dict__, file)

    def __call__(self, **kwargs: Dict[str, float]):
        """
        Overwrite the __call__ method to fit the model with the given parameters.
        """
        self.fit(**kwargs)

class L2NormNMF(BasicNMF):
    name = 'L2Norm'
    """
    L2-norm NMF algorithm.
    """
    def __init__(self) -> None:
        super().__init__()

    def update(self, X: np.ndarray, threshold: float=1e-6, epsilon: float=1e-7) -> bool:
        """
        Update rule for D and R matrices using the L2-norm NMF algorithm.

        Parameters:
        - X (numpy.ndarray): Input data matrix of shape (n_features, n_samples).
        - threshold (float, optional): Convergence threshold based on the L2-norm. Default is 1e-6.
        - epsilon (float, optional): Small constant added to the denominator to prevent division by zero. Default is 1e-7.

        Returns:
        - flag (bool): Whether the algorithm has converged.
        """
        # Multiplicative update rule for D and R matrices
        self.D *= np.dot(X, self.R.T) / (np.dot(np.dot(self.D, self.R), self.R.T) + epsilon)
        self.R *= np.dot(self.D.T, X) / (np.dot(np.dot(self.D.T, self.D), self.R) + epsilon)
        # Calculate the loss function
        loss = np.linalg.norm(X - np.dot(self.D, self.R), 'fro') ** 2
        self.loss_list.append(loss)
        # Calculate L2-norm based errors for convergence
        e_D = np.sqrt(np.sum((self.D - self.D_prev) ** 2, axis=(0, 1))) / self.D.size
        e_R = np.sqrt(np.sum((self.R - self.R_prev) ** 2, axis=(0, 1))) / self.R.size
        # Update previous matrices for the next iteration
        self.D_prev, self.R_prev = self.D.copy(), self.R.copy()
        return (e_D < threshold and e_R < threshold)

class KLDivergenceNMF(BasicNMF):
    name = 'KLDivergence'
    """
    KL-divergence NMF algorithm.
    """
    def __init__(self) -> None:
        """
        Initialize the KL-divergence NMF algorithm.
        """
        super().__init__()
        self.prev_kl = float('inf')

    def update(self, X: np.ndarray, epsilon: float=1e-7, threshold: float=1e-4) -> bool:
        """
        Update rule for D and R matrices using the KL-divergence NMF algorithm.

        Parameters:
        - X (numpy.ndarray): Input data matrix of shape (n_features, n_samples).
        - epsilon (float, optional): Small constant added to the denominator to prevent division by zero. Default is 1e-7.
        - threshold (float, optional): Convergence threshold based on the KL-divergence. Default is 1e-4.

        Returns:
        - flag (bool): Whether the algorithm has converged.
        """
        # Multiplicative update rule for D and R matrices
        self.D *= np.dot(X / (np.dot(self.D, self.R) + epsilon), self.R.T) / (np.dot(np.ones(X.shape), self.R.T) + epsilon)
        self.R *= np.dot(self.D.T, X / (np.dot(self.D, self.R) + epsilon)) / (np.dot(self.D.T, np.ones(X.shape)) + epsilon)

        # Calculate KL-divergence
        XR = np.dot(self.D, self.R) + epsilon
        kl_div = np.sum(X * np.log(np.maximum(epsilon, X / (XR + epsilon))) - X + XR)
        self.loss_list.append(kl_div)
        flag = abs(kl_div - self.prev_kl) < threshold
        self.prev_kl = kl_div  # Update previous KL divergence
        return flag

class ISDivergenceNMF(BasicNMF):
    name = 'ISDivergence'
    """
    IS-divergence NMF algorithm.
    """
    def __init__(self) -> None:
        """
        Initialize the IS-divergence NMF algorithm.
        """
        super().__init__()
        self.prev_is_div = float('inf')

    def update(self, X: np.ndarray, epsilon: float=1e-7, threshold: float=1e-6) -> bool:
        """
        Update rule for D and R matrices using the IS-divergence NMF algorithm.

        Parameters:
        - X (numpy.ndarray): Input data matrix of shape (n_features, n_samples).
        - epsilon (float, optional): Small constant added to the denominator to prevent division by zero. Default is 1e-7.
        - threshold (float, optional): Convergence threshold based on the IS-divergence. Default is 1e-6.

        Returns:
        - flag (bool): Whether the algorithm has converged.
        """
        # Update R
        DR = np.dot(self.D, self.R)
        DR = np.where(DR > 0, DR, epsilon)
        self.R *= (np.dot(self.D.T, (DR ** (-2) * X))) / (np.dot(self.D.T, DR ** (-1)) + epsilon)
        # Update D
        DR = np.dot(self.D, self.R)
        DR = np.where(DR > 0, DR, epsilon)
        self.D *= (np.dot((DR ** (-2) * X), self.R.T)) / (np.dot(DR ** (-1), self.R.T) + epsilon)
        # Normalize D and R
        self.normalize(epsilon)
        # Calculate IS-divergence
        DR = np.dot(self.D, self.R) + epsilon
        is_div = np.sum(-np.log(np.maximum(epsilon, X / DR)) + X / DR - 1)
        # Adding L2 regularization terms to the IS-divergence
        # is_div += lambd * np.linalg.norm(self.D, 'fro') ** 2 + lambd * np.linalg.norm(self.R, 'fro') ** 2
        self.loss_list.append(is_div)
        flag = np.abs(is_div - self.prev_is_div) < threshold
        self.prev_is_div = is_div
        return flag

class L21NormNMF(BasicNMF):
    name = 'L21Norm'
    """
    L21 Norm NMF algorithm.
    """
    def __init__(self) -> None:
        """
        Initialize the L21 Norm NMF algorithm.
        """
        super().__init__()

    def update(self, X: np.ndarray, epsilon: float=1e-7, threshold: float=1e-4) -> bool:
        """
        Update rule for D and R matrices using the L21 Norm NMF algorithm.

        Parameters:
        - X (numpy.ndarray): Input data matrix of shape (n_features, n_samples).
        - epsilon (float, optional): Small constant added to the denominator to prevent division by zero. Default is 1e-7.
        - threshold (float, optional): Convergence threshold based on the L21 Norm. Default is 1e-4.

        Returns:
        - flag (bool): Whether the algorithm has converged.
        """
        # Multiplicative update rule for D and R matrices
        residual = X - np.dot(self.D, self.R)  # residual.shape = (n_features, n_samples)
        norm_values = np.sqrt(np.sum(residual ** 2, axis=1))
        diagonal = np.diag(1.0 / (norm_values + epsilon))  # diagonal.shape = (n_features, n_features)
        # Update rule for D
        self.D *= (np.dot(np.dot(diagonal, X), self.R.T) / (np.dot(np.dot(np.dot(diagonal, self.D), self.R), self.R.T) + epsilon))
        # Update rule for R
        self.R *= (np.dot(np.dot(self.D.T, diagonal), X) / (np.dot(np.dot(np.dot(self.D.T, diagonal), self.D), self.R) + epsilon))
        # Calculate the loss function
        loss = np.linalg.norm(X - np.dot(self.D, self.R), 'fro')
        self.loss_list.append(loss)
        # Calculate L2,1-norm based errors for convergence
        e_D = np.linalg.norm(self.D - self.D_prev, 'fro') / np.linalg.norm(self.D, 'fro')
        e_R = np.linalg.norm(self.R - self.R_prev, 'fro') / np.linalg.norm(self.R, 'fro')
        # Update previous matrices for the next iteration
        self.D_prev, self.R_prev = self.D.copy(), self.R.copy()
        return (e_D < threshold and e_R < threshold)

class L1NormRegularizedNMF(BasicNMF):
    name = 'L1NormRegularized'
    """
    L1 Norm Regularized NMF algorithm.
    """
    def __init__(self) -> None:
        """
        Initialize the L1 Norm Regularized NMF algorithm.
        """
        super().__init__()

    # Helper function
    def soft_thresholding(self, x: np.ndarray, lambd: float) -> np.ndarray:
        """
        Soft thresholding operator.

        Parameters:
        - x (numpy.ndarray): Input data matrix of shape (n_features, n_samples).
        - lambd (float): Threshold value.

        Returns:
        - y (numpy.ndarray): The updated matrix after applying the soft thresholding operator.
        """
        return np.where(x > lambd, x - lambd, np.where(x < -lambd, x + lambd, 0))

    def update(self, X: np.ndarray, lambd: float=0.2, epsilon: float=1e-7, threshold: float=1e-8) -> bool:
        """
        Update rule for D and R matrices using the L1 Norm Regularized NMF algorithm.

        Parameters:
        - X (numpy.ndarray): Input data matrix of shape (n_features, n_samples).
        - lambd (float): Threshold value.
        - epsilon (float, optional): Small constant added to the denominator to prevent division by zero. Default is 1e-7.
        - threshold (float, optional): Convergence threshold based on the L1 Norm Regularized objective. Default is 1e-8.

        Returns:
        - flag (bool): Whether the algorithm has converged.
        """
        # Compute the error matrix
        S = X - np.dot(self.D, self.R)
        # Soft thresholding operator
        S = self.soft_thresholding(S, lambd / 2)
        # Multiplicative update rule for D and R matrices
        update_D = np.dot(S - X, self.R.T)
        self.D *= (np.abs(update_D) - update_D) / (2 * np.dot(np.dot(self.D, self.R), self.R.T) + epsilon)
        update_R = np.dot(self.D.T, S - X)
        self.R *= (np.abs(update_R) - update_R) / (2 * np.dot(np.dot(self.D.T, self.D), self.R) + epsilon)
        self.normalize(epsilon)
        # Calculate the loss function
        loss = np.linalg.norm(X - np.dot(self.D, self.R) - S, 'fro') ** 2 + lambd * np.sum(np.abs(S))
        self.loss_list.append(loss)
        # Calculate L2-norm based errors for convergence
        e_D = np.sqrt(np.sum((self.D - self.D_prev) ** 2, axis=(0, 1))) / self.D.size
        e_R = np.sqrt(np.sum((self.R - self.R_prev) ** 2, axis=(0, 1))) / self.R.size
        # Update previous matrices for the next iteration
        self.D_prev, self.R_prev = self.D.copy(), self.R.copy()
        return (e_D < threshold and e_R < threshold)

    def matrix_init(self, X: np.ndarray, n_components: int,
                    random_state: Union[int, np.random.RandomState, None]=None) -> Tuple[np.ndarray, np.ndarray]:
        return self.Kmeans(X, n_components, random_state)

class CauchyNMF(BasicNMF):
    name = 'Cauchy'
    """
    Cauchy NMF algorithm.
    """
    def __init__(self) -> None:
        """
        Initialize the Cauchy NMF algorithm.
        """
        super().__init__()

    # Helper function
    def compute(self, A: np.ndarray, B: np.ndarray, epsilon: float) -> np.ndarray:
        """
        Update rule for the Cauchy divergence.

        Parameters:
        A (numpy.ndarray): The first matrix, which is noted as A.
        B (numpy.ndarray): The second matrix, which is noted as B.
        epsilon (float): Small constant added to the denominator to prevent division by zero.

        Returns:
        C (numpy.ndarray): The updated matrix.
        """
        temp = A ** 2 + 2 * B * A
        temp = np.where(temp > 0, temp, epsilon)
        return B / (A + np.sqrt(temp))

    def update(self, X: np.ndarray, epsilon: float=1e-7, threshold: float=1e-4) -> bool:
        """
        Update rule for D and R matrices using the Cauchy NMF algorithm.

        Parameters:
        - X (numpy.ndarray): Input data matrix of shape (n_features, n_samples).
        - epsilon (float, optional): Small constant added to the denominator to prevent division by zero. Default is 1e-7.
        - threshold (float, optional): Convergence threshold based on the Cauchy divergence. Default is 1e-4.

        Returns:
        - flag (bool): Whether the algorithm has converged.
        """
        if not hasattr(self, 'prev_cauchy_div'):
            DR = np.dot(self.D, self.R)
            log_residual = np.log(DR + epsilon) - np.log(X + epsilon)
            residual = X - DR
            self.prev_cauchy_div = np.sum(log_residual + residual / (DR + epsilon))
        # Update rule for D
        DR = np.dot(self.D, self.R)
        A = 3 / 4 * np.dot((DR / (DR ** 2 + X + epsilon)), self.R.T)
        B = np.dot(1 / (DR + epsilon), self.R.T)
        self.D *= self.compute(A, B, epsilon)
        # Update rule for R
        DR = np.dot(self.D, self.R)
        A = 3 / 4 * np.dot(self.D.T, (DR / (DR ** 2 + X + epsilon)))
        B = np.dot(self.D.T, 1 / (DR + epsilon))
        self.R *= self.compute(A, B, epsilon)
        # Calculate the Cauchy divergence
        DR = np.dot(self.D, self.R)
        cauchy_div = np.sum(np.log(DR + epsilon) - np.log(X + epsilon) + (X - DR) / (DR + epsilon))
        self.loss_list.append(cauchy_div)
        flag = abs(cauchy_div - self.prev_cauchy_div) < threshold
        self.prev_cauchy_div = cauchy_div  # Update previous Cauchy divergence
        return flag

class CappedNormNMF(BasicNMF):
    name = 'CappedNorm'
    """
    Capped Norm NMF algorithm.
    """
    def __init__(self) -> None:
        """
        Initialize the Capped Norm NMF algorithm.
        """
        super().__init__()
        self.loss_prev = float('inf')

    # Helper function
    def matrix_init(self, X: np.ndarray, n_components: int,
                    random_state: Union[int, np.random.RandomState, None]=None) -> Tuple[np.ndarray, np.ndarray]:
        return self.Kmeans(X, n_components, random_state)

    def update(self, X, theta: float=0.2, threshold: float=1e-3, epsilon: float=1e-7) -> bool:
        """
        Update rule for D and R matrices using the Capped Norm NMF algorithm.

        Parameters:
        - X (numpy.ndarray): Input data matrix of shape (n_features, n_samples).
        - theta (float, optional): Outlier parameter. Default is 0.2.
        - threshold (float, optional): Convergence threshold based on the change in loss. Default is 1e-3.
        - epsilon (float, optional): Small constant added to the denominator to prevent division by zero. Default is 1e-7.

        Returns:
        - flag (bool): Whether the algorithm has converged.
        """
        if not hasattr(self, 'I'):
            self.n_samples = X.shape[1]
            self.I = np.identity(self.n_samples)
        # Multiplicative update rule for D and R matrices
        G = self.R.T
        self.D *= np.dot(np.dot(X, self.I), G) / (np.dot(np.dot(np.dot(self.D, G.T), self.I), G) + epsilon)
        G *= np.sqrt((np.dot(np.dot(self.I, X.T), self.D)) / (np.dot(np.dot(np.dot(np.dot(self.I, G), G.T), X.T), self.D) + epsilon))
        self.R = G.T
        # Update rule for I
        diff = X - np.dot(self.D, self.R)
        norms = np.linalg.norm(diff, axis=0)
        norms /= np.max(norms)
        I = np.full_like(norms, epsilon)
        indices = np.where(norms < theta)
        I[indices] = 1 / (2 * norms[indices])
        self.I = np.diagflat(I)
        # Calculate the loss function
        loss = np.linalg.norm(X - np.dot(self.D, self.R), 'fro') ** 2
        flag = abs(loss - self.loss_prev) < threshold
        self.loss_list.append(loss)
        self.loss_prev = loss
        return flag

class HSCostNMF(BasicNMF):
    name = 'HSCost'
    """
    Hypersurface Cost NMF algorithm.
    """
    def __init__(self) -> None:
        """
        Initialize the Hypersurface Cost NMF algorithm.
        """
        super().__init__()
        self.loss_prev = float('inf')
        # Objective function and its gradient
        self.obj_func = lambda X, D, R: np.linalg.norm(X - np.dot(D, R), 'fro')
        self.grad_D = lambda X, D, R: (np.dot((np.dot(D, R) - X), R.T)) / np.sqrt(1 + np.linalg.norm(X - np.dot(D, R), 'fro'))
        self.grad_R = lambda X, D, R: (np.dot(D.T, (np.dot(D, R) - X))) / np.sqrt(1 + np.linalg.norm(X - np.dot(D, R), 'fro'))

    def update(self, X: np.ndarray, threshold: float=1e-8, alpha: float=0.1, beta: float=0.1, c: float=1e-4, tau: float=0.5) -> bool:
        """
        Update rule for D and R matrices using the Hypersurface Cost NMF algorithm.

        Parameters:
        - X (numpy.ndarray): Input data matrix of shape (n_features, n_samples).
        - threshold (float, optional): Convergence threshold based on the change in loss. Default is 1e-8.
        - alpha (float, optional): Initial learning rate for gradient descent on D. Default is 0.1.
        - beta (float, optional): Initial learning rate for gradient descent on R. Default is 0.1.
        - c (float, optional): A constant in (0, 1), typically a small value. Default is 1e-4.
        - tau (float, optional): A reduction factor for step size, typically in (0, 1). Default is 0.5.

        Returns:
        - flag (bool): Whether the algorithm has converged.
        """
        if not hasattr(self, 'alpha'):
            self.alpha = np.full_like(self.D, alpha)
            self.beta = np.full_like(self.R, beta)
        # Vectorized Armijo rule to update alpha and beta
        self.alpha = self.vectorized_armijo_rule(lambda D: self.obj_func(X, D, self.R), lambda D: self.grad_D(X, D, self.R), self.D, self.alpha, c, tau)
        self.beta = self.vectorized_armijo_rule(lambda R: self.obj_func(X, self.D, R), lambda R: self.grad_R(X, self.D, R), self.R, self.beta, c, tau)
        self.alpha = np.maximum(self.alpha, threshold)
        self.beta = np.maximum(self.beta, threshold)
        # Update rule for D and R
        self.D -= self.alpha * (np.dot((np.dot(self.D, self.R) - X), self.R.T)) / np.sqrt(1 + np.linalg.norm(X - np.dot(self.D, self.R), 'fro'))
        self.R -= self.beta * (np.dot(self.D.T, (np.dot(self.D, self.R) - X))) / np.sqrt(1 + np.linalg.norm(X - np.dot(self.D, self.R), 'fro'))
        self.D[np.where(self.D < 0)] = 0
        self.R[np.where(self.R < 0)] = 0
        # Calculate loss
        loss_current = np.sqrt(1 + np.linalg.norm(X - np.dot(self.D, self.R), 'fro')) - 1
        self.loss_list.append(loss_current)
        flag = abs(loss_current - self.loss_prev) < threshold
        # Update previous loss for the next iteration
        self.loss_prev = loss_current
        return flag
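A brief, hedged sketch of how these NMF classes are driven (illustrative only, not part of this commit; the random matrix stands in for real image data and follows the (n_features, n_samples) convention used throughout nmf.py):

# Hypothetical example using one of the concrete subclasses defined above.
import numpy as np
from algorithm.nmf import L1NormRegularizedNMF

rng = np.random.RandomState(0)
X = rng.rand(1024, 200)                  # non-negative data, (n_features, n_samples)
y = rng.randint(0, 10, size=200)         # stand-in labels, only needed by evaluate()

model = L1NormRegularizedNMF()
model.fit(X, n_components=10, max_iter=200, random_state=0, verbose=False)
rmse, acc, nmi = model.evaluate(X, y, random_state=0)

model.save('l1_nmf.pkl')                              # pickle the learned state
restored = L1NormRegularizedNMF.from_pretrained('l1_nmf.pkl')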
algorithm/pipeline.py ADDED
@@ -0,0 +1,371 @@
1
+ import os
2
+ import csv
3
+ import logging
4
+ from typing import Union, List, Tuple, Generator
5
+
6
+ import numpy as np
7
+ import pandas as pd
8
+
9
+ from algorithm.datasets import load_data, get_image_size
10
+ from algorithm.preprocess import NoiseAdder, MinMaxScaler, StandardScaler
11
+ from algorithm.sample import random_sample
12
+ from algorithm.nmf import BasicNMF, L2NormNMF, KLDivergenceNMF, ISDivergenceNMF, L21NormNMF, HSCostNMF, L1NormRegularizedNMF, CappedNormNMF, CauchyNMF
13
+ from algorithm.user_evaluate import evaluate
14
+
15
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
16
+
17
+ def summary(log_file_name: str) -> pd.DataFrame:
18
+ """
19
+ Parameter:
20
+ log_file_name (str): The name of the log file to read.
21
+
22
+ Return:
23
+ result (pandas.DataFrame): The summary of the log file.
24
+ """
25
+ df = pd.read_csv(log_file_name)
26
+ result = df.groupby(by=['dataset', 'noise_type', 'noise_level'])[['rmse', 'nmi', 'acc']].mean()
27
+ return result
28
+
29
+ class BasicBlock(object):
30
+ """
31
+ Basic block for the pipeline.
32
+ """
33
+ def basic_info(self, nmf: Union[BasicNMF, str], dataset: str, scaler: str) -> Tuple[str, Union[MinMaxScaler, StandardScaler], BasicNMF]:
34
+ """
35
+ Get the basic information for the pipeline.
36
+
37
+ Parameters:
38
+ - nmf (Union[BasicNMF, str]): NMF algorithm to use.
39
+ - dataset (str): Name of the dataset to use.
40
+ - scaler (str): Name of the scaler to use.
41
+
42
+ Returns:
43
+ - folder (str): Folder of the dataset.
44
+ - scaler (MinMaxScaler or StandardScaler): Scaler to use.
45
+ - nmf (BasicNMF): NMF algorithm to use.
46
+ """
47
+ # Create mappings for the NMF algorithms, datasets, and scalers
48
+ # Store NMF algorithms in a dictionary
49
+ nmf_dict = {
50
+ 'L2NormNMF': L2NormNMF,
51
+ 'KLDivergenceNMF': KLDivergenceNMF,
52
+ 'ISDivergenceNMF': ISDivergenceNMF,
53
+ 'L21NormNMF': L21NormNMF,
54
+ 'HSCostNMF': HSCostNMF,
55
+ 'L1NormRegularizedNMF': L1NormRegularizedNMF,
56
+ 'CappedNormNMF': CappedNormNMF,
57
+ 'CauchyNMF': CauchyNMF
58
+ }
59
+ # Store datasets in a dictionary
60
+ dataset_dict = {
61
+ 'ORL': 'data/ORL',
62
+ 'YaleB': 'data/CroppedYaleB'
63
+ }
64
+ # Store scalers in a dictionary
65
+ scaler_dict = {
66
+ 'MinMax': MinMaxScaler(),
67
+ 'Standard': StandardScaler()
68
+ }
69
+ folder = dataset_dict.get(dataset, 'data/ORL')
70
+ # Scale the data
71
+ scaler = scaler_dict.get(scaler, MinMaxScaler())
72
+ # Choose an NMF algorithm
73
+ if isinstance(nmf, BasicNMF):
74
+ nmf = nmf
75
+ else:
76
+ # Choose an NMF algorithm
77
+ nmf = nmf_dict.get(nmf, L1NormRegularizedNMF)()
78
+ return folder, scaler, nmf
79
+
80
+ def load_data(self, folder: str, reduce: int=1, random_state: Union[int, np.random.RandomState, None]=None) -> Tuple[np.ndarray, np.ndarray, Tuple[int, int]]:
81
+ """
82
+ Load the data.
83
+
84
+ Parameters:
85
+ - folder (str): Folder of the dataset.
86
+ - reduce (int): Factor by which the image size is reduced for visualization.
87
+ - random_state (Union[int, np.random.RandomState, None]): Random state to use for sampling.
88
+
89
+ Returns:
90
+ - X_hat (np.ndarray): The data matrix.
91
+ - Y_hat (np.ndarray): The label matrix.
92
+ - img_size (Tuple[int, int]): Size of the images.
93
+ """
94
+ # Load ORL dataset
95
+ X_hat, Y_hat = load_data(folder, reduce=reduce)
96
+ # Randomly sample 90% of the data
97
+ X_hat, Y_hat = random_sample(X_hat, Y_hat, 0.9, random_state=random_state)
98
+ # Get the size of images
99
+ img_size = get_image_size(folder)
100
+ return X_hat, Y_hat, img_size
101
+
102
+ def add_noise(self, X_hat: np.ndarray, noise_type: str, noise_level: float, random_state: Union[int, np.random.RandomState, None], reduce: int) -> np.ndarray:
103
+ """
104
+ Add noise to the data.
105
+
106
+ Parameters:
107
+ - X_hat (np.ndarray): The data matrix.
108
+ - noise_type (str): Type of noise to add to the data.
109
+ - noise_level (float): Level of noise to add to the data.
110
+ - random_state (Union[int, np.random.RandomState, None]): Random state to use for adding noise.
111
+ - reduce (int): Factor by which the image size is reduced for visualization.
112
+
113
+ Returns:
114
+ - X_noise (np.ndarray): The noisy data matrix.
115
+ """
116
+ # Set random state and noise adder
117
+ noise_adder = NoiseAdder(random_state=random_state)
118
+ # Create a dictionary of noise functions
119
+ noise_dict = {
120
+ 'uniform': (noise_adder.add_uniform_noise, {'X_hat': X_hat, 'noise_level': noise_level}),
121
+ 'gaussian': (noise_adder.add_gaussian_noise, {'X_hat': X_hat, 'noise_level': noise_level}),
122
+ 'laplacian': (noise_adder.add_laplacian_noise, {'X_hat': X_hat, 'noise_level': noise_level}),
123
+ 'salt_and_pepper': (noise_adder.add_salt_and_pepper_noise, {'X_hat': X_hat, 'noise_level': noise_level}),
124
+ 'block': (noise_adder.add_block_noise, {'X_hat': X_hat, 'block_size': noise_level, 'img_width': self.img_size[0]//reduce})
125
+ }
126
+ # Map the noise type to the noise function
127
+ noise_func, args = noise_dict.get(noise_type, (noise_adder.add_uniform_noise, {'X_hat': X_hat, 'noise_level': noise_level}))
128
+ # Add noise to the data
129
+ _, X_noise = noise_func(**args)
130
+ return X_noise
131
+
132
+ def scale(self, X_hat: np.ndarray, X_noise: np.ndarray, scaler: Union[MinMaxScaler, StandardScaler]) -> Tuple[np.ndarray, np.ndarray]:
133
+ """
134
+ Scale the data.
135
+
136
+ Parameters:
137
+ - X_hat (np.ndarray): The data matrix.
138
+ - X_noise (np.ndarray): The noisy data matrix.
139
+ - scaler (MinMaxScaler or StandardScaler): Scaler to use for scaling the data.
140
+
141
+ Returns:
142
+ - X_hat_scaled (np.ndarray): The scaled data matrix.
143
+ - X_noise_scaled (np.ndarray): The scaled noisy data matrix.
144
+ """
145
+ # Scale the data
146
+ X_hat_scaled = scaler.fit_transform(X_hat)
147
+ X_noise_scaled = scaler.transform(X_noise)
148
+ # Ensure that the scaled noisy data is non-negative
149
+ X_noise_scaled += np.abs(np.min(X_noise_scaled)) * np.abs(np.min(X_noise_scaled)) * int(np.min(X_noise_scaled) < 0)
150
+ return X_hat_scaled, X_noise_scaled
151
+
152
+ class Pipeline(BasicBlock):
153
+ def __init__(self, nmf: Union[str, BasicNMF], dataset: str='ORL', reduce: int=1, noise_type: str='uniform',
154
+ noise_level: float=0.02, random_state: int=3407, scaler: str='MinMax') -> None:
155
+ """
156
+ Initialize the pipeline.
157
+
158
+ Parameters:
159
+ - nmf (str or BasicNMF): Name of the NMF algorithm to use.
160
+ - dataset (str): Name of the dataset to use.
161
+ - reduce (int): Factor by which the image size is reduced for visualization.
162
+ - noise_type (str): Type of noise to add to the data.
163
+ - noise_level (float): Level of noise to add to the data.
164
+ - random_state (int): Random state to use for the NMF algorithm.
165
+ - scaler (str): Name of the scaler to use for scaling the data.
166
+
167
+ Returns:
168
+ None. The function will initialize the pipeline.
169
+ """
170
+ # Get the basic information for the pipeline
171
+ folder, scaler, self.nmf = self.basic_info(nmf, dataset, scaler)
172
+ # Load the data
173
+ X_hat, self.__Y_hat, self.img_size = self.load_data(folder, reduce=reduce, random_state=random_state)
174
+ # Add noise to the data
175
+ X_noise = self.add_noise(X_hat, noise_type, noise_level, random_state, reduce)
176
+ # Scale the data
177
+ self.__X_hat_scaled, self.__X_noise_scaled = self.scale(X_hat, X_noise, scaler)
178
+ self.reduce = reduce
179
+ self.random_state = random_state
180
+ # Delete the attributes that might occupy significant memory
181
+ del X_hat, X_noise, folder, scaler, noise_type, noise_level, random_state, dataset, reduce, nmf
182
+
183
+ def execute(self, max_iter: int, convergence_trend: bool=False, matrix_size: bool=False, verbose: bool=False) -> None:
184
+ """
185
+ Run the pipeline.
186
+
187
+ Parameters:
188
+ - max_iter (int): Maximum number of iterations to run the NMF algorithm.
189
+ - convergence_trend (bool): Whether to display the convergence trend of the NMF algorithm.
190
+ - matrix_size (bool): Whether to display the size of the basis and coefficient matrices.
191
+ - verbose (bool): Whether to display the verbose output of the NMF algorithm.
192
+ """
193
+ # Run NMF
194
+ self.nmf.fit(self.__X_noise_scaled, len(set(self.__Y_hat)), max_iter=max_iter,
195
+ random_state=self.random_state, imshow=convergence_trend, verbose=verbose)
196
+ # Get the dictionary and representation matrices
197
+ self.D, self.R = self.nmf.D, self.nmf.R
198
+ if matrix_size:
199
+ print('D.shape={}, R.shape={}'.format(self.D.shape, self.R.shape))
200
+ self.metrics = self.nmf.evaluate(self.__X_hat_scaled, self.__Y_hat, random_state=self.random_state)
201
+ return self.metrics
202
+
203
+ def evaluate(self, idx: int=2, imshow: bool=False) -> None:
204
+ """
205
+ Evaluate the NMF algorithm.
206
+
207
+ Parameters:
208
+ - idx (int): Index of the image to evaluate.
209
+ - imshow (bool): Whether to display the images.
210
+ """
211
+ evaluate(self.nmf, self.metrics, self.__X_hat_scaled, self.__X_noise_scaled,
212
+ self.img_size, self.reduce, idx, imshow)
213
+
214
+ def visualization(self, idx: int=2) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
215
+ """
216
+ Visualize the NMF algorithm.
217
+
218
+ Parameters:
219
+ - idx (int): Index of the image to visualize.
220
+
221
+ Returns:
222
+ - X_i (np.ndarray): The original image.
223
+ - X_noise_i (np.ndarray): The noisy image.
224
+ - DR_i (np.ndarray): The reconstructed image.
225
+ """
226
+ DR = np.dot(self.D, self.R).reshape(self.__X_hat_scaled.shape[0], self.__X_hat_scaled.shape[1])
227
+ # Calculate reduced image size based on the 'reduce' factor
228
+ img_size = [i//self.reduce for i in self.img_size]
229
+ # Retrieve the specified image from the data
230
+ X_i = self.__X_hat_scaled[:,idx].reshape(img_size[1],img_size[0])
231
+ X_noise_i = self.__X_noise_scaled[:,idx].reshape(img_size[1],img_size[0])
232
+ DR_i = DR[:,idx].reshape(img_size[1],img_size[0])
233
+ return X_i, X_noise_i, DR_i
234
+
235
+ def cleanup(self) -> None:
236
+ """
237
+ Cleanup method to release resources and delete instances.
238
+ """
239
+ # Delete attributes that might occupy significant memory
240
+ if hasattr(self, 'nmf'):
241
+ del self.nmf, self.__X_hat_scaled, self.__X_noise_scaled, self.D, self.R, self.metrics
242
+
243
+ class Experiment:
244
+ """
245
+ Set up the experiment.
246
+ """
247
+ data_dirs = ['data/ORL', 'data/CroppedYaleB']
248
+ data_container = [[], []]
249
+ noises = {
250
+ 'uniform': [0.1, 0.3],
251
+ 'gaussian': [0.05, 0.08],
252
+ 'laplacian': [0.04, 0.06],
253
+ 'salt_and_pepper': [0.02, 0.1],
254
+ 'block': [10, 15],}
255
+
256
+ nmf_dict = {
257
+ 'L2NormNMF': L2NormNMF,
258
+ 'KLDivergenceNMF': KLDivergenceNMF,
259
+ 'ISDivergenceNMF': ISDivergenceNMF,
260
+ 'L21NormNMF': L21NormNMF,
261
+ 'HSCostNMF': HSCostNMF,
262
+ 'L1NormRegularizedNMF': L1NormRegularizedNMF,
263
+ 'CappedNormNMF': CappedNormNMF,
264
+ 'CauchyNMF': CauchyNMF,}
265
+
266
+ def __init__(self,
267
+ seeds: List[int]=None) -> None:
268
+ """
269
+ Initialize the experiment.
270
+
271
+ Parameters:
272
+ - seeds (List[int]): Random seeds to use for the experiment.
273
+ """
274
+ self.seeds = [0, 42, 99, 512, 3407] if seeds is None else seeds
275
+
276
+ def choose(self, nmf: Union[str, BasicNMF]) -> None:
277
+ """
278
+ Choose an NMF algorithm. Essentially, this method sets the NMF algorithm to use for the experiment.
279
+
280
+ nmf (Union[str, BasicNMF]): NMF algorithm to use.
281
+ """
282
+ if isinstance(nmf, BasicNMF):
283
+ self.nmf = nmf
284
+ else:
285
+ # Choose an NMF algorithm
286
+ self.nmf = self.nmf_dict.get(nmf, L1NormRegularizedNMF)()
287
+
288
+ def data_loader(self) -> Generator[Tuple[str, int, np.ndarray, np.ndarray, np.ndarray, str, float], None, None]:
289
+ """
290
+ Construct a generator to load the data.
291
+
292
+ Returns:
293
+ - data_file (str): Name of the dataset.
294
+ - seed (int): Random seed to use for the experiment.
295
+ - X_hat_scaled (np.ndarray): The scaled data matrix.
296
+ - Y_hat (np.ndarray): The label matrix.
297
+ - X_noise_scaled (np.ndarray): The scaled noisy data matrix.
298
+ - noise_type (str): Type of noise to add to the data.
299
+ - noise_level (float): Level of noise to add to the data.
300
+ """
301
+ scaler = MinMaxScaler()
302
+ # Data file loop
303
+ for data_file in self.data_dirs:
304
+ reduce = 1 if data_file.endswith('ORL') else 3
305
+ image_size = get_image_size(data_file)
306
+ X_hat_, Y_hat_ = load_data(root=data_file, reduce=reduce)
307
+ # Random seed loop
308
+ for seed in self.seeds:
309
+ noise_adder = NoiseAdder(random_state=seed)
310
+ X_hat, Y_hat = random_sample(X_hat_, Y_hat_, 0.9, random_state=seed)
311
+ X_hat_scaled = scaler.fit_transform(X_hat)
312
+ # Noise type loop
313
+ for noise_type in self.noises:
314
+ add_noise_ = getattr(noise_adder, f'add_{noise_type}_noise')
315
+ # Noise level loop
316
+ for noise_level in self.noises[noise_type]:
317
+ _, X_noise = add_noise_(X_hat, noise_level=noise_level) if noise_type != 'block' else add_noise_(X_hat, image_size[0]//reduce, noise_level)
318
+ X_noise_scaled = scaler.transform(X_noise)
319
+ X_noise_scaled += np.abs(np.min(X_noise_scaled)) * np.abs(np.min(X_noise_scaled)) * int(np.min(X_noise_scaled) < 0)
320
+ yield data_file.split("/")[-1], seed, X_hat_scaled, Y_hat, X_noise_scaled, noise_type, noise_level
321
+
322
+ def sync_fit(self, dataset: str, seed: int, X_hat_scaled: np.ndarray, Y_hat: np.ndarray, X_noise_scaled: np.ndarray, noise_type: str, noise_level: float) -> Tuple[str, str, float, int, float, float, float]:
323
+ """
324
+ Fit the NMF algorithm on the dataset with noise synchronously.
325
+
326
+ Parameters:
327
+ - dataset (str): Name of the dataset.
328
+ - seed (int): Random seed to use for the experiment.
329
+ - X_hat_scaled (np.ndarray): The scaled data matrix.
330
+ - Y_hat (np.ndarray): The label matrix.
331
+ - X_noise_scaled (np.ndarray): The scaled noisy data matrix.
332
+ - noise_type (str): Type of noise to add to the data.
333
+ - noise_level (float): Level of noise to add to the data.
334
+
335
+ Returns:
336
+ - dataset (str): Name of the dataset.
337
+ - noise_type (str): Type of noise to add to the data.
338
+ - noise_level (float): Level of noise to add to the data.
339
+ - seed (int): Random seed to use for the experiment.
340
+ - rmse (float): Root mean squared error of the NMF algorithm.
341
+ - acc (float): Accuracy of the NMF algorithm.
342
+ - nmi (float): Normalized mutual information of the NMF algorithm.
343
+ """
344
+ self.nmf.fit(X_noise_scaled, len(set(Y_hat)), random_state=seed, verbose=False)
345
+ # Display the current experiment information
346
+ logging.info(f'Dataset: {dataset} Random seed: {seed} - Test on {noise_type} with {noise_level} ended.')
347
+ return dataset, noise_type, noise_level, seed, *self.nmf.evaluate(X_hat_scaled, Y_hat, random_state=seed)
348
+
349
+ def execute(self) -> None:
350
+ """
351
+ Execute the experiments.
352
+ """
353
+ # Lazy import to avoid multiprocessing error
354
+ import multiprocessing
355
+ results = []
356
+ # Define the multiprocessing pool
357
+ with multiprocessing.Pool(10) as pool:
358
+ for result in pool.starmap(self.sync_fit, self.data_loader()):
359
+ # Append the result to the list
360
+ results.append(result)
361
+ # Write the results to a csv file
362
+ if not os.path.exists(f'{self.nmf.name}_log.csv'):
363
+ mode = 'w'
364
+ else:
365
+ mode = 'a'
366
+ with open(f'{self.nmf.name}_log.csv', mode) as f:
367
+ writer = csv.writer(f)
368
+ if mode == 'w':
369
+ writer.writerow(['dataset', 'noise_type', 'noise_level', 'seed', 'rmse', 'acc', 'nmi'])
370
+ for result in results:
371
+ writer.writerow(result)
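If the enclosing benchmark class in algorithm/pipeline.py is exported as, say, `Experiment` (the class name is not visible in this hunk), a driver script might look like the sketch below. It assumes the `data/ORL` and `data/CroppedYaleB` folders are in place; the `if __name__ == '__main__':` guard matters because `execute()` spawns a multiprocessing pool.

```python
# Hypothetical driver for the benchmark class above; `Experiment` is an
# assumed name, not confirmed by this diff.
from algorithm.pipeline import Experiment

if __name__ == '__main__':
    exp = Experiment(seeds=[0, 42])        # fewer seeds for a quicker run
    exp.choose('L1NormRegularizedNMF')     # any key of nmf_dict, or a BasicNMF instance
    exp.execute()                          # results are appended to <name>_log.csv
```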
algorithm/preprocess.py ADDED
@@ -0,0 +1,234 @@
1
+ from typing import Union, Tuple
2
+
3
+ import numpy as np
4
+
5
+ class MinMaxScaler:
6
+ """
7
+ This class scales and transforms features to [0, 1].
8
+ """
9
+ def fit(self, X: np.ndarray) -> None:
10
+ """
11
+ Compute the minimum and the range of the data for later scaling.
12
+
13
+ Parameters:
14
+ - X: numpy array-like, shape (n_samples, n_features)
15
+ The data used to compute the minimum and range used for later scaling.
16
+ """
17
+ self.min_ = np.min(X, axis=0)
18
+ self.range_ = np.max(X, axis=0) - self.min_
19
+
20
+
21
+ def transform(self, X: np.ndarray) -> np.ndarray:
22
+ """
23
+ Scale the data using the values computed during the fit method.
24
+
25
+ Parameters:
26
+ - X: numpy array-like, shape (n_samples, n_features)
27
+ Input data that needs to be scaled.
28
+
29
+ Returns:
30
+ - numpy array, shape (n_samples, n_features)
31
+ Transformed data.
32
+ """
33
+ return (X - self.min_) / self.range_
34
+
35
+ def fit_transform(self, X: np.ndarray) -> np.ndarray:
36
+ """
37
+ Fit to the data and then transform it.
38
+
39
+ Parameters:
40
+ - X: numpy array-like, shape (n_samples, n_features)
41
+ Input data that needs to be scaled and transformed.
42
+
43
+ Returns:
44
+ - numpy array, shape (n_samples, n_features)
45
+ Transformed data.
46
+ """
47
+ self.fit(X)
48
+ return self.transform(X)
49
+
50
+ class StandardScaler:
51
+ """
52
+ This class standardizes features by removing the mean and scaling to unit variance.
53
+ """
54
+ def fit(self, X: np.ndarray) -> None:
55
+ """
56
+ Compute the mean and standard deviation of the data for later standardization.
57
+
58
+ Parameters:
59
+ - X: numpy array-like, shape (n_samples, n_features)
60
+ The data used to compute the mean and standard deviation used for later standardization.
61
+ """
62
+ self.mean_ = np.mean(X, axis=0)
63
+ self.std_ = np.std(X, axis=0)
64
+
65
+ def transform(self, X: np.ndarray) -> np.ndarray:
66
+ """
67
+ Standardize the data using the values computed during the fit method.
68
+
69
+ Parameters:
70
+ - X: numpy array-like, shape (n_samples, n_features)
71
+ Input data that needs to be standardized.
72
+
73
+ Returns:
74
+ - numpy array, shape (n_samples, n_features)
75
+ Transformed data.
76
+ """
77
+ return (X - self.mean_) / self.std_
78
+
79
+ def fit_transform(self, X: np.ndarray) -> np.ndarray:
80
+ """
81
+ Fit to the data and then transform it.
82
+
83
+ Parameters:
84
+ - X: numpy array-like, shape (n_samples, n_features)
85
+ Input data that needs to be standardized and transformed.
86
+
87
+ Returns:
88
+ - numpy array, shape (n_samples, n_features)
89
+ Transformed data.
90
+ """
91
+ self.fit(X)
92
+ return self.transform(X)
93
+
94
+ class NoiseAdder:
95
+ """
96
+ This class adds noise to data.
97
+ """
98
+ def __init__(self, random_state: Union[int, np.random.RandomState, None]=None) -> None:
99
+ """
100
+ Initializes the NoiseAdder with a random state and noise parameters.
101
+
102
+ Parameters:
103
+ - random_state (int or RandomState instance or None): Controls the randomness. If int, is used as seed for RandomState.
105
+ """
106
+ self.rng = np.random.RandomState(random_state)
107
+
108
+ def add_uniform_noise(self, X_hat: np.ndarray, noise_level: float=0.1) -> Tuple[np.ndarray, np.ndarray]:
109
+ """
110
+ Add uniform random noise to data.
111
+
112
+ Parameters:
113
+ - X_hat (numpy array): Original data.
+ - noise_level (float): Scale of the noise relative to the data range.
114
+
115
+ Returns:
116
+ - Numpy array of uniform noise.
117
+ - Numpy array with added uniform noise.
118
+ """
119
+ a, b = 0, 1
120
+ # Generate noise
121
+ X_noise = self.rng.uniform(a, b, size=X_hat.shape) * noise_level * (np.max(X_hat) - np.min(X_hat))
122
+ return X_noise, X_hat + X_noise
123
+
124
+ def add_gaussian_noise(self, X_hat: np.ndarray, noise_level: float=0.1) -> Tuple[np.ndarray, np.ndarray]:
125
+ """
126
+ Add Gaussian noise to data.
127
+
128
+ Parameters:
129
+ - X_hat (numpy array): Original data.
130
+ - noise_level (float): Scale of the noise relative to the data range.
132
+
133
+ Returns:
134
+ - Numpy array of Gaussian noise.
135
+ - Numpy array with added Gaussian noise.
136
+ """
137
+ mean, std = 0, 1
138
+ # Generate noise
139
+ X_noise = self.rng.normal(mean, std, size=X_hat.shape) * noise_level * (np.max(X_hat) - np.min(X_hat))
140
+ return X_noise, X_hat + X_noise
141
+
142
+ def add_laplacian_noise(self, X_hat: np.ndarray, noise_level: float=0.1) -> Tuple[np.ndarray, np.ndarray]:
143
+ """
144
+ Add Laplacian noise to data.
145
+
146
+ Parameters:
147
+ - X_hat (numpy array): Original data.
148
+ - noise_level (float): Scale of the noise relative to the data maximum.
150
+
151
+ Returns:
152
+ - Numpy array of Laplacian noise.
153
+ - Numpy array with added Laplacian noise.
154
+ """
155
+ # Initialize parameters
156
+ mu, lambd = 0, 1
157
+ # Generate noise
158
+ X_noise = self.rng.laplace(mu, lambd, size=X_hat.shape) * noise_level * np.max(X_hat)
159
+ return X_noise, X_hat + X_noise
160
+
161
+ def add_block_noise(self, X_hat: np.ndarray, img_width: int, block_size: int=10) -> Tuple[np.ndarray, np.ndarray]:
162
+ """
163
+ Add block noise to multiple flattened image samples.
164
+
165
+ Parameters:
166
+ - X_hat (numpy array): Array of shape (m, n) where m is the flattened image length and n is the number of samples.
167
+ - img_width (int): width of the original image
168
+ - block_size (int): size of the block to occlude
169
+
170
+ Returns:
171
+ - Numpy array of noise added to each sample
172
+ - Numpy array with added block noise for all samples
173
+ """
174
+ # Initialize parameters
175
+ X = X_hat.copy()
176
+ m, n_samples = X.shape
177
+ X_noise = np.zeros((m, n_samples), dtype=np.uint8)
178
+ # For each sample in X
179
+ for i in range(n_samples):
180
+ sample = X[:, i]
181
+ # Reshape the flattened array to 2D
182
+ img_2d = sample.reshape(-1, img_width)
183
+ height, width = img_2d.shape
184
+ # Ensure the block size isn't larger than the image dimensions
185
+ block_size = min(block_size, width, height)
186
+ # Generate a random starting point for the block
187
+ x_start = self.rng.randint(0, width - block_size)
188
+ y_start = self.rng.randint(0, height - block_size)
189
+ # Add block noise
190
+ img_2d[y_start:y_start+block_size, x_start:x_start+block_size] = 255
191
+ # Store the noise block to noise array
192
+ noise_2d = np.zeros((height, width), dtype=np.uint8)
193
+ noise_2d[y_start:y_start+block_size, x_start:x_start+block_size] = 255
194
+ X_noise[:, i] = noise_2d.ravel()
195
+ # Flatten the array back to 1D and store back in X
196
+ X[:, i] = img_2d.ravel()
197
+ return X_noise, X
198
+
199
+ def add_salt_and_pepper_noise(self, X_hat: np.ndarray, noise_level: float=0.02, salt_ratio: float=0.5) -> Tuple[np.ndarray, np.ndarray]:
200
+ """
201
+ Add "salt and pepper" noise to data.
202
+
203
+ Parameters:
204
+ - X_hat (numpy array): Original data.
205
+ - noise_level (float): Proportion of image pixels to be replaced.
206
+ - salt_ratio (float): Proportion of replaced pixels that are "salt".
207
+
208
+ Returns:
209
+ - Numpy array of salt and pepper noise.
210
+ - Numpy array with added salt and pepper noise.
211
+ """
212
+ # Initialize parameters
213
+ X = X_hat.copy()
214
+ X_noise = np.zeros_like(X)
215
+ # Get the total number of pixels that should be replaced by noise
216
+ total_pixels = X.size
217
+ num_noise_pixels = int(total_pixels * noise_level)
218
+ # Separate the number of salt and pepper pixels based on the salt_ratio
219
+ num_salt = int(num_noise_pixels * salt_ratio)
220
+ num_pepper = num_noise_pixels - num_salt
221
+ # Directly generate the noise coordinates without overlap
222
+ noise_coords = self.rng.choice(total_pixels, num_noise_pixels, replace=False)
223
+ salt_coords = noise_coords[:num_salt]
224
+ pepper_coords = noise_coords[num_salt:]
225
+ # Convert the 1D noise coordinates back to tuple of N-dim coordinates
226
+ salt_coords = np.unravel_index(salt_coords, X.shape)
227
+ pepper_coords = np.unravel_index(pepper_coords, X.shape)
228
+ # Set salt and pepper pixels in the image
229
+ max_pixel_val = np.max(X)
230
+ X_noise[salt_coords] = max_pixel_val
231
+ X_noise[pepper_coords] = 0
232
+ X[salt_coords] = max_pixel_val
233
+ X[pepper_coords] = 0
234
+ return X_noise, X
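A minimal, self-contained sketch of how these preprocessing helpers compose, mirroring the order used in the experiment loop above: noise is added to the raw matrix, the scaler is fitted on the clean data, and the same scaling is reused on the noisy copy. The toy matrix here stands in for the flattened image data.

```python
import numpy as np
from algorithm.preprocess import MinMaxScaler, NoiseAdder

X = np.random.RandomState(0).rand(64, 10)            # 64 features x 10 toy samples

adder = NoiseAdder(random_state=42)
_, X_noisy = adder.add_salt_and_pepper_noise(X, noise_level=0.02)

scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)                    # fit on the clean data
X_noisy_scaled = scaler.transform(X_noisy)            # reuse the same min/range
print(X_noisy_scaled.shape, X_noisy_scaled.min(), X_noisy_scaled.max())
```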
algorithm/sample.py ADDED
@@ -0,0 +1,37 @@
1
+ from typing import Tuple, Union
2
+
3
+ import numpy as np
4
+
5
+ def random_sample(X: np.ndarray, Y: np.ndarray, fraction: float=0.90, random_state: Union[int, np.random.RandomState, None]=None) -> Tuple[np.ndarray, np.ndarray]:
6
+ """
7
+ Randomly sample a fraction of the data.
8
+
9
+ Parameters:
10
+ - X (numpy.ndarray): The input data matrix of shape (n_features, n_samples)
11
+ where n_samples is the number of samples, and n_features
12
+ is the number of features.
13
+ - Y (numpy.ndarray): The output data matrix of shape (n_samples, )
14
+ - fraction (float): The fraction of the data to be sampled.
15
+ - random_state (int): The seed for the random number generator.
16
+
17
+ Returns:
18
+ - X_sample (numpy.ndarray): The sampled data matrix of shape (n_features, sample_size).
19
+ - Y_sample (numpy.ndarray): The corresponding sampled labels of shape (sample_size, ).
20
+
21
+ """
22
+
23
+ # Create a random number generator
24
+ rng = np.random.default_rng(random_state)
25
+
26
+ # Compute the number of samples to be drawn
27
+ n_samples = X.shape[1]
28
+ sample_size = int(fraction * n_samples)
29
+
30
+ # Randomly sample the indices
31
+ sampled_indices = rng.choice(n_samples, sample_size, replace=False)
32
+
33
+ # Use the sampled indices to extract columns from the original data
34
+ X_sample = X[:, sampled_indices]
35
+ Y_sample = Y[sampled_indices]
36
+
37
+ return X_sample, Y_sample
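For illustration, a small usage example of `random_sample`, which keeps a random subset of columns and the matching labels:

```python
import numpy as np
from algorithm.sample import random_sample

X = np.arange(20).reshape(4, 5)            # 4 features x 5 samples (columns)
Y = np.array([0, 0, 1, 1, 2])
X_s, Y_s = random_sample(X, Y, fraction=0.8, random_state=0)
print(X_s.shape, Y_s)                      # -> (4, 4) and the corresponding labels
```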
algorithm/user_evaluate.py ADDED
@@ -0,0 +1,32 @@
1
+ from typing import Tuple
2
+
3
+ import numpy as np
4
+
5
+ from algorithm.nmf import BasicNMF
6
+ from algorithm.visualize import origin_versus_dictrep
7
+
8
+ def evaluate(nmf: BasicNMF, metrics: Tuple, X: np.ndarray, X_noise: np.ndarray,
9
+ image_size: tuple, reduce: int, idx=2, imshow: bool=False) -> None:
10
+
11
+ """
12
+ Evaluate the performance of NMF algorithms.
13
+
14
+ Parameters
15
+ - nmf (BasicNMF): The NMF algorithm.
16
+ - metrics (tuple): The evaluation metrics, (rmse, acc, nmi).
17
+ - X (numpy.ndarray): The original data matrix, shape (n_features, n_samples).
18
+ - X_noise (numpy.ndarray): The noisy data matrix, shape (n_features, n_samples).
19
+ - image_size (tuple): The size of images.
20
+ - reduce (int): The reduction ratio of images.
21
+ - idx (int): The index of the image to be visualized.
22
+ - random_state (int): The random state.
23
+ """
24
+ # Start to evaluate
25
+ print('Evaluating...')
26
+ rmse, acc, nmi = metrics
27
+ # Report the evaluation metrics
28
+ print('RMSE = {:.4f}'.format(rmse))
29
+ print('Accuracy = {:.4f}'.format(acc))
30
+ print('NMI = {:.4f}'.format(nmi))
31
+ if imshow:
32
+ origin_versus_dictrep(X, nmf.D, nmf.R, X_noise, image_size=image_size, reduce=reduce, idx=idx)
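A hedged end-to-end sketch of how `evaluate` might be called on toy data. It assumes `L1NormRegularizedNMF` is importable from algorithm.nmf and exposes the `fit`/`evaluate` signatures used in `sync_fit` above (neither is visible in this hunk); `imshow=False` keeps the run figure-free.

```python
import numpy as np
from algorithm.nmf import L1NormRegularizedNMF   # assumed import path
from algorithm.user_evaluate import evaluate

rng = np.random.RandomState(0)
X = rng.rand(100, 30)                            # 100 features x 30 toy samples in [0, 1]
Y = np.repeat(np.arange(3), 10)                  # 3 fake classes
X_noise = np.clip(X + 0.05 * rng.randn(*X.shape), 0, None)

nmf = L1NormRegularizedNMF()
nmf.fit(X_noise, len(set(Y)), random_state=0, verbose=False)
metrics = nmf.evaluate(X, Y, random_state=0)     # assumed to return (rmse, acc, nmi)
evaluate(nmf, metrics, X, X_noise, image_size=(10, 10), reduce=1, idx=2, imshow=False)
```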
algorithm/visualize.py ADDED
@@ -0,0 +1,161 @@
1
+ import numpy as np
2
+ import matplotlib.pyplot as plt
3
+
4
+ def origin_plus_noise(X_hat: np.ndarray, X_noise: np.ndarray, X: np.ndarray, image_size: tuple, reduce: int, idx: int=2) -> None:
5
+ """
6
+ Display the original image, the noise, and the image with added noise side by side.
7
+
8
+ Parameters:
9
+ - X_hat (numpy.ndarray): Original image data.
10
+ - X_noise (numpy.ndarray): Noise data to be added to the original image.
+ - X (numpy.ndarray): Image data with the noise already added.
11
+ - image_size (tuple): Size of the original image as (width, height).
12
+ - reduce (int): Factor to downscale the image dimensions.
13
+ - idx (int, optional): Index of the image to be displayed. Default is 2.
14
+ """
15
+
16
+ # Calculate reduced image size based on the 'reduce' factor
17
+ img_size = [i//reduce for i in image_size]
18
+
19
+ # Retrieve the specified image from the data
20
+ X_hat_i = X_hat[:,idx].reshape(img_size[1],img_size[0])
21
+ X_noise_i = X_noise[:,idx].reshape(img_size[1],img_size[0])
22
+ X_i = X[:,idx].reshape(img_size[1],img_size[0])
23
+
24
+ # Set up the figure for displaying images
25
+ plt.figure(figsize=(12,3)) # Adjusted size for better visualization
26
+
27
+ # Display the original image
28
+ plt.subplot(151) # Adjusted to 1x4 grid for space to '+' and '=' symbols
29
+ plt.imshow(X_hat_i, cmap=plt.cm.gray)
30
+ plt.title('Image(Original)')
31
+ plt.axis('off') # Hide axis for a cleaner look
32
+
33
+ # Place '+' symbol between images
34
+ plt.subplot(152)
35
+ plt.text(0.5, 0.5, '+', fontsize=20, ha='center', va='center')
36
+ plt.axis('off') # Hide axis
37
+
38
+ # Display the noise
39
+ plt.subplot(153)
40
+ plt.imshow(X_noise_i, cmap=plt.cm.gray)
41
+ plt.title('Noise')
42
+ plt.axis('off') # Hide axis for a cleaner look
43
+
44
+ # Place '=' symbol between images
45
+ plt.subplot(154)
46
+ plt.text(0.5, 0.5, '=', fontsize=20, ha='center', va='center')
47
+ plt.axis('off') # Hide axis
48
+
49
+ # Display the image with added noise
50
+ plt.subplot(155)
51
+ plt.imshow(X_i, cmap=plt.cm.gray)
52
+ plt.title('Image(Noise)')
53
+ plt.axis('off') # Hide axis for a cleaner look
54
+
55
+ # Render the figure
56
+ plt.tight_layout() # Ensure no overlap between subplots
57
+ plt.show()
58
+
59
+ def origin_versus_dictrep(X: np.ndarray, D: np.ndarray, R: np.ndarray, X_noise: np.ndarray, image_size: tuple, reduce: int, idx: int) -> tuple:
60
+ """
61
+ Display the original, noise-added, and dictionary-reconstructed images side by side.
62
+
63
+ Parameters:
64
+ - X (numpy.ndarray): Original data matrix of shape (n_features, n_samples).
65
+ - D (numpy.ndarray): Basis matrix obtained from dictionary learning.
66
+ - R (numpy.ndarray): Coefficient matrix.
67
+ - X_noise (numpy.ndarray): Noise-added version of the original data matrix.
68
+ - image_size (tuple): Tuple containing the width and height of the image.
69
+ - reduce (int): Factor by which the image size is reduced for visualization.
70
+ - idx (int): Index of the image to display.
71
+
72
+ Returns:
73
+ None. The function will plot and display the images using matplotlib.
74
+ """
75
+
76
+ DR = np.dot(D, R).reshape(X.shape[0], X.shape[1])
77
+ # Calculate reduced image size based on the 'reduce' factor
78
+ img_size = [i//reduce for i in image_size]
79
+
80
+ # Retrieve the specified image from the data
81
+ X_i = X[:,idx].reshape(img_size[1],img_size[0])
82
+ X_noise_i = X_noise[:,idx].reshape(img_size[1],img_size[0])
83
+ DR_i = DR[:,idx].reshape(img_size[1],img_size[0])
84
+
85
+ # Set up the figure for displaying images
86
+ plt.figure(figsize=(12,3)) # Adjusted size for better visualization
87
+
88
+ # Display the original image
89
+ plt.subplot(131)
90
+ plt.imshow(X_i, cmap=plt.cm.gray)
91
+ plt.title('Image(Original)')
92
+ plt.axis('off')
93
+
94
+ # Display the noisy image
95
+ plt.subplot(132)
96
+ plt.imshow(X_noise_i, cmap=plt.cm.gray)
97
+ plt.title('Image(Noise)')
98
+ plt.axis('off')
99
+
100
+ # Display the reconstructed image
101
+ plt.subplot(133)
102
+ plt.imshow(DR_i, cmap=plt.cm.gray)
103
+ plt.title('Image(Reconstructed)')
104
+ plt.axis('off')
105
+
106
+ # Render the figure
107
+ plt.tight_layout()
108
+ plt.show()
109
+
110
+ return X_i, X_noise_i, DR_i
111
+
112
+ def origin_noise_dictrep(X: np.ndarray, X_noise: np.ndarray, D: np.ndarray, R: np.ndarray, image_size: tuple, reduce: int, idx: int) -> None:
113
+ """
114
+ Display the original image, its noise version, and its dictionary-reconstructed representation side by side.
115
+
116
+ Parameters:
117
+ - X (numpy.ndarray): Original data matrix of shape (n_features, n_samples).
118
+ - X_noise (numpy.ndarray): Noise-added version of the original data matrix.
119
+ - D (numpy.ndarray): Basis matrix obtained from dictionary learning.
120
+ - R (numpy.ndarray): Coefficient matrix.
121
+ - image_size (tuple): Tuple containing the width and height of the image.
122
+ - reduce (int): Factor by which the image size is reduced for visualization.
123
+ - idx (int): Index of the image to display.
124
+
125
+ Returns:
126
+ None. The function will plot and display the images using matplotlib.
127
+ """
128
+
129
+ DR = np.dot(D, R).reshape(X.shape[0], X.shape[1])
130
+ # Calculate reduced image size based on the 'reduce' factor
131
+ img_size = [i//reduce for i in image_size]
132
+
133
+ # Retrieve the specified image from the data
134
+ X_i = X[:,idx].reshape(img_size[1],img_size[0])
135
+ X_noise_i = X_noise[:,idx].reshape(img_size[1],img_size[0])
136
+ DR_i = DR[:,idx].reshape(img_size[1],img_size[0])
137
+
138
+ # Set up the figure for displaying images
139
+ plt.figure(figsize=(12,3)) # Adjusted size for better visualization
140
+
141
+ # Display the original image
142
+ plt.subplot(131)
143
+ plt.imshow(X_i, cmap=plt.cm.gray)
144
+ plt.title('Image(Original)')
145
+ plt.axis('off')
146
+
147
+ # Display the noise
148
+ plt.subplot(132)
149
+ plt.imshow(X_noise_i, cmap=plt.cm.gray)
150
+ plt.title('Image(Noise)')
151
+ plt.axis('off')
152
+
153
+ # Display the reconstructed image
154
+ plt.subplot(133)
155
+ plt.imshow(DR_i, cmap=plt.cm.gray)
156
+ plt.title('Image(Reconstructed)')
157
+ plt.axis('off')
158
+
159
+ # Render the figure
160
+ plt.tight_layout()
161
+ plt.show()
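A quick self-contained check of the plotting helpers on synthetic images; note that `image_size` is passed as (width, height) so that each flattened column reshapes to `(height, width)` as the functions expect:

```python
import numpy as np
from algorithm.visualize import origin_plus_noise

image_size = (20, 25)                            # (width, height) of each toy image
n_pixels = image_size[0] * image_size[1]
rng = np.random.RandomState(0)

X_hat = rng.rand(n_pixels, 5)                    # 5 flattened images as columns
X_noise = 0.2 * rng.rand(n_pixels, 5)            # noise with the same layout
X = X_hat + X_noise                              # noisy images

origin_plus_noise(X_hat, X_noise, X, image_size=image_size, reduce=1, idx=2)
```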
app.py ADDED
@@ -0,0 +1,196 @@
1
+ import PIL
2
+ import numpy as np
3
+ import gradio as gr
4
+
5
+ from algorithm.pipeline import Pipeline
6
+
7
+ class App:
8
+ def __init__(self,
9
+ nmf='L1NormRegularizedNMF',
10
+ dataset='YaleB',
11
+ reduce=3,
12
+ noise_type='salt_and_pepper',
13
+ noise_level=0.10,
14
+ random_state=99,
15
+ scaler='MinMax'):
16
+ self.pipeline = Pipeline(nmf=nmf,
17
+ dataset=dataset,
18
+ reduce=reduce,
19
+ noise_type=noise_type,
20
+ noise_level=noise_level,
21
+ random_state=random_state,
22
+ scaler=scaler)
23
+
24
+ def align_reduce(self, dataset_name):
25
+ return 1 if dataset_name == 'ORL' else 3
26
+
27
+ def reset_pipeline(self, nmf, dataset, reduce, noise_type, noise_level, random_state, scaler):
28
+ noise_type, noise_level = self.convert_level_to_number(noise_type, noise_level)
29
+ self.pipeline = Pipeline(nmf=nmf,
30
+ dataset=dataset,
31
+ reduce=reduce,
32
+ noise_type=noise_type,
33
+ noise_level=noise_level,
34
+ random_state=random_state,
35
+ scaler=scaler)
36
+
37
+ def convert_level_to_number(self, type, level):
38
+ map_dict = {"Uniform": {"Low": 0.1, "High": 0.3},
39
+ "Gaussian": {"Low": 0.05, "High": 0.08},
40
+ "Laplacian": {"Low": 0.04, "High": 0.06},
41
+ "Salt & Pepper": {"Low": 0.02, "High": 0.1},
42
+ "Block": {"Low": 10, "High": 15}}
43
+ type_name = type.lower() if type != "Salt & Pepper" else "salt_and_pepper"
44
+ return type_name, map_dict[type][level]
45
+
46
+ def execute(self, max_iter=500, idx=9):
47
+ self.pipeline.execute(max_iter=max_iter)
48
+ return *self.visualize(idx), *self.metrics()
49
+
50
+ def visualize(self, idx=9):
51
+ image_raw, image_noise, image_recon = self.pipeline.visualization(idx=idx)
52
+ return self.array2image(image_raw), self.array2image(image_noise), self.array2image(image_recon)
53
+
54
+ def metrics(self):
55
+ return self.pipeline.metrics
56
+
57
+ def array2image(self, array):
58
+ image_size = self.pipeline.img_size
59
+ return PIL.Image.fromarray(self.scale_pixel(array)).resize((image_size))
60
+
61
+ def scale_pixel(self, image):
62
+ return ((image - image.min()) / (image.max() - image.min()) * 255).astype(np.uint8)
63
+
64
+ def clear_params(self):
65
+ self.pipeline = Pipeline()
66
+ return 'L1NormRegularizedNMF', 'YaleB', 3, 'Salt & Pepper', 'Low', 99, 'MinMax'
67
+
68
+ app = App()
69
+ image_size = app.pipeline.img_size
70
+
71
+ with gr.Blocks() as demo:
72
+ gr.Markdown("# NMF Image Reconstruction")
73
+ with gr.Row():
74
+ with gr.Group():
75
+ with gr.Row():
76
+ nmf = gr.Dropdown(
77
+ label="NMF Algorithm",
78
+ choices=['L1NormRegularizedNMF', 'L2NormNMF', 'KLDivergenceNMF',
79
+ 'ISDivergenceNMF', 'L21NormNMF', 'HSCostNMF',
80
+ 'CappedNormNMF', 'CauchyNMF'],
81
+ value='L1NormRegularizedNMF',
82
+ info="Choose the NMF algorithm.")
83
+
84
+ dataset = gr.Dropdown(
85
+ label="Dataset",
86
+ choices=['ORL', 'YaleB'],
87
+ value='YaleB',
88
+ info="Choose the dataset.")
89
+
90
+ reduce = gr.Number(
91
+ value=3,
92
+ label="Reduce",
93
+ info="Choose the reduce.")
94
+
95
+ with gr.Row():
96
+ noise_type = gr.Dropdown(
97
+ label="Noise Type",
98
+ choices=['Uniform', 'Gaussian', 'Laplacian', 'Salt & Pepper', 'Block'],
99
+ value='Salt & Pepper',
100
+ info="Choose the noise type.")
101
+
102
+ noise_level = gr.Radio(
103
+ choices=['Low', 'High'],
104
+ value='Low',
105
+ label="Noise Level",
106
+ info="Choose the noise level."
107
+ )
108
+
109
+ with gr.Row():
110
+ random_state = gr.Number(
111
+ value=99,
112
+ label="Random State",
113
+ info="Choose the random state.",)
114
+
115
+ scaler = gr.Dropdown(
116
+ label="Scaler",
117
+ choices=['MinMax', 'Standard'],
118
+ value='MinMax',
119
+ info="Choose the scaler.")
120
+
121
+ with gr.Row():
122
+ max_iter = gr.Number(
123
+ value=500,
124
+ label="Max Iteration",
125
+ info="Choose the max iteration.")
126
+ idx = gr.Number(
127
+ value=9,
128
+ label="Image Index",
129
+ info="Choose the image index.")
130
+
131
+ with gr.Row():
132
+ execute_bt = gr.Button(value="Execute Algorithm",)
133
+ clear_params_bt = gr.Button(
134
+ value="Clear Parameters")
135
+
136
+ with gr.Group():
137
+ with gr.Row():
138
+
139
+ output_image_raw = gr.Image(
140
+ height=image_size[1],
141
+ width=image_size[0],
142
+ image_mode="L",
143
+ label="Original Image",
144
+ show_download_button=True,
145
+ show_share_button=True,)
146
+ output_image_noise = gr.Image(
147
+ height=image_size[1],
148
+ width=image_size[0],
149
+ label="Noisy Image",
150
+ image_mode="L",
151
+ show_download_button=True,
152
+ show_share_button=True,)
153
+ output_image_recon = gr.Image(
154
+ height=image_size[1],
155
+ width=image_size[0],
156
+ label="Reconstructed Image",
157
+ image_mode="L",
158
+ show_download_button=True,
159
+ show_share_button=True,)
160
+
161
+ with gr.Row():
162
+ rmse = gr.Number(
163
+ label="RMSE",
164
+ info="Average root mean square error",
165
+ precision=4,)
166
+ acc = gr.Number(
167
+ label="Acc",
168
+ info="Accuracy",
169
+ precision=4,)
170
+ nmi = gr.Number(
171
+ label="NMI",
172
+ info="Normalized mutual information",
173
+ precision=4,)
174
+
175
+ clear_output_bt = gr.ClearButton(
176
+ value="Clear Output",
177
+ components=[output_image_raw, output_image_noise, output_image_recon, rmse, acc, nmi],)
178
+
179
+ nmf.input(app.reset_pipeline, inputs=[nmf, dataset, reduce, noise_type, noise_level, random_state, scaler])
180
+ dataset.input(app.reset_pipeline, inputs=[nmf, dataset, reduce, noise_type, noise_level, random_state, scaler])
181
+ dataset.input(app.align_reduce, inputs=[dataset], outputs=[reduce])
182
+ reduce.input(app.reset_pipeline, inputs=[nmf, dataset, reduce, noise_type, noise_level, random_state, scaler])
183
+ noise_type.input(app.reset_pipeline, inputs=[nmf, dataset, reduce, noise_type, noise_level, random_state, scaler])
184
+ noise_level.input(app.reset_pipeline, inputs=[nmf, dataset, reduce, noise_type, noise_level, random_state, scaler])
185
+ random_state.input(app.reset_pipeline, inputs=[nmf, dataset, reduce, noise_type, noise_level, random_state, scaler])
186
+ scaler.input(app.reset_pipeline, inputs=[nmf, dataset, reduce, noise_type, noise_level, random_state, scaler])
187
+ idx.input(app.visualize, inputs=[idx], outputs=[output_image_raw, output_image_noise, output_image_recon])
188
+ execute_bt.click(app.execute, inputs=[max_iter, idx], outputs=[output_image_raw, output_image_noise, output_image_recon, rmse, acc, nmi])
189
+ clear_params_bt.click(app.clear_params, outputs=[nmf, dataset, reduce, noise_type, noise_level, random_state, scaler])
190
+
191
+ if __name__ == '__main__':
192
+ demo.queue()
193
+ demo.launch(inbrowser=True,
194
+ share=True,
195
+ server_name="0.0.0.0",
196
+ server_port=8080)
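Besides launching the Gradio demo with `python app.py`, the same pipeline can be driven headlessly. The sketch below assumes `algorithm.pipeline.Pipeline` exposes `execute`, `metrics`, and the constructor arguments exactly as the `App` class uses them above (the Pipeline definition is not shown in this hunk).

```python
# Headless run without the Gradio UI (Pipeline behavior assumed from its use in App).
from algorithm.pipeline import Pipeline

pipe = Pipeline(nmf='L1NormRegularizedNMF', dataset='YaleB', reduce=3,
                noise_type='salt_and_pepper', noise_level=0.10,
                random_state=99, scaler='MinMax')
pipe.execute(max_iter=200)
rmse, acc, nmi = pipe.metrics            # assumed to be the (rmse, acc, nmi) tuple
print(f'RMSE={rmse:.4f}  Acc={acc:.4f}  NMI={nmi:.4f}')
```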
data/.DS_Store ADDED
Binary file (6.15 kB). View file
 
data/CroppedYaleB/.DS_Store ADDED
Binary file (6.15 kB). View file
 
data/CroppedYaleB/yaleB01/DEADJOE ADDED
@@ -0,0 +1,3 @@
1
+
2
+ *** Modified files in JOE when it aborted on Fri May 18 01:57:34 2001
3
+ *** JOE was aborted by signal 1
data/CroppedYaleB/yaleB01/yaleB01_P00.info ADDED
@@ -0,0 +1,23 @@
 
1
+ yaleB01_P00_Ambient.pgm
2
+ yaleB01_P00A+000E+00.pgm
3
+ yaleB01_P00A+010E-20.pgm
4
+ yaleB01_P00A+020E-10.pgm
5
+ yaleB01_P00A+025E+00.pgm
6
+ yaleB01_P00A+020E+10.pgm
7
+ yaleB01_P00A+015E+20.pgm
8
+ yaleB01_P00A+000E+20.pgm
9
+ yaleB01_P00A-015E+20.pgm
10
+ yaleB01_P00A-020E+10.pgm
11
+ yaleB01_P00A-025E+00.pgm
12
+ yaleB01_P00A-020E-10.pgm
13
+ yaleB01_P00A-010E-20.pgm
14
+ yaleB01_P00A+000E-20.pgm
15
+ yaleB01_P00A-035E-20.pgm
16
+ yaleB01_P00A-035E+15.pgm
17
+ yaleB01_P00A+000E-35.pgm
18
+ yaleB01_P00A-005E-10.pgm
19
+ yaleB01_P00A-010E+00.pgm
20
+ yaleB01_P00A-005E+10.pgm
21
+ yaleB01_P00A+005E+10.pgm
22
+ yaleB01_P00A+010E+00.pgm
23
+ yaleB01_P00A+005E-10.pgm
data/CroppedYaleB/yaleB01/yaleB01_P00A+000E+00.pgm ADDED

Git LFS Details

  • SHA256: 73371b1ff0eb9ff5e1e9d94c16a1f96ce7a4429d1b4776d5eef58b9bcacbfd11
  • Pointer size: 130 Bytes
  • Size of remote file: 32.3 kB
data/CroppedYaleB/yaleB01/yaleB01_P00A+000E+20.pgm ADDED

Git LFS Details

  • SHA256: 11d5edb69039c00085453982c11ed1787aa98b158d6b369c67932b6fa7ee746a
  • Pointer size: 130 Bytes
  • Size of remote file: 32.3 kB
data/CroppedYaleB/yaleB01/yaleB01_P00A+000E+45.pgm ADDED

Git LFS Details

  • SHA256: fb84647a4ecf0c417e4212982cc22a8bacf68ce1f1b6bf70cf6cfc234458999b
  • Pointer size: 130 Bytes
  • Size of remote file: 32.3 kB
data/CroppedYaleB/yaleB01/yaleB01_P00A+000E+90.pgm ADDED

Git LFS Details

  • SHA256: f71d1c12a3bd66648e257cd04946cfd3ac5a965728a9504a903ce1c40ff59b72
  • Pointer size: 130 Bytes
  • Size of remote file: 32.3 kB
data/CroppedYaleB/yaleB01/yaleB01_P00A+000E-20.pgm ADDED

Git LFS Details

  • SHA256: 8ffd75a156b67d0c160e41971a5a8d9276257369f69de69f4e7323d57a118f43
  • Pointer size: 130 Bytes
  • Size of remote file: 32.3 kB
data/CroppedYaleB/yaleB01/yaleB01_P00A+000E-35.pgm ADDED

Git LFS Details

  • SHA256: 0858b6739e82a940a7e85884bdc488ac8c3454c45371c7167e565e8e24f19db1
  • Pointer size: 130 Bytes
  • Size of remote file: 32.3 kB
data/CroppedYaleB/yaleB01/yaleB01_P00A+005E+10.pgm ADDED

Git LFS Details

  • SHA256: 6bea5747a75e7c031253fff4e627944fa4a735f41eeb50a55fcf5b8ed477bb83
  • Pointer size: 130 Bytes
  • Size of remote file: 32.3 kB
data/CroppedYaleB/yaleB01/yaleB01_P00A+005E-10.pgm ADDED

Git LFS Details

  • SHA256: 4aef3852347f1723bc795a2ba9e1b15b20a4176ba1d32d74c319091ad1d9fd83
  • Pointer size: 130 Bytes
  • Size of remote file: 32.3 kB
data/CroppedYaleB/yaleB01/yaleB01_P00A+010E+00.pgm ADDED

Git LFS Details

  • SHA256: e91521dea749ec8820d155246ce4854639518b27704f15605bdf34ed248a9cc3
  • Pointer size: 130 Bytes
  • Size of remote file: 32.3 kB
data/CroppedYaleB/yaleB01/yaleB01_P00A+010E-20.pgm ADDED

Git LFS Details

  • SHA256: 29d058adf69ed78dd2b80e7e926758d69bd1e2473bab86a7cbc0b38ffa730c38
  • Pointer size: 130 Bytes
  • Size of remote file: 32.3 kB
data/CroppedYaleB/yaleB01/yaleB01_P00A+015E+20.pgm ADDED

Git LFS Details

  • SHA256: c00a1302e691a2e9042bc3e7ce6f745eab40cc6c81daa925d1e4f7c502b3712d
  • Pointer size: 130 Bytes
  • Size of remote file: 32.3 kB
data/CroppedYaleB/yaleB01/yaleB01_P00A+020E+10.pgm ADDED

Git LFS Details

  • SHA256: 73b1a0c1128cb41214b4c507d2019c6ecf354b6da0c3e9269fc70936c333149c
  • Pointer size: 130 Bytes
  • Size of remote file: 32.3 kB
data/CroppedYaleB/yaleB01/yaleB01_P00A+020E-10.pgm ADDED

Git LFS Details

  • SHA256: 485f04a7abb217d95f48721a929dd83af359581a9d3caaa28852a551fb36c8b5
  • Pointer size: 130 Bytes
  • Size of remote file: 32.3 kB
data/CroppedYaleB/yaleB01/yaleB01_P00A+020E-40.pgm ADDED

Git LFS Details

  • SHA256: d3b711373033bbb178759b5f7cd7802b90d0e44cbfffa951c5b7eaffc2006e24
  • Pointer size: 130 Bytes
  • Size of remote file: 32.3 kB
data/CroppedYaleB/yaleB01/yaleB01_P00A+025E+00.pgm ADDED

Git LFS Details

  • SHA256: f5462d6a65c9f56462211a60e29fc7328fba381c297a44826b57aa4e894dbc66
  • Pointer size: 130 Bytes
  • Size of remote file: 32.3 kB
data/CroppedYaleB/yaleB01/yaleB01_P00A+035E+15.pgm ADDED

Git LFS Details

  • SHA256: c2d4b56cc36791862710177cee9bb4f53471f55a46950bb2ece4675e2070e213
  • Pointer size: 130 Bytes
  • Size of remote file: 32.3 kB
data/CroppedYaleB/yaleB01/yaleB01_P00A+035E+40.pgm ADDED

Git LFS Details

  • SHA256: 6f1856d82d1b0c57288139a27a512666364100dfd97ec61be472b97a133ee4f6
  • Pointer size: 130 Bytes
  • Size of remote file: 32.3 kB
data/CroppedYaleB/yaleB01/yaleB01_P00A+035E+65.pgm ADDED

Git LFS Details

  • SHA256: 023060d133de81df518fdee8aa5c59998e102166f0663b757f69aaa215878bdb
  • Pointer size: 130 Bytes
  • Size of remote file: 32.3 kB
data/CroppedYaleB/yaleB01/yaleB01_P00A+035E-20.pgm ADDED

Git LFS Details

  • SHA256: a3fd5b91e1dd93566c14eefa3a45821a3fd4c16024b67db995f9a6f3dbdff0b5
  • Pointer size: 130 Bytes
  • Size of remote file: 32.3 kB
data/CroppedYaleB/yaleB01/yaleB01_P00A+050E+00.pgm ADDED

Git LFS Details

  • SHA256: 337bea8e8f5426a15bd85a9ae0522d64342a70e71a611a5247e2bce305d39718
  • Pointer size: 130 Bytes
  • Size of remote file: 32.3 kB
data/CroppedYaleB/yaleB01/yaleB01_P00A+050E-40.pgm ADDED

Git LFS Details

  • SHA256: f1c7b18d1bbf8024ed2a8770515f309cdba2046e902140a1084832ebf257cf76
  • Pointer size: 130 Bytes
  • Size of remote file: 32.3 kB
data/CroppedYaleB/yaleB01/yaleB01_P00A+060E+20.pgm ADDED

Git LFS Details

  • SHA256: 0bb5ed0f0f57fba5424a7f037bc088d4bd054f1452676f68f8da22c06a03e4bf
  • Pointer size: 130 Bytes
  • Size of remote file: 32.3 kB
data/CroppedYaleB/yaleB01/yaleB01_P00A+060E-20.pgm ADDED

Git LFS Details

  • SHA256: f76b2d22b34bac466129c533202a1f71010499058397f45e7b7772d55ac816eb
  • Pointer size: 130 Bytes
  • Size of remote file: 32.3 kB
data/CroppedYaleB/yaleB01/yaleB01_P00A+070E+00.pgm ADDED

Git LFS Details

  • SHA256: 4b201dc5d7cb79ebee6c08d9462a0aea82cdda6f1e09adcee3dd7767bb6e7b5b
  • Pointer size: 130 Bytes
  • Size of remote file: 32.3 kB
data/CroppedYaleB/yaleB01/yaleB01_P00A+070E+45.pgm ADDED

Git LFS Details

  • SHA256: 895070756f7819e136707b6a30414f6e765a1b30eff9d9992331d5ea5d004350
  • Pointer size: 130 Bytes
  • Size of remote file: 32.3 kB
data/CroppedYaleB/yaleB01/yaleB01_P00A+070E-35.pgm ADDED

Git LFS Details

  • SHA256: 8506e0289c81c8a7ed3ce5a7c8a5e1964fb5006ac9d7943405a1729e0aae5738
  • Pointer size: 130 Bytes
  • Size of remote file: 32.3 kB
data/CroppedYaleB/yaleB01/yaleB01_P00A+085E+20.pgm ADDED

Git LFS Details

  • SHA256: 11c85d78d8e03e96e33c37ee0995fa9bae368cb72919ad053e4732ae8fe7bddd
  • Pointer size: 130 Bytes
  • Size of remote file: 32.3 kB
data/CroppedYaleB/yaleB01/yaleB01_P00A+085E-20.pgm ADDED

Git LFS Details

  • SHA256: 87540aaf6beaae20f6efe00d1d050e116cae01ecca07c649a40ad70d84e0cd02
  • Pointer size: 130 Bytes
  • Size of remote file: 32.3 kB
data/CroppedYaleB/yaleB01/yaleB01_P00A+095E+00.pgm ADDED

Git LFS Details

  • SHA256: 6bc80f90b0d9f8bd52329046ab1b4dba200e0e5dd723990f2a186e9499f32e66
  • Pointer size: 130 Bytes
  • Size of remote file: 32.3 kB
data/CroppedYaleB/yaleB01/yaleB01_P00A+110E+15.pgm ADDED

Git LFS Details

  • SHA256: adabe754242fa5f93251410ca3da39fd2a4922fa198bd155554e1f48ed83acdd
  • Pointer size: 130 Bytes
  • Size of remote file: 32.3 kB
data/CroppedYaleB/yaleB01/yaleB01_P00A+110E+40.pgm ADDED

Git LFS Details

  • SHA256: aaf7906664ff9499b15f67d4f65dbf09da07f647b031b12af547dd174c73abfa
  • Pointer size: 130 Bytes
  • Size of remote file: 32.3 kB