XavierSpycy committed
Commit bd67cfe · 1 Parent(s): 7991934

First commit

This view is limited to 50 files because the commit contains too many changes.
Files changed (50)
  1. .gitattributes +1 -0
  2. algorithm/__init__.py +0 -0
  3. algorithm/__pycache__/__init__.cpython-312.pyc +0 -0
  4. algorithm/__pycache__/datasets.cpython-312.pyc +0 -0
  5. algorithm/__pycache__/nmf.cpython-312.pyc +0 -0
  6. algorithm/__pycache__/preprocess.cpython-312.pyc +0 -0
  7. algorithm/__pycache__/sample.cpython-312.pyc +0 -0
  8. algorithm/datasets.py +135 -0
  9. algorithm/nmf.py +752 -0
  10. algorithm/pipeline.py +371 -0
  11. algorithm/preprocess.py +234 -0
  12. algorithm/sample.py +37 -0
  13. algorithm/user_evaluate.py +32 -0
  14. algorithm/visualize.py +161 -0
  15. app.py +196 -0
  16. data/.DS_Store +0 -0
  17. data/CroppedYaleB/.DS_Store +0 -0
  18. data/CroppedYaleB/yaleB01/DEADJOE +3 -0
  19. data/CroppedYaleB/yaleB01/yaleB01_P00.info +23 -0
  20. data/CroppedYaleB/yaleB01/yaleB01_P00A+000E+00.pgm +3 -0
  21. data/CroppedYaleB/yaleB01/yaleB01_P00A+000E+20.pgm +3 -0
  22. data/CroppedYaleB/yaleB01/yaleB01_P00A+000E+45.pgm +3 -0
  23. data/CroppedYaleB/yaleB01/yaleB01_P00A+000E+90.pgm +3 -0
  24. data/CroppedYaleB/yaleB01/yaleB01_P00A+000E-20.pgm +3 -0
  25. data/CroppedYaleB/yaleB01/yaleB01_P00A+000E-35.pgm +3 -0
  26. data/CroppedYaleB/yaleB01/yaleB01_P00A+005E+10.pgm +3 -0
  27. data/CroppedYaleB/yaleB01/yaleB01_P00A+005E-10.pgm +3 -0
  28. data/CroppedYaleB/yaleB01/yaleB01_P00A+010E+00.pgm +3 -0
  29. data/CroppedYaleB/yaleB01/yaleB01_P00A+010E-20.pgm +3 -0
  30. data/CroppedYaleB/yaleB01/yaleB01_P00A+015E+20.pgm +3 -0
  31. data/CroppedYaleB/yaleB01/yaleB01_P00A+020E+10.pgm +3 -0
  32. data/CroppedYaleB/yaleB01/yaleB01_P00A+020E-10.pgm +3 -0
  33. data/CroppedYaleB/yaleB01/yaleB01_P00A+020E-40.pgm +3 -0
  34. data/CroppedYaleB/yaleB01/yaleB01_P00A+025E+00.pgm +3 -0
  35. data/CroppedYaleB/yaleB01/yaleB01_P00A+035E+15.pgm +3 -0
  36. data/CroppedYaleB/yaleB01/yaleB01_P00A+035E+40.pgm +3 -0
  37. data/CroppedYaleB/yaleB01/yaleB01_P00A+035E+65.pgm +3 -0
  38. data/CroppedYaleB/yaleB01/yaleB01_P00A+035E-20.pgm +3 -0
  39. data/CroppedYaleB/yaleB01/yaleB01_P00A+050E+00.pgm +3 -0
  40. data/CroppedYaleB/yaleB01/yaleB01_P00A+050E-40.pgm +3 -0
  41. data/CroppedYaleB/yaleB01/yaleB01_P00A+060E+20.pgm +3 -0
  42. data/CroppedYaleB/yaleB01/yaleB01_P00A+060E-20.pgm +3 -0
  43. data/CroppedYaleB/yaleB01/yaleB01_P00A+070E+00.pgm +3 -0
  44. data/CroppedYaleB/yaleB01/yaleB01_P00A+070E+45.pgm +3 -0
  45. data/CroppedYaleB/yaleB01/yaleB01_P00A+070E-35.pgm +3 -0
  46. data/CroppedYaleB/yaleB01/yaleB01_P00A+085E+20.pgm +3 -0
  47. data/CroppedYaleB/yaleB01/yaleB01_P00A+085E-20.pgm +3 -0
  48. data/CroppedYaleB/yaleB01/yaleB01_P00A+095E+00.pgm +3 -0
  49. data/CroppedYaleB/yaleB01/yaleB01_P00A+110E+15.pgm +3 -0
  50. data/CroppedYaleB/yaleB01/yaleB01_P00A+110E+40.pgm +3 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.pgm filter=lfs diff=lfs merge=lfs -text
algorithm/__init__.py ADDED
File without changes
algorithm/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (148 Bytes).
algorithm/__pycache__/datasets.cpython-312.pyc ADDED
Binary file (5.53 kB).
algorithm/__pycache__/nmf.cpython-312.pyc ADDED
Binary file (53.1 kB).
algorithm/__pycache__/preprocess.cpython-312.pyc ADDED
Binary file (11.5 kB).
algorithm/__pycache__/sample.cpython-312.pyc ADDED
Binary file (1.77 kB).
algorithm/datasets.py ADDED
@@ -0,0 +1,135 @@
import os
from typing import Tuple

import numpy as np
from PIL import Image

def validate_data_directory(root: str) -> None:
    """
    Validate the data directory.

    Parameters:
    - root (str): Path to the dataset.
    """
    # Check 1: root exists.
    if not os.path.exists(root):
        raise FileNotFoundError(f'{root} does not exist!')

    # Check 2: data directory is not empty.
    subdirs = [d for d in os.listdir(root) if os.path.isdir(os.path.join(root, d))]
    if not subdirs:
        raise FileNotFoundError(f'{root} is empty!')

    # Check 3: each subdirectory contains at least one image.
    for subdir in subdirs:
        pgm_files = [f for f in os.listdir(os.path.join(root, subdir)) if f.endswith('.pgm')]
        if not pgm_files:
            raise FileNotFoundError(f'{os.path.join(root, subdir)} does not contain any image!')


def load_data(root: str='data/CroppedYaleB', reduce: int=1, global_centering: bool=True, local_centering: bool=True) -> Tuple[np.ndarray, np.ndarray]:
    """
    Load ORL (or Extended YaleB) dataset into a numpy array.

    Parameters:
    - root (str): Path to the dataset.
    - reduce (int): Scale factor for downscaling images.
    - global_centering (bool): If True, apply global centering.
    - local_centering (bool): If True, apply local centering.

    Returns:
    - images (numpy.ndarray): Image data.
    - labels (numpy.ndarray): Image labels.
    """
    # Validate the data directory.
    validate_data_directory(root)

    images, labels = [], []

    for i, person in enumerate(sorted(os.listdir(root))):

        if not os.path.isdir(os.path.join(root, person)):
            continue

        for fname in os.listdir(os.path.join(root, person)):

            # Remove background images in Extended YaleB dataset.
            if fname.endswith('Ambient.pgm'):
                continue

            if not fname.endswith('.pgm'):
                continue

            # Load image.
            img = Image.open(os.path.join(root, person, fname))
            img = img.convert('L')  # grey image.

            # Reduce computation complexity.
            img = img.resize([s//reduce for s in img.size])

            # Convert image to numpy array.
            img = np.asarray(img).reshape((-1, 1))

            # Collect data and label.
            images.append(img)
            labels.append(i)

    # Concatenate all images and labels.
    images = np.concatenate(images, axis=1)
    labels = np.array(labels)

    # Convert to float64 for numerical stability.
    images = images.astype(np.float64)

    # Global centering.
    if global_centering:
        images -= images.mean(axis=0)

    # Local centering.
    if local_centering:
        images -= images.mean(axis=1).reshape(-1, 1)

    return images, labels

def get_image_size(root: str='data/CroppedYaleB') -> tuple:
    """
    Get the size of images in the dataset.

    Parameters:
    - root (str): Path to the dataset.

    Returns:
    - img_size (tuple): Size of each image as (width, height).
    """
    # Validate the data directory.
    validate_data_directory(root)

    for person in sorted(os.listdir(root)):

        if not os.path.isdir(os.path.join(root, person)):
            continue

        for fname in os.listdir(os.path.join(root, person)):

            # Remove background images in Extended YaleB dataset.
            if fname.endswith('Ambient.pgm'):
                continue

            if not fname.endswith('.pgm'):
                continue

            # Load the first valid image and return its original size immediately.
            img = Image.open(os.path.join(root, person, fname))
            img = img.convert('L')  # Grey image.
            return img.size  # (width, height)
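A minimal usage sketch for the loaders above (illustrative only, not part of this commit; it assumes the repository root as the working directory and the data/CroppedYaleB layout shown in the file list):

# Hypothetical example; load_data stacks each image as one column of X.
from algorithm.datasets import load_data, get_image_size

X, y = load_data(root='data/CroppedYaleB', reduce=3,
                 global_centering=False, local_centering=False)
width, height = get_image_size('data/CroppedYaleB')   # original (width, height)
print(X.shape, y.shape, (width, height))              # X is (n_pixels, n_samples)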
algorithm/nmf.py ADDED
@@ -0,0 +1,752 @@
import os
import time
from abc import ABC, abstractmethod
from collections import Counter
from typing import Union, Dict, Tuple, Generator

import numpy as np
from tqdm import tqdm
from scipy.linalg import pinv
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans, BisectingKMeans
from sklearn.metrics import mean_squared_error, accuracy_score, normalized_mutual_info_score

class BasicNMF(ABC):
    name = 'Basic'
    """
    A basic framework for Non-negative Matrix Factorization (NMF) algorithms.
    """
    def __init__(self) -> None:
        """
        Initialize the basic NMF algorithm.
        """
        self.loss_list = []

    def __PCA(self, X: np.ndarray, n_components: int) -> np.ndarray:
        """
        Principal Component Analysis (PCA) for dimensionality reduction.

        Parameters:
        X (numpy.ndarray): Input dataset of shape (n_samples, n_features).
        n_components (int): Number of principal components to retain.

        Returns:
        transformed_data (numpy.ndarray): Dataset transformed into principal component space.
        """
        if n_components > X.shape[1]:
            raise ValueError("n_components must be less than or equal to the number of features")

        # Center the data
        X_centered = X - np.mean(X, axis=0)
        # Calculate the covariance matrix and its eigenvalues and eigenvectors
        cov_mat = np.cov(X_centered, rowvar=False)
        eigenvalues, eigenvectors = np.linalg.eigh(cov_mat)
        # Sort the eigenvalues and eigenvectors in descending order
        sorted_indices = eigenvalues.argsort()[::-1]
        eigenvectors = eigenvectors[:, sorted_indices]
        # Projection matrix using the first n_components eigenvectors
        projection_matrix = eigenvectors[:, :n_components]
        # Project the data onto the new feature space
        transformed_data = np.dot(X_centered, projection_matrix)
        return transformed_data

    def __FastICA(self, X: np.ndarray, max_iter: int=200, random_state: Union[int, np.random.RandomState, None]=None) -> np.ndarray:
        """
        Implementation of the FastICA algorithm to separate the independent sources
        from mixed signals in the input data.

        Parameters:
        X (numpy.ndarray): Input dataset of shape (n_samples, n_features).
        max_iter (int, optional): The maximum number of iterations for the convergence of the estimation. Default is 200.

        Return:
        S (numpy.ndarray): Matrix of shape (n_samples, n_features) representing the estimated independent sources.
        """
        # Set the random state
        rng = np.random.RandomState(random_state)
        # Center the data by removing the mean
        X = X - np.mean(X, axis=1, keepdims=True)
        n = X.shape[0]
        # Compute the independent components iteratively
        W = np.zeros((n, n))
        for i in range(n):
            w = rng.rand(n)
            for j in range(max_iter):  # max iterations for convergence
                w_new = (X * np.dot(w, X)).mean(axis=1) - 2 * w
                w_new /= np.sqrt((w_new ** 2).sum())
                # Convergence check based on the weight vector's direction
                if np.abs(np.abs((w_new * w).sum()) - 1) < 1e-04:
                    break
                w = w_new
            W[i, :] = w
            X -= np.outer(w, np.dot(w, X))
        # Compute the estimated independent sources
        S = np.dot(W, X)
        return S

    def __NICA(self, X: np.ndarray, r: int, random_state: Union[int, np.random.RandomState, None]=None) -> Tuple[np.ndarray, np.ndarray]:
        """
        Implementation of a non-negative Independent Component Analysis (NICA).
        The process involves obtaining a non-negative basic matrix and a
        non-negative coefficient matrix from the input data.

        Parameters:
        - X (numpy.ndarray): The input data matrix of shape (n_features, n_samples)
          where n_samples is the number of samples, and n_features is the number of features.
        - r (int): The number of components to be retained after applying PCA.

        Returns:
        - W_0 (numpy.ndarray): The non-negative dictionary matrix.
        - H_0 (numpy.ndarray): The non-negative representation matrix.
        """
        # Set A as a pseudoinverse of X
        A = pinv(X.T)
        # Apply PCA on the matrix A to generate the basic matrix W
        W = self.__PCA(A, n_components=r)
        # Whiten the basic matrix W obtained above by using the eigenvalue decomposition of the covariance matrix of W.
        eigenvalues, eigenvectors = np.linalg.eigh(np.cov(W, rowvar=False))
        # Preallocate memory for whitened matrix
        W_whitened = np.empty_like(W)
        np.dot(W, eigenvectors, out=W_whitened)
        W_whitened /= np.sqrt(eigenvalues + 1e-5)
        # Implement the ICA algorithm on the whitened matrix W and obtain the independent basic matrix W_0
        # Assuming FastICA() returns the transformed matrix
        W_0 = self.__FastICA(W_whitened, random_state=random_state)
        # Preallocate memory for H_0 and calculate it
        H_0 = np.empty((W_0.shape[1], X.shape[1]))
        np.dot(W_0.T, X, out=H_0)
        # Take the absolute value in-place
        np.abs(W_0, out=W_0)
        np.abs(H_0, out=H_0)
        return W_0, H_0

    def Kmeans(self, X: np.ndarray, n_components: int, random_state: Union[int, np.random.RandomState, None]=None) -> Tuple[np.ndarray, np.ndarray]:
        """
        Initialize D and R matrices using the K-means algorithm.

        Parameters:
        - X (numpy.ndarray): Input data matrix of shape (n_features, n_samples).
        - n_components (int): The number of components for matrix factorization.
        - random_state (int, np.random.RandomState, None): Random state for reproducibility.

        Returns:
        - D (numpy.ndarray): The initial dictionary matrix.
        - R (numpy.ndarray): The initial representation matrix.
        """
        # Initialize
        kmeans = KMeans(n_clusters=n_components, n_init='auto', random_state=random_state)
        kmeans.fit(X.T)
        D = kmeans.cluster_centers_.T
        labels = kmeans.labels_
        G = np.zeros((len(labels), n_components))
        for i, label in enumerate(labels):
            G[i, label] = 1
        G = G / np.sqrt(np.sum(G, axis=0, keepdims=True))
        G += 0.2
        R = G.T
        return D, R

    def matrix_init(self, X: np.ndarray, n_components: int,
                    random_state: Union[int, np.random.RandomState, None]=None) -> Tuple[np.ndarray, np.ndarray]:
        """
        Initialize D and R matrices using the NICA algorithm.

        Parameters:
        - X (numpy.ndarray): Input data matrix of shape (n_features, n_samples).
        - n_components (int): The number of components for matrix factorization.
        - random_state (int, np.random.RandomState, None): Random state for reproducibility.

        Returns:
        - D (numpy.ndarray): The non-negative dictionary matrix.
        - R (numpy.ndarray): The non-negative representation matrix.
        """
        # Initialize
        D, R = self.__NICA(X, n_components, random_state=random_state)
        return D, R

    def fit(self, X: np.ndarray, n_components: int, max_iter: int=500,
            random_state: Union[int, np.random.RandomState, None]=None,
            verbose: bool=True, imshow: bool=False, warm_start: bool=False, **kwargs) -> None:
        """
        Non-negative Matrix Factorization (NMF) algorithm using the L2-norm for the convergence criterion.

        Parameters:
        - X (numpy.ndarray): Input data matrix of shape (n_features, n_samples).
        - n_components (int): The number of components for matrix factorization.
        - max_iter (int, optional): Maximum number of iterations. Default is 500.
        - verbose (bool, optional): Whether to show the progress bar.
        - random_state (int, np.random.RandomState, None, optional): Random state for reproducibility. Default is None.
        - imshow (bool, optional): Whether to plot the convergence trend. Default is False.
        - warm_start (bool, optional): Whether to continue from the previous state. Default is False.
        - kwargs: Additional keyword arguments for the update rule.
        """
        # Record start time
        start_time = time.time()
        # Initialize D and R matrices using the NICA algorithm by default
        if not warm_start or (warm_start and not hasattr(self, 'D') and not hasattr(self, 'R')):
            self.D, self.R = self.matrix_init(X, n_components, random_state)
        else:
            if verbose:
                print('Warm start enabled. Continuing from previous state.')

        # Compute initialization time
        init_time = time.time() - start_time
        # Copy D and R matrices for convergence check
        self.D_prev, self.R_prev = self.D.copy(), self.R.copy()
        if verbose:
            print(f'Initialization done. Time elapsed: {init_time:.2f} seconds.')
        # Iteratively update D and R matrices until convergence
        for _ in self.conditional_tqdm(range(max_iter), verbose=verbose):
            # Update D and R matrices
            flag = self.update(X, **kwargs)
            # Check convergence
            if flag:
                if verbose:
                    print('Converged at iteration', _)
                break
        if imshow:
            self.plot()

    @abstractmethod
    def update(self, X: np.ndarray, **kwargs: Dict[str, float]) -> bool:
        """
        Update rule for D and R matrices using a specific NMF algorithm, which must be implemented in the derived class.

        Parameters:
        - X (numpy.ndarray): Input data matrix of shape (n_features, n_samples).
        - kwargs: Additional keyword arguments for the update rule.

        Returns:
        - flag (bool): Whether the algorithm has converged.
        """
        # Calculate L2-norm based errors for convergence
        e_D = np.sqrt(np.sum((self.D - self.D_prev) ** 2, axis=(0, 1))) / self.D.size
        e_R = np.sqrt(np.sum((self.R - self.R_prev) ** 2, axis=(0, 1))) / self.R.size
        return (e_D < 1e-6 and e_R < 1e-6)

    def plot(self) -> None:
        """
        Plot the convergence trend of the cost function.
        """
        plt.plot(self.loss_list)
        plt.xlabel('Iteration')
        plt.ylabel('Cost function')
        plt.grid()
        plt.show()

    def conditional_tqdm(self, iterable, verbose: bool=True) -> Generator[int, None, None]:
        """
        Determine whether to use tqdm or not based on the verbose flag.

        Parameters:
        - iterable (range): Range of values to iterate over.
        - verbose (bool, optional): Whether to print the progress bar. Default is True.

        Returns:
        - item (int): Current iteration.
        """
        if verbose:
            for item in tqdm(iterable):
                yield item
        else:
            for item in iterable:
                yield item

    def normalize(self, epsilon: float=1e-7) -> None:
        """
        Normalize columns of D and rows of R.

        Parameter:
        - epsilon (float, optional): Small constant added to the denominator to prevent division by zero. Default is 1e-7.
        """
        # Normalize columns of D and rows of R
        norms = np.sqrt(np.sum(self.D**2, axis=0))
        self.D /= norms[np.newaxis, :] + epsilon
        self.R *= norms[:, np.newaxis]

    def evaluate(self, X_clean: np.ndarray, Y_true: np.ndarray, random_state: Union[int, np.random.RandomState, None]=None) -> Tuple[float, float, float]:
        """
        Evaluate the specific NMF algorithm on the specific dataset.

        Parameters:
        - X_clean (numpy.ndarray): The original clean data matrix of shape (n_features, n_samples).
        - Y_true (numpy.ndarray): The true labels corresponding to each sample in X of shape (n_samples,).
        - random_state (int, np.random.RandomState, None, optional): Random state for reproducibility. Default is None.

        Returns:
        - rmse (float): The root mean squared error of the reconstructed matrix and the original matrix.
        - acc (float): The accuracy score of the predicted labels based on the clustering results on the reconstructed matrix.
        - nmi (float): The normalized mutual information score of the predicted labels based on the clustering results on the reconstructed matrix.
        """
        Y_label = self.__labeling(self.R.T, Y_true, random_state=random_state)
        rmse = np.sqrt(mean_squared_error(X_clean, np.dot(self.D, self.R)))
        acc = accuracy_score(Y_true, Y_label)
        nmi = normalized_mutual_info_score(Y_true, Y_label)
        return rmse, acc, nmi

    def __labeling(self, X: np.ndarray, Y: np.ndarray, random_state: Union[int, np.random.RandomState, None]=None) -> np.ndarray:
        """
        Label data based on clusters obtained from KMeans clustering,
        by assigning the most frequent label in each cluster.

        Parameters:
        - X (numpy.ndarray): Input feature matrix of shape (n_samples, n_features).
        - Y (numpy.ndarray): True labels corresponding to each sample in X of shape (n_samples,).

        Returns:
        - Y_pred (numpy.ndarray): Predicted labels for each sample based on the clustering results.

        Note:
        This function works best when the input data is somewhat separated into distinct
        clusters that align with the true labels.
        """
        cluster = BisectingKMeans(len(set(Y)), random_state=random_state).fit(X)
        Y_pred = np.zeros(Y.shape)
        for i in set(cluster.labels_):
            ind = cluster.labels_ == i
            Y_pred[ind] = Counter(Y[ind]).most_common(1)[0][0]  # assign label.
        return Y_pred

    def vectorized_armijo_rule(self, f, grad_f, X, alpha, c=1e-4, tau=0.5):
        """
        Vectorized Armijo rule to find the step size for each element in the matrix.

        Parameters:
        - f: The objective function, which should accept a matrix and return a scalar.
        - grad_f: The gradient of the objective function, which returns a matrix.
        - X: Current point, a matrix.
        - alpha: Initial step size, a scalar or a matrix.
        - c: A constant in (0, 1), typically a small value (default is 1e-4).
        - tau: Reduction factor for step size, typically in (0, 1) (default is 0.5).

        Returns:
        - alpha: Step sizes that satisfy the Armijo condition for each element.
        """
        # Compute the initial objective function value
        f_x = f(X)
        # Compute the initial gradient and its norm squared
        grad_f_x = grad_f(X)
        norm_grad_f_x_squared = np.square(np.linalg.norm(grad_f_x, axis=(0, 1), keepdims=True))

        # Compute the sufficient decrease condition for the entire matrix
        sufficient_decrease = f_x - c * alpha * norm_grad_f_x_squared

        counter = 0
        # Check the condition and shrink the step size, capping the number of reductions at 10
        while np.any(f(X - alpha * grad_f_x) > sufficient_decrease) and counter < 10:
            # Reduce alpha for elements not satisfying the condition
            alpha *= tau
            counter += 1
        return alpha

    @classmethod
    def from_pretrained(cls, file_path: str, **kwargs: Dict[str, float]) -> 'BasicNMF':
        """
        Load the model parameters from a file.

        Parameters:
        - file_path (str): The path to the file where the model parameters are saved.

        Returns:
        - instance (BasicNMF): An instance of the BasicNMF class with the loaded parameters.
        """
        import pickle
        with open(file_path, 'rb') as file:
            params = pickle.load(file)
        instance = cls(**kwargs)
        instance.__dict__.update(params)
        return instance

    def save(self, file_path: str) -> None:
        """
        Save the model parameters to a file.

        Parameters:
        - file_path (str): The path to the file where the model parameters will be saved.
        """
        import pickle
        with open(file_path, 'wb') as file:
            pickle.dump(self.__dict__, file)

    def __call__(self, **kwargs: Dict[str, float]):
        """
        Overwrite the __call__ method to fit the model with the given parameters.
        """
        self.fit(**kwargs)

class L2NormNMF(BasicNMF):
    name = 'L2Norm'
    """
    L2-norm NMF algorithm.
    """
    def __init__(self) -> None:
        super().__init__()

    def update(self, X: np.ndarray, threshold: float=1e-6, epsilon: float=1e-7) -> bool:
        """
        Update rule for D and R matrices using the L2-norm NMF algorithm.

        Parameters:
        - X (numpy.ndarray): Input data matrix of shape (n_features, n_samples).
        - threshold (float, optional): Convergence threshold based on the L2-norm. Default is 1e-6.
        - epsilon (float, optional): Small constant added to the denominator to prevent division by zero. Default is 1e-7.

        Returns:
        - flag (bool): Whether the algorithm has converged.
        """
        # Multiplicative update rule for D and R matrices
        self.D *= np.dot(X, self.R.T) / (np.dot(np.dot(self.D, self.R), self.R.T) + epsilon)
        self.R *= np.dot(self.D.T, X) / (np.dot(np.dot(self.D.T, self.D), self.R) + epsilon)
        # Calculate the loss function
        loss = np.linalg.norm(X - np.dot(self.D, self.R), 'fro') ** 2
        self.loss_list.append(loss)
        # Calculate L2-norm based errors for convergence
        e_D = np.sqrt(np.sum((self.D - self.D_prev) ** 2, axis=(0, 1))) / self.D.size
        e_R = np.sqrt(np.sum((self.R - self.R_prev) ** 2, axis=(0, 1))) / self.R.size
        # Update previous matrices for the next iteration
        self.D_prev, self.R_prev = self.D.copy(), self.R.copy()
        return (e_D < threshold and e_R < threshold)

class KLDivergenceNMF(BasicNMF):
    name = 'KLDivergence'
    """
    KL-divergence NMF algorithm.
    """
    def __init__(self) -> None:
        """
        Initialize the KL-divergence NMF algorithm.
        """
        super().__init__()
        self.prev_kl = float('inf')

    def update(self, X: np.ndarray, epsilon: float=1e-7, threshold: float=1e-4) -> bool:
        """
        Update rule for D and R matrices using the KL-divergence NMF algorithm.

        Parameters:
        - X (numpy.ndarray): Input data matrix of shape (n_features, n_samples).
        - epsilon (float, optional): Small constant added to the denominator to prevent division by zero. Default is 1e-7.
        - threshold (float, optional): Convergence threshold based on the KL-divergence. Default is 1e-4.

        Returns:
        - flag (bool): Whether the algorithm has converged.
        """
        # Multiplicative update rule for D and R matrices
        self.D *= np.dot(X / (np.dot(self.D, self.R) + epsilon), self.R.T) / (np.dot(np.ones(X.shape), self.R.T) + epsilon)
        self.R *= np.dot(self.D.T, X / (np.dot(self.D, self.R) + epsilon)) / (np.dot(self.D.T, np.ones(X.shape)) + epsilon)

        # Calculate KL-divergence
        XR = np.dot(self.D, self.R) + epsilon
        kl_div = np.sum(X * np.log(np.maximum(epsilon, X / (XR + epsilon))) - X + XR)
        self.loss_list.append(kl_div)
        flag = abs(kl_div - self.prev_kl) < threshold
        self.prev_kl = kl_div  # Update previous KL divergence
        return flag

class ISDivergenceNMF(BasicNMF):
    name = 'ISDivergence'
    """
    IS-divergence NMF algorithm.
    """
    def __init__(self) -> None:
        """
        Initialize the IS-divergence NMF algorithm.
        """
        super().__init__()
        self.prev_is_div = float('inf')

    def update(self, X: np.ndarray, epsilon: float=1e-7, threshold: float=1e-6) -> bool:
        """
        Update rule for D and R matrices using the IS-divergence NMF algorithm.

        Parameters:
        - X (numpy.ndarray): Input data matrix of shape (n_features, n_samples).
        - epsilon (float, optional): Small constant added to the denominator to prevent division by zero. Default is 1e-7.
        - threshold (float, optional): Convergence threshold based on the IS-divergence. Default is 1e-6.

        Returns:
        - flag (bool): Whether the algorithm has converged.
        """
        # Update R
        DR = np.dot(self.D, self.R)
        DR = np.where(DR > 0, DR, epsilon)
        self.R *= (np.dot(self.D.T, (DR ** (-2) * X))) / (np.dot(self.D.T, DR ** (-1)) + epsilon)
        # Update D
        DR = np.dot(self.D, self.R)
        DR = np.where(DR > 0, DR, epsilon)
        self.D *= (np.dot((DR ** (-2) * X), self.R.T)) / (np.dot(DR ** (-1), self.R.T) + epsilon)
        # Normalize D and R
        self.normalize(epsilon)
        # Calculate IS-divergence
        DR = np.dot(self.D, self.R) + epsilon
        is_div = np.sum(-np.log(np.maximum(epsilon, X / DR)) + X / DR - 1)
        # Adding L2 regularization terms to the IS-divergence
        # is_div += lambd * np.linalg.norm(self.D, 'fro') ** 2 + lambd * np.linalg.norm(self.R, 'fro') ** 2
        self.loss_list.append(is_div)
        flag = np.abs(is_div - self.prev_is_div) < threshold
        self.prev_is_div = is_div
        return flag

class L21NormNMF(BasicNMF):
    name = 'L21Norm'
    """
    L21 Norm NMF algorithm.
    """
    def __init__(self) -> None:
        """
        Initialize the L21 Norm NMF algorithm.
        """
        super().__init__()

    def update(self, X: np.ndarray, epsilon: float=1e-7, threshold: float=1e-4) -> bool:
        """
        Update rule for D and R matrices using the L21 Norm NMF algorithm.

        Parameters:
        - X (numpy.ndarray): Input data matrix of shape (n_features, n_samples).
        - epsilon (float, optional): Small constant added to the denominator to prevent division by zero. Default is 1e-7.
        - threshold (float, optional): Convergence threshold based on the L21 Norm. Default is 1e-4.

        Returns:
        - flag (bool): Whether the algorithm has converged.
        """
        # Multiplicative update rule for D and R matrices
        residual = X - np.dot(self.D, self.R)  # residual.shape = (n_features, n_samples)
        norm_values = np.sqrt(np.sum(residual ** 2, axis=1))
        diagonal = np.diag(1.0 / (norm_values + epsilon))  # diagonal.shape = (n_features, n_features)
        # Update rule for D
        self.D *= (np.dot(np.dot(diagonal, X), self.R.T) / (np.dot(np.dot(np.dot(diagonal, self.D), self.R), self.R.T) + epsilon))
        # Update rule for R
        self.R *= (np.dot(np.dot(self.D.T, diagonal), X) / (np.dot(np.dot(np.dot(self.D.T, diagonal), self.D), self.R) + epsilon))
        # Calculate the loss function
        loss = np.linalg.norm(X - np.dot(self.D, self.R), 'fro')
        self.loss_list.append(loss)
        # Calculate L2,1-norm based errors for convergence
        e_D = np.linalg.norm(self.D - self.D_prev, 'fro') / np.linalg.norm(self.D, 'fro')
        e_R = np.linalg.norm(self.R - self.R_prev, 'fro') / np.linalg.norm(self.R, 'fro')
        # Update previous matrices for the next iteration
        self.D_prev, self.R_prev = self.D.copy(), self.R.copy()
        return (e_D < threshold and e_R < threshold)

class L1NormRegularizedNMF(BasicNMF):
    name = 'L1NormRegularized'
    """
    L1 Norm Regularized NMF algorithm.
    """
    def __init__(self) -> None:
        """
        Initialize the L1 Norm Regularized NMF algorithm.
        """
        super().__init__()

    # Helper function
    def soft_thresholding(self, x: np.ndarray, lambd: float) -> np.ndarray:
        """
        Soft thresholding operator.

        Parameters:
        - x (numpy.ndarray): Input data matrix of shape (n_features, n_samples).
        - lambd (float): Threshold value.

        Returns:
        - y (numpy.ndarray): The updated matrix after applying the soft thresholding operator.
        """
        return np.where(x > lambd, x - lambd, np.where(x < -lambd, x + lambd, 0))

    def update(self, X: np.ndarray, lambd: float=0.2, epsilon: float=1e-7, threshold: float=1e-8) -> bool:
        """
        Update rule for D and R matrices using the L1 Norm Regularized NMF algorithm.

        Parameters:
        - X (numpy.ndarray): Input data matrix of shape (n_features, n_samples).
        - lambd (float): Threshold value.
        - epsilon (float, optional): Small constant added to the denominator to prevent division by zero. Default is 1e-7.
        - threshold (float, optional): Convergence threshold based on the L1 Norm Regularized objective. Default is 1e-8.

        Returns:
        - flag (bool): Whether the algorithm has converged.
        """
        # Compute the error matrix
        S = X - np.dot(self.D, self.R)
        # Soft thresholding operator
        S = self.soft_thresholding(S, lambd / 2)
        # Multiplicative update rule for D and R matrices
        update_D = np.dot(S - X, self.R.T)
        self.D *= (np.abs(update_D) - update_D) / (2 * np.dot(np.dot(self.D, self.R), self.R.T) + epsilon)
        update_R = np.dot(self.D.T, S - X)
        self.R *= (np.abs(update_R) - update_R) / (2 * np.dot(np.dot(self.D.T, self.D), self.R) + epsilon)
        self.normalize(epsilon)
        # Calculate the loss function
        loss = np.linalg.norm(X - np.dot(self.D, self.R) - S, 'fro') ** 2 + lambd * np.sum(np.abs(S))
        self.loss_list.append(loss)
        # Calculate L2-norm based errors for convergence
        e_D = np.sqrt(np.sum((self.D - self.D_prev) ** 2, axis=(0, 1))) / self.D.size
        e_R = np.sqrt(np.sum((self.R - self.R_prev) ** 2, axis=(0, 1))) / self.R.size
        # Update previous matrices for the next iteration
        self.D_prev, self.R_prev = self.D.copy(), self.R.copy()
        return (e_D < threshold and e_R < threshold)

    def matrix_init(self, X: np.ndarray, n_components: int,
                    random_state: Union[int, np.random.RandomState, None]=None) -> Tuple[np.ndarray, np.ndarray]:
        return self.Kmeans(X, n_components, random_state)

class CauchyNMF(BasicNMF):
    name = 'Cauchy'
    """
    Cauchy NMF algorithm.
    """
    def __init__(self) -> None:
        """
        Initialize the Cauchy NMF algorithm.
        """
        super().__init__()

    # Helper function
    def compute(self, A: np.ndarray, B: np.ndarray, epsilon: float) -> np.ndarray:
        """
        Update rule for the Cauchy divergence.

        Parameters:
        A (numpy.ndarray): The first matrix, which is noted as A.
        B (numpy.ndarray): The second matrix, which is noted as B.
        epsilon (float): Small constant added to the denominator to prevent division by zero.

        Returns:
        C (numpy.ndarray): The updated matrix.
        """
        temp = A ** 2 + 2 * B * A
        temp = np.where(temp > 0, temp, epsilon)
        return B / (A + np.sqrt(temp))

    def update(self, X: np.ndarray, epsilon: float=1e-7, threshold: float=1e-4) -> bool:
        """
        Update rule for D and R matrices using the Cauchy NMF algorithm.

        Parameters:
        - X (numpy.ndarray): Input data matrix of shape (n_features, n_samples).
        - epsilon (float, optional): Small constant added to the denominator to prevent division by zero. Default is 1e-7.
        - threshold (float, optional): Convergence threshold based on the Cauchy divergence. Default is 1e-4.

        Returns:
        - flag (bool): Whether the algorithm has converged.
        """
        if not hasattr(self, 'prev_cauchy_div'):
            DR = np.dot(self.D, self.R)
            log_residual = np.log(DR + epsilon) - np.log(X + epsilon)
            residual = X - DR
            self.prev_cauchy_div = np.sum(log_residual + residual / (DR + epsilon))
        # Update rule for D
        DR = np.dot(self.D, self.R)
        A = 3 / 4 * np.dot((DR / (DR ** 2 + X + epsilon)), self.R.T)
        B = np.dot(1 / (DR + epsilon), self.R.T)
        self.D *= self.compute(A, B, epsilon)
        # Update rule for R
        DR = np.dot(self.D, self.R)
        A = 3 / 4 * np.dot(self.D.T, (DR / (DR ** 2 + X + epsilon)))
        B = np.dot(self.D.T, 1 / (DR + epsilon))
        self.R *= self.compute(A, B, epsilon)
        # Calculate the Cauchy divergence
        DR = np.dot(self.D, self.R)
        cauchy_div = np.sum(np.log(DR + epsilon) - np.log(X + epsilon) + (X - DR) / (DR + epsilon))
        self.loss_list.append(cauchy_div)
        flag = abs(cauchy_div - self.prev_cauchy_div) < threshold
        self.prev_cauchy_div = cauchy_div  # Update previous Cauchy divergence
        return flag

class CappedNormNMF(BasicNMF):
    name = 'CappedNorm'
    """
    Capped Norm NMF algorithm.
    """
    def __init__(self) -> None:
        """
        Initialize the Capped Norm NMF algorithm.
        """
        super().__init__()
        self.loss_prev = float('inf')

    # Helper function
    def matrix_init(self, X: np.ndarray, n_components: int,
                    random_state: Union[int, np.random.RandomState, None]=None) -> Tuple[np.ndarray, np.ndarray]:
        return self.Kmeans(X, n_components, random_state)

    def update(self, X, theta: float=0.2, threshold: float=1e-3, epsilon: float=1e-7) -> bool:
        """
        Update rule for D and R matrices using the Capped Norm NMF algorithm.

        Parameters:
        - X (numpy.ndarray): Input data matrix of shape (n_features, n_samples).
        - theta (float, optional): Outlier parameter. Default is 0.2.
        - threshold (float, optional): Convergence threshold based on the change in loss. Default is 1e-3.
        - epsilon (float, optional): Small constant added to the denominator to prevent division by zero. Default is 1e-7.

        Returns:
        - flag (bool): Whether the algorithm has converged.
        """
        if not hasattr(self, 'I'):
            self.n_samples = X.shape[1]
            self.I = np.identity(self.n_samples)
        # Multiplicative update rule for D and R matrices
        G = self.R.T
        self.D *= np.dot(np.dot(X, self.I), G) / (np.dot(np.dot(np.dot(self.D, G.T), self.I), G) + epsilon)
        G *= np.sqrt((np.dot(np.dot(self.I, X.T), self.D)) / (np.dot(np.dot(np.dot(np.dot(self.I, G), G.T), X.T), self.D) + epsilon))
        self.R = G.T
        # Update rule for I
        diff = X - np.dot(self.D, self.R)
        norms = np.linalg.norm(diff, axis=0)
        norms /= np.max(norms)
        I = np.full_like(norms, epsilon)
        indices = np.where(norms < theta)
        I[indices] = 1 / (2 * norms[indices])
        self.I = np.diagflat(I)
        # Calculate the loss function
        loss = np.linalg.norm(X - np.dot(self.D, self.R), 'fro') ** 2
        flag = abs(loss - self.loss_prev) < threshold
        self.loss_list.append(loss)
        self.loss_prev = loss
        return flag

class HSCostNMF(BasicNMF):
    name = 'HSCost'
    """
    Hypersurface Cost NMF algorithm.
    """
    def __init__(self) -> None:
        """
        Initialize the Hypersurface Cost NMF algorithm.
        """
        super().__init__()
        self.loss_prev = float('inf')
        # Objective function and its gradient
        self.obj_func = lambda X, D, R: np.linalg.norm(X - np.dot(D, R), 'fro')
        self.grad_D = lambda X, D, R: (np.dot((np.dot(D, R) - X), R.T)) / np.sqrt(1 + np.linalg.norm(X - np.dot(D, R), 'fro'))
        self.grad_R = lambda X, D, R: (np.dot(D.T, (np.dot(D, R) - X))) / np.sqrt(1 + np.linalg.norm(X - np.dot(D, R), 'fro'))

    def update(self, X: np.ndarray, threshold: float=1e-8, alpha: float=0.1, beta: float=0.1, c: float=1e-4, tau: float=0.5) -> bool:
        """
        Update rule for D and R matrices using the Hypersurface Cost NMF algorithm.

        Parameters:
        - X (numpy.ndarray): Input data matrix of shape (n_features, n_samples).
        - threshold (float, optional): Convergence threshold based on the change in loss. Default is 1e-8.
        - alpha (float, optional): Initial learning rate for gradient descent on D. Default is 0.1.
        - beta (float, optional): Initial learning rate for gradient descent on R. Default is 0.1.
        - c (float, optional): A constant in (0, 1), typically a small value. Default is 1e-4.
        - tau (float, optional): A reduction factor for step size, typically in (0, 1). Default is 0.5.

        Returns:
        - flag (bool): Whether the algorithm has converged.
        """
        if not hasattr(self, 'alpha'):
            self.alpha = np.full_like(self.D, alpha)
            self.beta = np.full_like(self.R, beta)
        # Vectorized Armijo rule to update alpha and beta
        self.alpha = self.vectorized_armijo_rule(lambda D: self.obj_func(X, D, self.R), lambda D: self.grad_D(X, D, self.R), self.D, self.alpha, c, tau)
        self.beta = self.vectorized_armijo_rule(lambda R: self.obj_func(X, self.D, R), lambda R: self.grad_R(X, self.D, R), self.R, self.beta, c, tau)
        self.alpha = np.maximum(self.alpha, threshold)
        self.beta = np.maximum(self.beta, threshold)
        # Update rule for D and R
        self.D -= self.alpha * (np.dot((np.dot(self.D, self.R) - X), self.R.T)) / np.sqrt(1 + np.linalg.norm(X - np.dot(self.D, self.R), 'fro'))
        self.R -= self.beta * (np.dot(self.D.T, (np.dot(self.D, self.R) - X))) / np.sqrt(1 + np.linalg.norm(X - np.dot(self.D, self.R), 'fro'))
        self.D[np.where(self.D < 0)] = 0
        self.R[np.where(self.R < 0)] = 0
        # Calculate loss
        loss_current = np.sqrt(1 + np.linalg.norm(X - np.dot(self.D, self.R), 'fro')) - 1
        self.loss_list.append(loss_current)
        flag = abs(loss_current - self.loss_prev) < threshold
        # Update previous loss for the next iteration
        self.loss_prev = loss_current
        return flag
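A brief, hedged sketch of how these NMF classes are driven (illustrative only, not part of this commit; the random matrix stands in for real image data and follows the (n_features, n_samples) convention used throughout nmf.py):

# Hypothetical example using one of the concrete subclasses defined above.
import numpy as np
from algorithm.nmf import L1NormRegularizedNMF

rng = np.random.RandomState(0)
X = rng.rand(1024, 200)                  # non-negative data, (n_features, n_samples)
y = rng.randint(0, 10, size=200)         # stand-in labels, only needed by evaluate()

model = L1NormRegularizedNMF()
model.fit(X, n_components=10, max_iter=200, random_state=0, verbose=False)
rmse, acc, nmi = model.evaluate(X, y, random_state=0)

model.save('l1_nmf.pkl')                              # pickle the learned state
restored = L1NormRegularizedNMF.from_pretrained('l1_nmf.pkl')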
algorithm/pipeline.py ADDED
@@ -0,0 +1,371 @@
1
+ import os
2
+ import csv
3
+ import logging
4
+ from typing import Union, List, Tuple, Generator
5
+
6
+ import numpy as np
7
+ import pandas as pd
8
+
9
+ from algorithm.datasets import load_data, get_image_size
10
+ from algorithm.preprocess import NoiseAdder, MinMaxScaler, StandardScaler
11
+ from algorithm.sample import random_sample
12
+ from algorithm.nmf import BasicNMF, L2NormNMF, KLDivergenceNMF, ISDivergenceNMF, L21NormNMF, HSCostNMF, L1NormRegularizedNMF, CappedNormNMF, CauchyNMF
13
+ from algorithm.user_evaluate import evaluate
14
+
15
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
16
+
17
+ def summary(log_file_name: str) -> pd.DataFrame:
18
+ """
19
+ Parameter:
20
+ log_file_name (str): The name of the log file to read.
21
+
22
+ Return:
23
+ result (pandas.DataFrame): The summary of the log file.
24
+ """
25
+ df = pd.read_csv(log_file_name)
26
+ result = df.groupby(by=['dataset', 'noise_type', 'noise_level'])[['rmse', 'nmi', 'acc']].mean()
27
+ return result
28
+
29
+ class BasicBlock(object):
30
+ """
31
+ Basic block for the pipeline.
32
+ """
33
+ def basic_info(self, nmf: Union[BasicNMF, str], dataset: str, scaler: str) -> Tuple[str, Union[MinMaxScaler, StandardScaler], BasicNMF]:
34
+ """
35
+ Get the basic information for the pipeline.
36
+
37
+ Parameters:
38
+ - nmf (Union[BasicNMF, str]): NMF algorithm to use.
39
+ - dataset (str): Name of the dataset to use.
40
+ - scaler (str): Name of the scaler to use.
41
+
42
+ Returns:
43
+ - folder (str): Folder of the dataset.
44
+ - scaler (MinMaxScaler or StandardScaler): Scaler to use.
45
+ - nmf (BasicNMF): NMF algorithm to use.
46
+ """
47
+ # Create mappings for the NMF algorithms, datasets, and scalers
48
+ # Store NMF algorithms in a dictionary
49
+ nmf_dict = {
50
+ 'L2NormNMF': L2NormNMF,
51
+ 'KLDivergenceNMF': KLDivergenceNMF,
52
+ 'ISDivergenceNMF': ISDivergenceNMF,
53
+ 'L21NormNMF': L21NormNMF,
54
+ 'HSCostNMF': HSCostNMF,
55
+ 'L1NormRegularizedNMF': L1NormRegularizedNMF,
56
+ 'CappedNormNMF': CappedNormNMF,
57
+ 'CauchyNMF': CauchyNMF
58
+ }
59
+ # Store datasets in a dictionary
60
+ dataset_dict = {
61
+ 'ORL': 'data/ORL',
62
+ 'YaleB': 'data/CroppedYaleB'
63
+ }
64
+ # Store scalers in a dictionary
65
+ scaler_dict = {
66
+ 'MinMax': MinMaxScaler(),
67
+ 'Standard': StandardScaler()
68
+ }
69
+ folder = dataset_dict.get(dataset, 'data/ORL')
70
+ # Scale the data
71
+ scaler = scaler_dict.get(scaler, MinMaxScaler())
72
+ # Choose an NMF algorithm
73
+ if isinstance(nmf, BasicNMF):
74
+ nmf = nmf
75
+ else:
76
+ # Choose an NMF algorithm
77
+ nmf = nmf_dict.get(nmf, L1NormRegularizedNMF)()
78
+ return folder, scaler, nmf
79
+
80
+ def load_data(self, folder: str, reduce: int=1, random_state: Union[int, np.random.RandomState, None]=None) -> Tuple[np.ndarray, np.ndarray, Tuple[int, int]]:
81
+ """
82
+ Load the data.
83
+
84
+ Parameters:
85
+ - folder (str): Folder of the dataset.
86
+ - reduce (int): Factor by which the image size is reduced for visualization.
87
+ - random_state (Union[int, np.random.RandomState, None]): Random state to use for sampling.
88
+
89
+ Returns:
90
+ - X_hat (np.ndarray): The data matrix.
91
+ - Y_hat (np.ndarray): The label matrix.
92
+ - img_size (Tuple[int, int]): Size of the images.
93
+ """
94
+ # Load ORL dataset
95
+ X_hat, Y_hat = load_data(folder, reduce=reduce)
96
+ # Randomly sample 90% of the data
97
+ X_hat, Y_hat = random_sample(X_hat, Y_hat, 0.9, random_state=random_state)
98
+ # Get the size of images
99
+ img_size = get_image_size(folder)
100
+ return X_hat, Y_hat, img_size
101
+
102
+ def add_noise(self, X_hat: np.ndarray, noise_type: str, noise_level: float, random_state: Union[int, np.random.RandomState, None], reduce: int) -> np.ndarray:
103
+ """
104
+ Add noise to the data.
105
+
106
+ Parameters:
107
+ - X_hat (np.ndarray): The data matrix.
108
+ - noise_type (str): Type of noise to add to the data.
109
+ - noise_level (float): Level of noise to add to the data.
110
+ - random_state (Union[int, np.random.RandomState, None]): Random state to use for adding noise.
111
+ - reduce (int): Factor by which the image size is reduced for visualization.
112
+
113
+ Returns:
114
+ - X_noise (np.ndarray): The noisy data matrix.
115
+ """
116
+ # Set random state and noise adder
117
+ noise_adder = NoiseAdder(random_state=random_state)
118
+ # Create a dictionary of noise functions
119
+ noise_dict = {
120
+ 'uniform': (noise_adder.add_uniform_noise, {'X_hat': X_hat, 'noise_level': noise_level}),
121
+ 'gaussian': (noise_adder.add_gaussian_noise, {'X_hat': X_hat, 'noise_level': noise_level}),
122
+ 'laplacian': (noise_adder.add_laplacian_noise, {'X_hat': X_hat, 'noise_level': noise_level}),
123
+ 'salt_and_pepper': (noise_adder.add_salt_and_pepper_noise, {'X_hat': X_hat, 'noise_level': noise_level}),
124
+ 'block': (noise_adder.add_block_noise, {'X_hat': X_hat, 'block_size': noise_level, 'img_width': self.img_size[0]//reduce})
125
+ }
126
+ # Map the noise type to the noise function
127
+ noise_func, args = noise_dict.get(noise_type, (noise_adder.add_uniform_noise, {'X_hat': X_hat, 'noise_level': noise_level}))
128
+ # Add noise to the data
129
+ _, X_noise = noise_func(**args)
130
+ return X_noise
131
+
132
+ def scale(self, X_hat: np.ndarray, X_noise: np.ndarray, scaler: Union[MinMaxScaler, StandardScaler]) -> Tuple[np.ndarray, np.ndarray]:
133
+ """
134
+ Scale the data.
135
+
136
+ Parameters:
137
+ - X_hat (np.ndarray): The data matrix.
138
+ - X_noise (np.ndarray): The noisy data matrix.
139
+ - scaler (MinMaxScaler or StandardScaler): Scaler to use for scaling the data.
140
+
141
+ Returns:
142
+ - X_hat_scaled (np.ndarray): The scaled data matrix.
143
+ - X_noise_scaled (np.ndarray): The scaled noisy data matrix.
144
+ """
145
+ # Scale the data
146
+ X_hat_scaled = scaler.fit_transform(X_hat)
147
+ X_noise_scaled = scaler.transform(X_noise)
148
+ # Ensure that the scaled noisy data is non-negative
149
+ X_noise_scaled += np.abs(np.min(X_noise_scaled)) * np.abs(np.min(X_noise_scaled)) * int(np.min(X_noise_scaled) < 0)
150
+ return X_hat_scaled, X_noise_scaled
151
+
152
+ class Pipeline(BasicBlock):
153
+ def __init__(self, nmf: Union[str, BasicNMF], dataset: str='ORL', reduce: int=1, noise_type: str='uniform',
154
+ noise_level: float=0.02, random_state: int=3407, scaler: str='MinMax') -> None:
155
+ """
156
+ Initialize the pipeline.
157
+
158
+ Parameters:
159
+ - nmf (str or BasicNMF): Name of the NMF algorithm to use.
160
+ - dataset (str): Name of the dataset to use.
161
+ - reduce (int): Factor by which the image size is reduced for visualization.
162
+ - noise_type (str): Type of noise to add to the data.
163
+ - noise_level (float): Level of noise to add to the data.
164
+ - random_state (int): Random state to use for the NMF algorithm.
165
+ - scaler (str): Name of the scaler to use for scaling the data.
166
+
167
+ Returns:
168
+ None. The function will initialize the pipeline.
169
+ """
170
+ # Get the basic information for the pipeline
171
+ folder, scaler, self.nmf = self.basic_info(nmf, dataset, scaler)
172
+ # Load the data
173
+ X_hat, self.__Y_hat, self.img_size = self.load_data(folder, reduce=reduce, random_state=random_state)
174
+ # Add noise to the data
175
+ X_noise = self.add_noise(X_hat, noise_type, noise_level, random_state, reduce)
176
+ # Scale the data
177
+ self.__X_hat_scaled, self.__X_noise_scaled = self.scale(X_hat, X_noise, scaler)
178
+ self.reduce = reduce
179
+ self.random_state = random_state
180
+ # Delete the attributes that might occupy significant memory
181
+ del X_hat, X_noise, folder, scaler, noise_type, noise_level, random_state, dataset, reduce, nmf
182
+
183
+ def execute(self, max_iter: int, convergence_trend: bool=False, matrix_size: bool=False, verbose: bool=False) -> None:
184
+ """
185
+ Run the pipeline.
186
+
187
+ Parameters:
188
+ - max_iter (int): Maximum number of iterations to run the NMF algorithm.
189
+ - convergence_trend (bool): Whether to display the convergence trend of the NMF algorithm.
190
+ - matrix_size (bool): Whether to display the size of the basis and coefficient matrices.
191
+ - verbose (bool): Whether to display the verbose output of the NMF algorithm.
192
+ """
193
+ # Run NMF
194
+ self.nmf.fit(self.__X_noise_scaled, len(set(self.__Y_hat)), max_iter=max_iter,
195
+ random_state=self.random_state, imshow=convergence_trend, verbose=verbose)
196
+ # Get the dictionary and representation matrices
197
+ self.D, self.R = self.nmf.D, self.nmf.R
198
+ if matrix_size:
199
+ print('D.shape={}, R.shape={}'.format(self.D.shape, self.R.shape))
200
+ self.metrics = self.nmf.evaluate(self.__X_hat_scaled, self.__Y_hat, random_state=self.random_state)
201
+ return self.metrics
202
+
203
+ def evaluate(self, idx: int=2, imshow: bool=False) -> None:
204
+ """
205
+ Evaluate the NMF algorithm.
206
+
207
+ Parameters:
208
+ - idx (int): Index of the image to evaluate.
209
+ - imshow (bool): Whether to display the images.
210
+ """
211
+ evaluate(self.nmf, self.metrics, self.__X_hat_scaled, self.__X_noise_scaled,
212
+ self.img_size, self.reduce, idx, imshow)
213
+
214
+ def visualization(self, idx: int=2) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
215
+ """
216
+ Visualize the NMF algorithm.
217
+
218
+ Parameters:
219
+ - idx (int): Index of the image to visualize.
220
+
221
+ Returns:
222
+ - X_i (np.ndarray): The original image.
223
+ - X_noise_i (np.ndarray): The noisy image.
224
+ - DR_i (np.ndarray): The reconstructed image.
225
+ """
226
+ DR = np.dot(self.D, self.R).reshape(self.__X_hat_scaled.shape[0], self.__X_hat_scaled.shape[1])
227
+ # Calculate reduced image size based on the 'reduce' factor
228
+ img_size = [i//self.reduce for i in self.img_size]
229
+ # Retrieve the specified image from the data
230
+ X_i = self.__X_hat_scaled[:,idx].reshape(img_size[1],img_size[0])
231
+ X_noise_i = self.__X_noise_scaled[:,idx].reshape(img_size[1],img_size[0])
232
+ DR_i = DR[:,idx].reshape(img_size[1],img_size[0])
233
+ return X_i, X_noise_i, DR_i
234
+
235
+ def cleanup(self) -> None:
236
+ """
237
+ Cleanup method to release resources and delete instances.
238
+ """
239
+ # Delete attributes that might occupy significant memory
240
+ if hasattr(self, 'nmf'):
241
+ del self.nmf, self.__X_hat_scaled, self.__X_noise_scaled, self.D, self.R, self.metrics
242
+
243
+ class Experiment:
244
+ """
245
+ Set up the experiment.
246
+ """
247
+ data_dirs = ['data/ORL', 'data/CroppedYaleB']
248
+ data_container = [[], []]
249
+ noises = {
250
+ 'uniform': [0.1, 0.3],
251
+ 'gaussian': [0.05, 0.08],
252
+ 'laplacian': [0.04, 0.06],
253
+ 'salt_and_pepper': [0.02, 0.1],
254
+ 'block': [10, 15],}
255
+
256
+ nmf_dict = {
257
+ 'L2NormNMF': L2NormNMF,
258
+ 'KLDivergenceNMF': KLDivergenceNMF,
259
+ 'ISDivergenceNMF': ISDivergenceNMF,
260
+ 'L21NormNMF': L21NormNMF,
261
+ 'HSCostNMF': HSCostNMF,
262
+ 'L1NormRegularizedNMF': L1NormRegularizedNMF,
263
+ 'CappedNormNMF': CappedNormNMF,
264
+ 'CauchyNMF': CauchyNMF,}
265
+
266
+ def __init__(self,
267
+ seeds: List[int]=None) -> None:
268
+ """
269
+ Initialize the experiment.
270
+
271
+ Parameters:
272
+ - seeds (List[int]): Random seeds to use for the experiment.
273
+ """
274
+ self.seeds = [0, 42, 99, 512, 3407] if seeds is None else seeds
275
+
276
+ def choose(self, nmf: Union[str, BasicNMF]) -> None:
277
+ """
278
+ Choose an NMF algorithm. Essentially, this method sets the NMF algorithm to use for the experiment.
279
+
280
+ nmf (Union[str, BasicNMF]): NMF algorithm to use.
281
+ """
282
+ if isinstance(nmf, BasicNMF):
283
+ self.nmf = nmf
284
+ else:
285
+ # Choose an NMF algorithm
286
+ self.nmf = self.nmf_dict.get(nmf, L1NormRegularizedNMF)()
287
+
288
+ def data_loader(self) -> Generator[Tuple[str, int, np.ndarray, np.ndarray, np.ndarray, str, float], None, None]:
289
+ """
290
+ Construct a generator to load the data.
291
+
292
+ Returns:
293
+ - data_file (str): Name of the dataset.
294
+ - seed (int): Random seed to use for the experiment.
295
+ - X_hat_scaled (np.ndarray): The scaled data matrix.
296
+ - Y_hat (np.ndarray): The label matrix.
297
+ - X_noise_scaled (np.ndarray): The scaled noisy data matrix.
298
+ - noise_type (str): Type of noise to add to the data.
299
+ - noise_level (float): Level of noise to add to the data.
300
+ """
301
+ scaler = MinMaxScaler()
302
+ # Data file loop
303
+ for data_file in self.data_dirs:
304
+ reduce = 1 if data_file.endswith('ORL') else 3
305
+ image_size = get_image_size(data_file)
306
+ X_hat_, Y_hat_ = load_data(root=data_file, reduce=reduce)
307
+ # Random seed loop
308
+ for seed in self.seeds:
309
+ noise_adder = NoiseAdder(random_state=seed)
310
+ X_hat, Y_hat = random_sample(X_hat_, Y_hat_, 0.9, random_state=seed)
311
+ X_hat_scaled = scaler.fit_transform(X_hat)
312
+ # Noise type loop
313
+ for noise_type in self.noises:
314
+ add_noise_ = getattr(noise_adder, f'add_{noise_type}_noise')
315
+ # Noise level loop
316
+ for noise_level in self.noises[noise_type]:
317
+ _, X_noise = add_noise_(X_hat, noise_level=noise_level) if noise_type != 'block' else add_noise_(X_hat, image_size[0]//reduce, noise_level)
318
+ X_noise_scaled = scaler.transform(X_noise)
319
+ X_noise_scaled += np.abs(np.min(X_noise_scaled)) * np.abs(np.min(X_noise_scaled)) * int(np.min(X_noise_scaled) < 0)
320
+ yield data_file.split("/")[-1], seed, X_hat_scaled, Y_hat, X_noise_scaled, noise_type, noise_level
321
+
322
+ def sync_fit(self, dataset: str, seed: int, X_hat_scaled: np.ndarray, Y_hat: np.ndarray, X_noise_scaled: np.ndarray, noise_type: str, noise_level: float) -> Tuple[str, str, float, int, float, float, float]:
323
+ """
324
+ Fit the NMF algorithm on the dataset with noise synchronously.
325
+
326
+ Parameters:
327
+ - dataset (str): Name of the dataset.
328
+ - seed (int): Random seed to use for the experiment.
329
+ - X_hat_scaled (np.ndarray): The scaled data matrix.
330
+ - Y_hat (np.ndarray): The label matrix.
331
+ - X_noise_scaled (np.ndarray): The scaled noisy data matrix.
332
+ - noise_type (str): Type of noise to add to the data.
333
+ - noise_level (float): Level of noise to add to the data.
334
+
335
+ Returns:
336
+ - dataset (str): Name of the dataset.
337
+ - noise_type (str): Type of noise to add to the data.
338
+ - noise_level (float): Level of noise to add to the data.
339
+ - seed (int): Random seed to use for the experiment.
340
+ - rmse (float): Root mean squared error of the NMF algorithm.
341
+ - acc (float): Accuracy of the NMF algorithm.
342
+ - nmi (float): Normalized mutual information of the NMF algorithm.
343
+ """
344
+ self.nmf.fit(X_noise_scaled, len(set(Y_hat)), random_state=seed, verbose=False)
345
+ # Display the current experiment information
346
+ logging.info(f'Dataset: {dataset} Random seed: {seed} - Test on {noise_type} with {noise_level} ended.')
347
+ return dataset, noise_type, noise_level, seed, *self.nmf.evaluate(X_hat_scaled, Y_hat, random_state=seed)
348
+
349
+ def execute(self) -> None:
350
+ """
351
+ Execute the experiments.
352
+ """
353
+ # Lazy import to avoid multiprocessing error
354
+ import multiprocessing
355
+ results = []
356
+ # Define the multiprocessing pool
357
+ with multiprocessing.Pool(10) as pool:
358
+ for result in pool.starmap(self.sync_fit, self.data_loader()):
359
+ # Append the result to the list
360
+ results.append(result)
361
+ # Write the results to a csv file
362
+ if not os.path.exists(f'{self.nmf.name}_log.csv'):
363
+ mode = 'w'
364
+ else:
365
+ mode = 'a'
366
+ with open(f'{self.nmf.name}_log.csv', mode) as f:
367
+ writer = csv.writer(f)
368
+ if mode == 'w':
369
+ writer.writerow(['dataset', 'noise_type', 'noise_level', 'seed', 'rmse', 'acc', 'nmi'])
370
+ for result in results:
371
+ writer.writerow(result)
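If the enclosing benchmark class in algorithm/pipeline.py is exported as, say, `Experiment` (the class name is not visible in this hunk), a driver script might look like the sketch below. It assumes the `data/ORL` and `data/CroppedYaleB` folders are in place; the `if __name__ == '__main__':` guard matters because `execute()` spawns a multiprocessing pool.

```python
# Hypothetical driver for the benchmark class above; `Experiment` is an
# assumed name, not confirmed by this diff.
from algorithm.pipeline import Experiment

if __name__ == '__main__':
    exp = Experiment(seeds=[0, 42])        # fewer seeds for a quicker run
    exp.choose('L1NormRegularizedNMF')     # any key of nmf_dict, or a BasicNMF instance
    exp.execute()                          # results are appended to <name>_log.csv
```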
algorithm/preprocess.py ADDED
@@ -0,0 +1,234 @@
1
+ from typing import Union, Tuple
2
+
3
+ import numpy as np
4
+
5
+ class MinMaxScaler:
6
+ """
7
+ This class scales and transforms features to [0, 1].
8
+ """
9
+ def fit(self, X: np.ndarray) -> None:
10
+ """
11
+ Compute the minimum and the range of the data for later scaling.
12
+
13
+ Parameters:
14
+ - X: numpy array-like, shape (n_samples, n_features)
15
+ The data used to compute the minimum and range used for later scaling.
16
+ """
17
+ self.min_ = np.min(X, axis=0)
18
+ self.range_ = np.max(X, axis=0) - self.min_
19
+
20
+
21
+ def transform(self, X: np.ndarray) -> np.ndarray:
22
+ """
23
+ Scale the data using the values computed during the fit method.
24
+
25
+ Parameters:
26
+ - X: numpy array-like, shape (n_samples, n_features)
27
+ Input data that needs to be scaled.
28
+
29
+ Returns:
30
+ - numpy array, shape (n_samples, n_features)
31
+ Transformed data.
32
+ """
33
+ return (X - self.min_) / self.range_
34
+
35
+ def fit_transform(self, X: np.ndarray) -> np.ndarray:
36
+ """
37
+ Fit to the data and then transform it.
38
+
39
+ Parameters:
40
+ - X: numpy array-like, shape (n_samples, n_features)
41
+ Input data that needs to be scaled and transformed.
42
+
43
+ Returns:
44
+ - numpy array, shape (n_samples, n_features)
45
+ Transformed data.
46
+ """
47
+ self.fit(X)
48
+ return self.transform(X)
49
+
50
+ class StandardScaler:
51
+ """
52
+ This class standardizes features by removing the mean and scaling to unit variance.
53
+ """
54
+ def fit(self, X: np.ndarray) -> None:
55
+ """
56
+ Compute the mean and standard deviation of the data for later standardization.
57
+
58
+ Parameters:
59
+ - X: numpy array-like, shape (n_samples, n_features)
60
+ The data used to compute the mean and standard deviation used for later standardization.
61
+ """
62
+ self.mean_ = np.mean(X, axis=0)
63
+ self.std_ = np.std(X, axis=0)
64
+
65
+ def transform(self, X: np.ndarray) -> np.ndarray:
66
+ """
67
+ Standardize the data using the values computed during the fit method.
68
+
69
+ Parameters:
70
+ - X: numpy array-like, shape (n_samples, n_features)
71
+ Input data that needs to be standardized.
72
+
73
+ Returns:
74
+ - numpy array, shape (n_samples, n_features)
75
+ Transformed data.
76
+ """
77
+ return (X - self.mean_) / self.std_
78
+
79
+ def fit_transform(self, X: np.ndarray) -> np.ndarray:
80
+ """
81
+ Fit to the data and then transform it.
82
+
83
+ Parameters:
84
+ - X: numpy array-like, shape (n_samples, n_features)
85
+ Input data that needs to be standardized and transformed.
86
+
87
+ Returns:
88
+ - numpy array, shape (n_samples, n_features)
89
+ Transformed data.
90
+ """
91
+ self.fit(X)
92
+ return self.transform(X)
93
+
94
+ class NoiseAdder:
95
+ """
96
+ This class adds noise to data.
97
+ """
98
+ def __init__(self, random_state: Union[int, np.random.RandomState, None]=None) -> None:
99
+ """
100
+ Initializes the NoiseAdder with a random state and noise parameters.
101
+
102
+ Parameters:
103
+ - random_state (int or RandomState instance or None): Controls the randomness. If int, is used as seed for RandomState.
105
+ """
106
+ self.rng = np.random.RandomState(random_state)
107
+
108
+ def add_uniform_noise(self, X_hat: np.ndarray, noise_level: float=0.1) -> Tuple[np.ndarray, np.ndarray]:
109
+ """
110
+ Add uniform random noise to data.
111
+
112
+ Parameters:
113
+ - X_hat (numpy array): Original data.
+ - noise_level (float): Scale of the noise relative to the data range.
114
+
115
+ Returns:
116
+ - Numpy array of uniform noise.
117
+ - Numpy array with added uniform noise.
118
+ """
119
+ a, b = 0, 1
120
+ # Generate noise
121
+ X_noise = self.rng.uniform(a, b, size=X_hat.shape) * noise_level * (np.max(X_hat) - np.min(X_hat))
122
+ return X_noise, X_hat + X_noise
123
+
124
+ def add_gaussian_noise(self, X_hat: np.ndarray, noise_level: float=0.1) -> Tuple[np.ndarray, np.ndarray]:
125
+ """
126
+ Add Gaussian noise to data.
127
+
128
+ Parameters:
129
+ - X_hat (numpy array): Original data.
130
+ - noise_level (float): Scale of the noise relative to the data range.
132
+
133
+ Returns:
134
+ - Numpy array of Gaussian noise.
135
+ - Numpy array with added Gaussian noise.
136
+ """
137
+ mean, std = 0, 1
138
+ # Generate noise
139
+ X_noise = self.rng.normal(mean, std, size=X_hat.shape) * noise_level * (np.max(X_hat) - np.min(X_hat))
140
+ return X_noise, X_hat + X_noise
141
+
142
+ def add_laplacian_noise(self, X_hat: np.ndarray, noise_level: float=0.1) -> Tuple[np.ndarray, np.ndarray]:
143
+ """
144
+ Add Laplacian noise to data.
145
+
146
+ Parameters:
147
+ - X_hat (numpy array): Original data.
148
+ - noise_level (float): Scale of the noise relative to the data maximum.
150
+
151
+ Returns:
152
+ - Numpy array of Laplacian noise.
153
+ - Numpy array with added Laplacian noise.
154
+ """
155
+ # Initialize parameters
156
+ mu, lambd = 0, 1
157
+ # Generate noise
158
+ X_noise = self.rng.laplace(mu, lambd, size=X_hat.shape) * noise_level * np.max(X_hat)
159
+ return X_noise, X_hat + X_noise
160
+
161
+ def add_block_noise(self, X_hat: np.ndarray, img_width: int, block_size: int=10) -> Tuple[np.ndarray, np.ndarray]:
162
+ """
163
+ Add block noise to multiple flattened image samples.
164
+
165
+ Parameters:
166
+ - X_hat (numpy array): Array of shape (m, n) where m is the flattened image length and n is the number of samples.
167
+ - img_width (int): width of the original image
168
+ - block_size (int): size of the block to occlude
169
+
170
+ Returns:
171
+ - Numpy array of noise added to each sample
172
+ - Numpy array with added block noise for all samples
173
+ """
174
+ # Initialize parameters
175
+ X = X_hat.copy()
176
+ m, n_samples = X.shape
177
+ X_noise = np.zeros((m, n_samples), dtype=np.uint8)
178
+ # For each sample in X
179
+ for i in range(n_samples):
180
+ sample = X[:, i]
181
+ # Reshape the flattened array to 2D
182
+ img_2d = sample.reshape(-1, img_width)
183
+ height, width = img_2d.shape
184
+ # Ensure the block size isn't larger than the image dimensions
185
+ block_size = min(block_size, width, height)
186
+ # Generate a random starting point for the block
187
+ x_start = self.rng.randint(0, width - block_size)
188
+ y_start = self.rng.randint(0, height - block_size)
189
+ # Add block noise
190
+ img_2d[y_start:y_start+block_size, x_start:x_start+block_size] = 255
191
+ # Store the noise block to noise array
192
+ noise_2d = np.zeros((height, width), dtype=np.uint8)
193
+ noise_2d[y_start:y_start+block_size, x_start:x_start+block_size] = 255
194
+ X_noise[:, i] = noise_2d.ravel()
195
+ # Flatten the array back to 1D and store back in X
196
+ X[:, i] = img_2d.ravel()
197
+ return X_noise, X
198
+
199
+ def add_salt_and_pepper_noise(self, X_hat: np.ndarray, noise_level: float=0.02, salt_ratio: float=0.5) -> Tuple[np.ndarray, np.ndarray]:
200
+ """
201
+ Add "salt and pepper" noise to data.
202
+
203
+ Parameters:
204
+ - X_hat (numpy array): Original data.
205
+ - noise_level (float): Proportion of image pixels to be replaced.
206
+ - salt_ratio (float): Proportion of replaced pixels that are "salt".
207
+
208
+ Returns:
209
+ - Numpy array of salt and pepper noise.
210
+ - Numpy array with added salt and pepper noise.
211
+ """
212
+ # Initialize parameters
213
+ X = X_hat.copy()
214
+ X_noise = np.zeros_like(X)
215
+ # Get the total number of pixels that should be replaced by noise
216
+ total_pixels = X.size
217
+ num_noise_pixels = int(total_pixels * noise_level)
218
+ # Separate the number of salt and pepper pixels based on the salt_ratio
219
+ num_salt = int(num_noise_pixels * salt_ratio)
220
+ num_pepper = num_noise_pixels - num_salt
221
+ # Directly generate the noise coordinates without overlap
222
+ noise_coords = self.rng.choice(total_pixels, num_noise_pixels, replace=False)
223
+ salt_coords = noise_coords[:num_salt]
224
+ pepper_coords = noise_coords[num_salt:]
225
+ # Convert the 1D noise coordinates back to tuple of N-dim coordinates
226
+ salt_coords = np.unravel_index(salt_coords, X.shape)
227
+ pepper_coords = np.unravel_index(pepper_coords, X.shape)
228
+ # Set salt and pepper pixels in the image
229
+ max_pixel_val = np.max(X)
230
+ X_noise[salt_coords] = max_pixel_val
231
+ X_noise[pepper_coords] = 0
232
+ X[salt_coords] = max_pixel_val
233
+ X[pepper_coords] = 0
234
+ return X_noise, X
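A minimal, self-contained sketch of how these preprocessing helpers compose, mirroring the order used in the experiment loop above: noise is added to the raw matrix, the scaler is fitted on the clean data, and the same scaling is reused on the noisy copy. The toy matrix here stands in for the flattened image data.

```python
import numpy as np
from algorithm.preprocess import MinMaxScaler, NoiseAdder

X = np.random.RandomState(0).rand(64, 10)            # 64 features x 10 toy samples

adder = NoiseAdder(random_state=42)
_, X_noisy = adder.add_salt_and_pepper_noise(X, noise_level=0.02)

scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)                    # fit on the clean data
X_noisy_scaled = scaler.transform(X_noisy)            # reuse the same min/range
print(X_noisy_scaled.shape, X_noisy_scaled.min(), X_noisy_scaled.max())
```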
algorithm/sample.py ADDED
@@ -0,0 +1,37 @@
1
+ from typing import Tuple, Union
2
+
3
+ import numpy as np
4
+
5
+ def random_sample(X: np.ndarray, Y: np.ndarray, fraction: float=0.90, random_state: Union[int, np.random.RandomState, None]=None) -> Tuple[np.ndarray, np.ndarray]:
6
+ """
7
+ Randomly sample a fraction of the data.
8
+
9
+ Parameters:
10
+ - X (numpy.ndarray): The input data matrix of shape (n_features, n_samples)
11
+ where n_samples is the number of samples, and n_features
12
+ is the number of features.
13
+ - Y (numpy.ndarray): The output data matrix of shape (n_samples, )
14
+ - fraction (float): The fraction of the data to be sampled.
15
+ - random_state (int): The seed for the random number generator.
16
+
17
+ Returns:
18
+ - X_sample (numpy.ndarray): The sampled data matrix of shape (n_features, sample_size).
19
+ - Y_sample (numpy.ndarray): The corresponding sampled labels of shape (sample_size, ).
20
+
21
+ """
22
+
23
+ # Create a random number generator
24
+ rng = np.random.default_rng(random_state)
25
+
26
+ # Compute the number of samples to be drawn
27
+ n_samples = X.shape[1]
28
+ sample_size = int(fraction * n_samples)
29
+
30
+ # Randomly sample the indices
31
+ sampled_indices = rng.choice(n_samples, sample_size, replace=False)
32
+
33
+ # Use the sampled indices to extract columns from the original data
34
+ X_sample = X[:, sampled_indices]
35
+ Y_sample = Y[sampled_indices]
36
+
37
+ return X_sample, Y_sample
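For illustration, a small usage example of `random_sample`, which keeps a random subset of columns and the matching labels:

```python
import numpy as np
from algorithm.sample import random_sample

X = np.arange(20).reshape(4, 5)            # 4 features x 5 samples (columns)
Y = np.array([0, 0, 1, 1, 2])
X_s, Y_s = random_sample(X, Y, fraction=0.8, random_state=0)
print(X_s.shape, Y_s)                      # -> (4, 4) and the corresponding labels
```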
algorithm/user_evaluate.py ADDED
@@ -0,0 +1,32 @@
1
+ from typing import Tuple
2
+
3
+ import numpy as np
4
+
5
+ from algorithm.nmf import BasicNMF
6
+ from algorithm.visualize import origin_versus_dictrep
7
+
8
+ def evaluate(nmf: BasicNMF, metrics: Tuple, X: np.ndarray, X_noise: np.ndarray,
9
+ image_size: tuple, reduce: int, idx=2, imshow: bool=False) -> None:
10
+
11
+ """
12
+ Evaluate the performance of NMF algorithms.
13
+
14
+ Parameters
15
+ - nmf (BasicNMF): The NMF algorithm.
16
+ - metrics (tuple): The evaluation metrics, (rmse, acc, nmi).
17
+ - X (numpy.ndarray): The original data matrix, shape (n_features, n_samples).
18
+ - X_noise (numpy.ndarray): The noisy data matrix, shape (n_features, n_samples).
19
+ - image_size (tuple): The size of images.
20
+ - reduce (int): The reduction ratio of images.
21
+ - idx (int): The index of the image to be visualized.
22
+ - random_state (int): The random state.
23
+ """
24
+ # Start to evaluate
25
+ print('Evaluating...')
26
+ rmse, acc, nmi = metrics
27
+ # Report the evaluation metrics
28
+ print('RMSE = {:.4f}'.format(rmse))
29
+ print('Accuracy = {:.4f}'.format(acc))
30
+ print('NMI = {:.4f}'.format(nmi))
31
+ if imshow:
32
+ origin_versus_dictrep(X, nmf.D, nmf.R, X_noise, image_size=image_size, reduce=reduce, idx=idx)
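A hedged end-to-end sketch of how `evaluate` might be called on toy data. It assumes `L1NormRegularizedNMF` is importable from algorithm.nmf and exposes the `fit`/`evaluate` signatures used in `sync_fit` above (neither is visible in this hunk); `imshow=False` keeps the run figure-free.

```python
import numpy as np
from algorithm.nmf import L1NormRegularizedNMF   # assumed import path
from algorithm.user_evaluate import evaluate

rng = np.random.RandomState(0)
X = rng.rand(100, 30)                            # 100 features x 30 toy samples in [0, 1]
Y = np.repeat(np.arange(3), 10)                  # 3 fake classes
X_noise = np.clip(X + 0.05 * rng.randn(*X.shape), 0, None)

nmf = L1NormRegularizedNMF()
nmf.fit(X_noise, len(set(Y)), random_state=0, verbose=False)
metrics = nmf.evaluate(X, Y, random_state=0)     # assumed to return (rmse, acc, nmi)
evaluate(nmf, metrics, X, X_noise, image_size=(10, 10), reduce=1, idx=2, imshow=False)
```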
algorithm/visualize.py ADDED
@@ -0,0 +1,161 @@
1
+ import numpy as np
2
+ import matplotlib.pyplot as plt
3
+
4
+ def origin_plus_noise(X_hat: np.ndarray, X_noise: np.ndarray, X: np.ndarray, image_size: tuple, reduce: int, idx: int=2) -> None:
5
+ """
6
+ Display the original image, the noise, and the image with added noise side by side.
7
+
8
+ Parameters:
9
+ - X_hat (numpy.ndarray): Original image data.
10
+ - X_noise (numpy.ndarray): Noise data to be added to the original image.
+ - X (numpy.ndarray): Image data with the noise already added.
11
+ - image_size (tuple): Size of the original image as (width, height).
12
+ - reduce (int): Factor to downscale the image dimensions.
13
+ - idx (int, optional): Index of the image to be displayed. Default is 2.
14
+ """
15
+
16
+ # Calculate reduced image size based on the 'reduce' factor
17
+ img_size = [i//reduce for i in image_size]
18
+
19
+ # Retrieve the specified image from the data
20
+ X_hat_i = X_hat[:,idx].reshape(img_size[1],img_size[0])
21
+ X_noise_i = X_noise[:,idx].reshape(img_size[1],img_size[0])
22
+ X_i = X[:,idx].reshape(img_size[1],img_size[0])
23
+
24
+ # Set up the figure for displaying images
25
+ plt.figure(figsize=(12,3)) # Adjusted size for better visualization
26
+
27
+ # Display the original image
28
+ plt.subplot(151) # Adjusted to 1x4 grid for space to '+' and '=' symbols
29
+ plt.imshow(X_hat_i, cmap=plt.cm.gray)
30
+ plt.title('Image(Original)')
31
+ plt.axis('off') # Hide axis for a cleaner look
32
+
33
+ # Place '+' symbol between images
34
+ plt.subplot(152)
35
+ plt.text(0.5, 0.5, '+', fontsize=20, ha='center', va='center')
36
+ plt.axis('off') # Hide axis
37
+
38
+ # Display the noise
39
+ plt.subplot(153)
40
+ plt.imshow(X_noise_i, cmap=plt.cm.gray)
41
+ plt.title('Noise')
42
+ plt.axis('off') # Hide axis for a cleaner look
43
+
44
+ # Place '=' symbol between images
45
+ plt.subplot(154)
46
+ plt.text(0.5, 0.5, '=', fontsize=20, ha='center', va='center')
47
+ plt.axis('off') # Hide axis
48
+
49
+ # Display the image with added noise
50
+ plt.subplot(155)
51
+ plt.imshow(X_i, cmap=plt.cm.gray)
52
+ plt.title('Image(Noise)')
53
+ plt.axis('off') # Hide axis for a cleaner look
54
+
55
+ # Render the figure
56
+ plt.tight_layout() # Ensure no overlap between subplots
57
+ plt.show()
58
+
59
+ def origin_versus_dictrep(X: np.ndarray, D: np.ndarray, R: np.ndarray, X_noise: np.ndarray, image_size: tuple, reduce: int, idx: int) -> tuple:
60
+ """
61
+ Display the original, noise-added, and dictionary-reconstructed images side by side.
62
+
63
+ Parameters:
64
+ - X (numpy.ndarray): Original data matrix of shape (n_features, n_samples).
65
+ - D (numpy.ndarray): Basis matrix obtained from dictionary learning.
66
+ - R (numpy.ndarray): Coefficient matrix.
67
+ - X_noise (numpy.ndarray): Noise-added version of the original data matrix.
68
+ - image_size (tuple): Tuple containing the width and height of the image.
69
+ - reduce (int): Factor by which the image size is reduced for visualization.
70
+ - idx (int): Index of the image to display.
71
+
72
+ Returns:
73
+ None. The function will plot and display the images using matplotlib.
74
+ """
75
+
76
+ DR = np.dot(D, R).reshape(X.shape[0], X.shape[1])
77
+ # Calculate reduced image size based on the 'reduce' factor
78
+ img_size = [i//reduce for i in image_size]
79
+
80
+ # Retrieve the specified image from the data
81
+ X_i = X[:,idx].reshape(img_size[1],img_size[0])
82
+ X_noise_i = X_noise[:,idx].reshape(img_size[1],img_size[0])
83
+ DR_i = DR[:,idx].reshape(img_size[1],img_size[0])
84
+
85
+ # Set up the figure for displaying images
86
+ plt.figure(figsize=(12,3)) # Adjusted size for better visualization
87
+
88
+ # Display the original image
89
+ plt.subplot(131)
90
+ plt.imshow(X_i, cmap=plt.cm.gray)
91
+ plt.title('Image(Original)')
92
+ plt.axis('off')
93
+
94
+ # Display the noisy image
95
+ plt.subplot(132)
96
+ plt.imshow(X_noise_i, cmap=plt.cm.gray)
97
+ plt.title('Image(Noise)')
98
+ plt.axis('off')
99
+
100
+ # Display the reconstructed image
101
+ plt.subplot(133)
102
+ plt.imshow(DR_i, cmap=plt.cm.gray)
103
+ plt.title('Image(Reconstructed)')
104
+ plt.axis('off')
105
+
106
+ # Render the figure
107
+ plt.tight_layout()
108
+ plt.show()
109
+
110
+ return X_i, X_noise_i, DR_i
111
+
112
+ def origin_noise_dictrep(X: np.ndarray, X_noise: np.ndarray, D: np.ndarray, R: np.ndarray, image_size: tuple, reduce: int, idx: int) -> None:
113
+ """
114
+ Display the original image, its noise version, and its dictionary-reconstructed representation side by side.
115
+
116
+ Parameters:
117
+ - X (numpy.ndarray): Original data matrix of shape (n_features, n_samples).
118
+ - X_noise (numpy.ndarray): Noise-added version of the original data matrix.
119
+ - D (numpy.ndarray): Basis matrix obtained from dictionary learning.
120
+ - R (numpy.ndarray): Coefficient matrix.
121
+ - image_size (tuple): Tuple containing the width and height of the image.
122
+ - reduce (int): Factor by which the image size is reduced for visualization.
123
+ - idx (int): Index of the image to display.
124
+
125
+ Returns:
126
+ None. The function will plot and display the images using matplotlib.
127
+ """
128
+
129
+ DR = np.dot(D, R).reshape(X.shape[0], X.shape[1])
130
+ # Calculate reduced image size based on the 'reduce' factor
131
+ img_size = [i//reduce for i in image_size]
132
+
133
+ # Retrieve the specified image from the data
134
+ X_i = X[:,idx].reshape(img_size[1],img_size[0])
135
+ X_noise_i = X_noise[:,idx].reshape(img_size[1],img_size[0])
136
+ DR_i = DR[:,idx].reshape(img_size[1],img_size[0])
137
+
138
+ # Set up the figure for displaying images
139
+ plt.figure(figsize=(12,3)) # Adjusted size for better visualization
140
+
141
+ # Display the original image
142
+ plt.subplot(131)
143
+ plt.imshow(X_i, cmap=plt.cm.gray)
144
+ plt.title('Image(Original)')
145
+ plt.axis('off')
146
+
147
+ # Display the noise
148
+ plt.subplot(132)
149
+ plt.imshow(X_noise_i, cmap=plt.cm.gray)
150
+ plt.title('Image(Noise)')
151
+ plt.axis('off')
152
+
153
+ # Display the reconstructed image
154
+ plt.subplot(133)
155
+ plt.imshow(DR_i, cmap=plt.cm.gray)
156
+ plt.title('Image(Reconstructed)')
157
+ plt.axis('off')
158
+
159
+ # Render the figure
160
+ plt.tight_layout()
161
+ plt.show()
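A quick self-contained check of the plotting helpers on synthetic images; note that `image_size` is passed as (width, height) so that each flattened column reshapes to `(height, width)` as the functions expect:

```python
import numpy as np
from algorithm.visualize import origin_plus_noise

image_size = (20, 25)                            # (width, height) of each toy image
n_pixels = image_size[0] * image_size[1]
rng = np.random.RandomState(0)

X_hat = rng.rand(n_pixels, 5)                    # 5 flattened images as columns
X_noise = 0.2 * rng.rand(n_pixels, 5)            # noise with the same layout
X = X_hat + X_noise                              # noisy images

origin_plus_noise(X_hat, X_noise, X, image_size=image_size, reduce=1, idx=2)
```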
app.py ADDED
@@ -0,0 +1,196 @@
1
+ import PIL
2
+ import numpy as np
3
+ import gradio as gr
4
+
5
+ from algorithm.pipeline import Pipeline
6
+
7
+ class App:
8
+ def __init__(self,
9
+ nmf='L1NormRegularizedNMF',
10
+ dataset='YaleB',
11
+ reduce=3,
12
+ noise_type='salt_and_pepper',
13
+ noise_level=0.10,
14
+ random_state=99,
15
+ scaler='MinMax'):
16
+ self.pipeline = Pipeline(nmf=nmf,
17
+ dataset=dataset,
18
+ reduce=reduce,
19
+ noise_type=noise_type,
20
+ noise_level=noise_level,
21
+ random_state=random_state,
22
+ scaler=scaler)
23
+
24
+ def align_reduce(self, dataset_name):
25
+ return 1 if dataset_name == 'ORL' else 3
26
+
27
+ def reset_pipeline(self, nmf, dataset, reduce, noise_type, noise_level, random_state, scaler):
28
+ noise_type, noise_level = self.convert_level_to_number(noise_type, noise_level)
29
+ self.pipeline = Pipeline(nmf=nmf,
30
+ dataset=dataset,
31
+ reduce=reduce,
32
+ noise_type=noise_type,
33
+ noise_level=noise_level,
34
+ random_state=random_state,
35
+ scaler=scaler)
36
+
37
+ def convert_level_to_number(self, type, level):
38
+ map_dict = {"Uniform": {"Low": 0.1, "High": 0.3},
39
+ "Gaussian": {"Low": 0.05, "High": 0.08},
40
+ "Laplacian": {"Low": 0.04, "High": 0.06},
41
+ "Salt & Pepper": {"Low": 0.02, "High": 0.1},
42
+ "Block": {"Low": 10, "High": 15}}
43
+ type_name = type.lower() if type != "Salt & Pepper" else "salt_and_pepper"
44
+ return type_name, map_dict[type][level]
45
+
46
+ def execute(self, max_iter=500, idx=9):
47
+ self.pipeline.execute(max_iter=max_iter)
48
+ return *self.visualize(idx), *self.metrics()
49
+
50
+ def visualize(self, idx=9):
51
+ image_raw, image_noise, image_recon = self.pipeline.visualization(idx=idx)
52
+ return self.array2image(image_raw), self.array2image(image_noise), self.array2image(image_recon)
53
+
54
+ def metrics(self):
55
+ return self.pipeline.metrics
56
+
57
+ def array2image(self, array):
58
+ image_size = self.pipeline.img_size
59
+ return PIL.Image.fromarray(self.scale_pixel(array)).resize((image_size))
60
+
61
+ def scale_pixel(self, image):
62
+ return ((image - image.min()) / (image.max() - image.min()) * 255).astype(np.uint8)
63
+
64
+ def clear_params(self):
65
+ self.pipeline = Pipeline()
66
+ return 'L1NormRegularizedNMF', 'YaleB', 3, 'Salt & Pepper', 'Low', 99, 'MinMax'
67
+
68
+ app = App()
69
+ image_size = app.pipeline.img_size
70
+
71
+ with gr.Blocks() as demo:
72
+ gr.Markdown("# NMF Image Reconstruction")
73
+ with gr.Row():
74
+ with gr.Group():
75
+ with gr.Row():
76
+ nmf = gr.Dropdown(
77
+ label="NMF Algorithm",
78
+ choices=['L1NormRegularizedNMF', 'L2NormNMF', 'KLDivergenceNMF',
79
+ 'ISDivergenceNMF', 'L21NormNMF', 'HSCostNMF',
80
+ 'CappedNormNMF', 'CauchyNMF'],
81
+ value='L1NormRegularizedNMF',
82
+ info="Choose the NMF algorithm.")
83
+
84
+ dataset = gr.Dropdown(
85
+ label="Dataset",
86
+ choices=['ORL', 'YaleB'],
87
+ value='YaleB',
88
+ info="Choose the dataset.")
89
+
90
+ reduce = gr.Number(
91
+ value=3,
92
+ label="Reduce",
93
+ info="Choose the reduce.")
94
+
95
+ with gr.Row():
96
+ noise_type = gr.Dropdown(
97
+ label="Noise Type",
98
+ choices=['Uniform', 'Gaussian', 'Laplacian', 'Salt & Pepper', 'Block'],
99
+ value='Salt & Pepper',
100
+ info="Choose the noise type.")
101
+
102
+ noise_level = gr.Radio(
103
+ choices=['Low', 'High'],
104
+ value='Low',
105
+ label="Noise Level",
106
+ info="Choose the noise level."
107
+ )
108
+
109
+ with gr.Row():
110
+ random_state = gr.Number(
111
+ value=99,
112
+ label="Random State",
113
+ info="Choose the random state.",)
114
+
115
+ scaler = gr.Dropdown(
116
+ label="Scaler",
117
+ choices=['MinMax', 'Standard'],
118
+ value='MinMax',
119
+ info="Choose the scaler.")
120
+
121
+ with gr.Row():
122
+ max_iter = gr.Number(
123
+ value=500,
124
+ label="Max Iteration",
125
+ info="Choose the max iteration.")
126
+ idx = gr.Number(
127
+ value=9,
128
+ label="Image Index",
129
+ info="Choose the image index.")
130
+
131
+ with gr.Row():
132
+ execute_bt = gr.Button(value="Execute Algorithm",)
133
+ clear_params_bt = gr.Button(
134
+ value="Clear Parameters")
135
+
136
+ with gr.Group():
137
+ with gr.Row():
138
+
139
+ output_image_raw = gr.Image(
140
+ height=image_size[1],
141
+ width=image_size[0],
142
+ image_mode="L",
143
+ label="Original Image",
144
+ show_download_button=True,
145
+ show_share_button=True,)
146
+ output_image_noise = gr.Image(
147
+ height=image_size[1],
148
+ width=image_size[0],
149
+ label="Noisy Image",
150
+ image_mode="L",
151
+ show_download_button=True,
152
+ show_share_button=True,)
153
+ output_image_recon = gr.Image(
154
+ height=image_size[1],
155
+ width=image_size[0],
156
+ label="Reconstructed Image",
157
+ image_mode="L",
158
+ show_download_button=True,
159
+ show_share_button=True,)
160
+
161
+ with gr.Row():
162
+ rmse = gr.Number(
163
+ label="RMSE",
164
+ info="Average root mean square error",
165
+ precision=4,)
166
+ acc = gr.Number(
167
+ label="Acc",
168
+ info="Accuracy",
169
+ precision=4,)
170
+ nmi = gr.Number(
171
+ label="NMI",
172
+ info="Normalized mutual information",
173
+ precision=4,)
174
+
175
+ clear_output_bt = gr.ClearButton(
176
+ value="Clear Output",
177
+ components=[output_image_raw, output_image_noise, output_image_recon, rmse, acc, nmi],)
178
+
179
+ nmf.input(app.reset_pipeline, inputs=[nmf, dataset, reduce, noise_type, noise_level, random_state, scaler])
180
+ dataset.input(app.reset_pipeline, inputs=[nmf, dataset, reduce, noise_type, noise_level, random_state, scaler])
181
+ dataset.input(app.align_reduce, inputs=[dataset], outputs=[reduce])
182
+ reduce.input(app.reset_pipeline, inputs=[nmf, dataset, reduce, noise_type, noise_level, random_state, scaler])
183
+ noise_type.input(app.reset_pipeline, inputs=[nmf, dataset, reduce, noise_type, noise_level, random_state, scaler])
184
+ noise_level.input(app.reset_pipeline, inputs=[nmf, dataset, reduce, noise_type, noise_level, random_state, scaler])
185
+ random_state.input(app.reset_pipeline, inputs=[nmf, dataset, reduce, noise_type, noise_level, random_state, scaler])
186
+ scaler.input(app.reset_pipeline, inputs=[nmf, dataset, reduce, noise_type, noise_level, random_state, scaler])
187
+ idx.input(app.visualize, inputs=[idx], outputs=[output_image_raw, output_image_noise, output_image_recon])
188
+ execute_bt.click(app.execute, inputs=[max_iter, idx], outputs=[output_image_raw, output_image_noise, output_image_recon, rmse, acc, nmi])
189
+ clear_params_bt.click(app.clear_params, outputs=[nmf, dataset, reduce, noise_type, noise_level, random_state, scaler])
190
+
191
+ if __name__ == '__main__':
192
+ demo.queue()
193
+ demo.launch(inbrowser=True,
194
+ share=True,
195
+ server_name="0.0.0.0",
196
+ server_port=8080)
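Besides launching the Gradio demo with `python app.py`, the same pipeline can be driven headlessly. The sketch below assumes `algorithm.pipeline.Pipeline` exposes `execute`, `metrics`, and the constructor arguments exactly as the `App` class uses them above (the Pipeline definition is not shown in this hunk).

```python
# Headless run without the Gradio UI (Pipeline behavior assumed from its use in App).
from algorithm.pipeline import Pipeline

pipe = Pipeline(nmf='L1NormRegularizedNMF', dataset='YaleB', reduce=3,
                noise_type='salt_and_pepper', noise_level=0.10,
                random_state=99, scaler='MinMax')
pipe.execute(max_iter=200)
rmse, acc, nmi = pipe.metrics            # assumed to be the (rmse, acc, nmi) tuple
print(f'RMSE={rmse:.4f}  Acc={acc:.4f}  NMI={nmi:.4f}')
```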
data/.DS_Store ADDED
Binary file (6.15 kB). View file
 
data/CroppedYaleB/.DS_Store ADDED
Binary file (6.15 kB). View file
 
data/CroppedYaleB/yaleB01/DEADJOE ADDED
@@ -0,0 +1,3 @@
1
+
2
+ *** Modified files in JOE when it aborted on Fri May 18 01:57:34 2001
3
+ *** JOE was aborted by signal 1
data/CroppedYaleB/yaleB01/yaleB01_P00.info ADDED
@@ -0,0 +1,23 @@
 
1
+ yaleB01_P00_Ambient.pgm
2
+ yaleB01_P00A+000E+00.pgm
3
+ yaleB01_P00A+010E-20.pgm
4
+ yaleB01_P00A+020E-10.pgm
5
+ yaleB01_P00A+025E+00.pgm
6
+ yaleB01_P00A+020E+10.pgm
7
+ yaleB01_P00A+015E+20.pgm
8
+ yaleB01_P00A+000E+20.pgm
9
+ yaleB01_P00A-015E+20.pgm
10
+ yaleB01_P00A-020E+10.pgm
11
+ yaleB01_P00A-025E+00.pgm
12
+ yaleB01_P00A-020E-10.pgm
13
+ yaleB01_P00A-010E-20.pgm
14
+ yaleB01_P00A+000E-20.pgm
15
+ yaleB01_P00A-035E-20.pgm
16
+ yaleB01_P00A-035E+15.pgm
17
+ yaleB01_P00A+000E-35.pgm
18
+ yaleB01_P00A-005E-10.pgm
19
+ yaleB01_P00A-010E+00.pgm
20
+ yaleB01_P00A-005E+10.pgm
21
+ yaleB01_P00A+005E+10.pgm
22
+ yaleB01_P00A+010E+00.pgm
23
+ yaleB01_P00A+005E-10.pgm
data/CroppedYaleB/yaleB01/yaleB01_P00A+000E+00.pgm ADDED

Git LFS Details

  • SHA256: 73371b1ff0eb9ff5e1e9d94c16a1f96ce7a4429d1b4776d5eef58b9bcacbfd11
  • Pointer size: 130 Bytes
  • Size of remote file: 32.3 kB
data/CroppedYaleB/yaleB01/yaleB01_P00A+000E+20.pgm ADDED

Git LFS Details

  • SHA256: 11d5edb69039c00085453982c11ed1787aa98b158d6b369c67932b6fa7ee746a
  • Pointer size: 130 Bytes
  • Size of remote file: 32.3 kB
data/CroppedYaleB/yaleB01/yaleB01_P00A+000E+45.pgm ADDED

Git LFS Details

  • SHA256: fb84647a4ecf0c417e4212982cc22a8bacf68ce1f1b6bf70cf6cfc234458999b
  • Pointer size: 130 Bytes
  • Size of remote file: 32.3 kB
data/CroppedYaleB/yaleB01/yaleB01_P00A+000E+90.pgm ADDED

Git LFS Details

  • SHA256: f71d1c12a3bd66648e257cd04946cfd3ac5a965728a9504a903ce1c40ff59b72
  • Pointer size: 130 Bytes
  • Size of remote file: 32.3 kB
data/CroppedYaleB/yaleB01/yaleB01_P00A+000E-20.pgm ADDED

Git LFS Details

  • SHA256: 8ffd75a156b67d0c160e41971a5a8d9276257369f69de69f4e7323d57a118f43
  • Pointer size: 130 Bytes
  • Size of remote file: 32.3 kB
data/CroppedYaleB/yaleB01/yaleB01_P00A+000E-35.pgm ADDED

Git LFS Details

  • SHA256: 0858b6739e82a940a7e85884bdc488ac8c3454c45371c7167e565e8e24f19db1
  • Pointer size: 130 Bytes
  • Size of remote file: 32.3 kB
data/CroppedYaleB/yaleB01/yaleB01_P00A+005E+10.pgm ADDED

Git LFS Details

  • SHA256: 6bea5747a75e7c031253fff4e627944fa4a735f41eeb50a55fcf5b8ed477bb83
  • Pointer size: 130 Bytes
  • Size of remote file: 32.3 kB
data/CroppedYaleB/yaleB01/yaleB01_P00A+005E-10.pgm ADDED

Git LFS Details

  • SHA256: 4aef3852347f1723bc795a2ba9e1b15b20a4176ba1d32d74c319091ad1d9fd83
  • Pointer size: 130 Bytes
  • Size of remote file: 32.3 kB
data/CroppedYaleB/yaleB01/yaleB01_P00A+010E+00.pgm ADDED

Git LFS Details

  • SHA256: e91521dea749ec8820d155246ce4854639518b27704f15605bdf34ed248a9cc3
  • Pointer size: 130 Bytes
  • Size of remote file: 32.3 kB
data/CroppedYaleB/yaleB01/yaleB01_P00A+010E-20.pgm ADDED

Git LFS Details

  • SHA256: 29d058adf69ed78dd2b80e7e926758d69bd1e2473bab86a7cbc0b38ffa730c38
  • Pointer size: 130 Bytes
  • Size of remote file: 32.3 kB
data/CroppedYaleB/yaleB01/yaleB01_P00A+015E+20.pgm ADDED

Git LFS Details

  • SHA256: c00a1302e691a2e9042bc3e7ce6f745eab40cc6c81daa925d1e4f7c502b3712d
  • Pointer size: 130 Bytes
  • Size of remote file: 32.3 kB
data/CroppedYaleB/yaleB01/yaleB01_P00A+020E+10.pgm ADDED

Git LFS Details

  • SHA256: 73b1a0c1128cb41214b4c507d2019c6ecf354b6da0c3e9269fc70936c333149c
  • Pointer size: 130 Bytes
  • Size of remote file: 32.3 kB
data/CroppedYaleB/yaleB01/yaleB01_P00A+020E-10.pgm ADDED

Git LFS Details

  • SHA256: 485f04a7abb217d95f48721a929dd83af359581a9d3caaa28852a551fb36c8b5
  • Pointer size: 130 Bytes
  • Size of remote file: 32.3 kB
data/CroppedYaleB/yaleB01/yaleB01_P00A+020E-40.pgm ADDED

Git LFS Details

  • SHA256: d3b711373033bbb178759b5f7cd7802b90d0e44cbfffa951c5b7eaffc2006e24
  • Pointer size: 130 Bytes
  • Size of remote file: 32.3 kB
data/CroppedYaleB/yaleB01/yaleB01_P00A+025E+00.pgm ADDED

Git LFS Details

  • SHA256: f5462d6a65c9f56462211a60e29fc7328fba381c297a44826b57aa4e894dbc66
  • Pointer size: 130 Bytes
  • Size of remote file: 32.3 kB
data/CroppedYaleB/yaleB01/yaleB01_P00A+035E+15.pgm ADDED

Git LFS Details

  • SHA256: c2d4b56cc36791862710177cee9bb4f53471f55a46950bb2ece4675e2070e213
  • Pointer size: 130 Bytes
  • Size of remote file: 32.3 kB
data/CroppedYaleB/yaleB01/yaleB01_P00A+035E+40.pgm ADDED

Git LFS Details

  • SHA256: 6f1856d82d1b0c57288139a27a512666364100dfd97ec61be472b97a133ee4f6
  • Pointer size: 130 Bytes
  • Size of remote file: 32.3 kB
data/CroppedYaleB/yaleB01/yaleB01_P00A+035E+65.pgm ADDED

Git LFS Details

  • SHA256: 023060d133de81df518fdee8aa5c59998e102166f0663b757f69aaa215878bdb
  • Pointer size: 130 Bytes
  • Size of remote file: 32.3 kB
data/CroppedYaleB/yaleB01/yaleB01_P00A+035E-20.pgm ADDED

Git LFS Details

  • SHA256: a3fd5b91e1dd93566c14eefa3a45821a3fd4c16024b67db995f9a6f3dbdff0b5
  • Pointer size: 130 Bytes
  • Size of remote file: 32.3 kB
data/CroppedYaleB/yaleB01/yaleB01_P00A+050E+00.pgm ADDED

Git LFS Details

  • SHA256: 337bea8e8f5426a15bd85a9ae0522d64342a70e71a611a5247e2bce305d39718
  • Pointer size: 130 Bytes
  • Size of remote file: 32.3 kB
data/CroppedYaleB/yaleB01/yaleB01_P00A+050E-40.pgm ADDED

Git LFS Details

  • SHA256: f1c7b18d1bbf8024ed2a8770515f309cdba2046e902140a1084832ebf257cf76
  • Pointer size: 130 Bytes
  • Size of remote file: 32.3 kB
data/CroppedYaleB/yaleB01/yaleB01_P00A+060E+20.pgm ADDED

Git LFS Details

  • SHA256: 0bb5ed0f0f57fba5424a7f037bc088d4bd054f1452676f68f8da22c06a03e4bf
  • Pointer size: 130 Bytes
  • Size of remote file: 32.3 kB
data/CroppedYaleB/yaleB01/yaleB01_P00A+060E-20.pgm ADDED

Git LFS Details

  • SHA256: f76b2d22b34bac466129c533202a1f71010499058397f45e7b7772d55ac816eb
  • Pointer size: 130 Bytes
  • Size of remote file: 32.3 kB
data/CroppedYaleB/yaleB01/yaleB01_P00A+070E+00.pgm ADDED

Git LFS Details

  • SHA256: 4b201dc5d7cb79ebee6c08d9462a0aea82cdda6f1e09adcee3dd7767bb6e7b5b
  • Pointer size: 130 Bytes
  • Size of remote file: 32.3 kB
data/CroppedYaleB/yaleB01/yaleB01_P00A+070E+45.pgm ADDED

Git LFS Details

  • SHA256: 895070756f7819e136707b6a30414f6e765a1b30eff9d9992331d5ea5d004350
  • Pointer size: 130 Bytes
  • Size of remote file: 32.3 kB
data/CroppedYaleB/yaleB01/yaleB01_P00A+070E-35.pgm ADDED

Git LFS Details

  • SHA256: 8506e0289c81c8a7ed3ce5a7c8a5e1964fb5006ac9d7943405a1729e0aae5738
  • Pointer size: 130 Bytes
  • Size of remote file: 32.3 kB
data/CroppedYaleB/yaleB01/yaleB01_P00A+085E+20.pgm ADDED

Git LFS Details

  • SHA256: 11c85d78d8e03e96e33c37ee0995fa9bae368cb72919ad053e4732ae8fe7bddd
  • Pointer size: 130 Bytes
  • Size of remote file: 32.3 kB
data/CroppedYaleB/yaleB01/yaleB01_P00A+085E-20.pgm ADDED

Git LFS Details

  • SHA256: 87540aaf6beaae20f6efe00d1d050e116cae01ecca07c649a40ad70d84e0cd02
  • Pointer size: 130 Bytes
  • Size of remote file: 32.3 kB
data/CroppedYaleB/yaleB01/yaleB01_P00A+095E+00.pgm ADDED

Git LFS Details

  • SHA256: 6bc80f90b0d9f8bd52329046ab1b4dba200e0e5dd723990f2a186e9499f32e66
  • Pointer size: 130 Bytes
  • Size of remote file: 32.3 kB
data/CroppedYaleB/yaleB01/yaleB01_P00A+110E+15.pgm ADDED

Git LFS Details

  • SHA256: adabe754242fa5f93251410ca3da39fd2a4922fa198bd155554e1f48ed83acdd
  • Pointer size: 130 Bytes
  • Size of remote file: 32.3 kB
data/CroppedYaleB/yaleB01/yaleB01_P00A+110E+40.pgm ADDED

Git LFS Details

  • SHA256: aaf7906664ff9499b15f67d4f65dbf09da07f647b031b12af547dd174c73abfa
  • Pointer size: 130 Bytes
  • Size of remote file: 32.3 kB