Spaces:
Sleeping
Sleeping
XavierSpycy
commited on
Commit
·
bd67cfe
1
Parent(s):
7991934
First commit
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +1 -0
- algorithm/__init__.py +0 -0
- algorithm/__pycache__/__init__.cpython-312.pyc +0 -0
- algorithm/__pycache__/datasets.cpython-312.pyc +0 -0
- algorithm/__pycache__/nmf.cpython-312.pyc +0 -0
- algorithm/__pycache__/preprocess.cpython-312.pyc +0 -0
- algorithm/__pycache__/sample.cpython-312.pyc +0 -0
- algorithm/datasets.py +135 -0
- algorithm/nmf.py +752 -0
- algorithm/pipeline.py +371 -0
- algorithm/preprocess.py +234 -0
- algorithm/sample.py +37 -0
- algorithm/user_evaluate.py +32 -0
- algorithm/visualize.py +161 -0
- app.py +196 -0
- data/.DS_Store +0 -0
- data/CroppedYaleB/.DS_Store +0 -0
- data/CroppedYaleB/yaleB01/DEADJOE +3 -0
- data/CroppedYaleB/yaleB01/yaleB01_P00.info +23 -0
- data/CroppedYaleB/yaleB01/yaleB01_P00A+000E+00.pgm +3 -0
- data/CroppedYaleB/yaleB01/yaleB01_P00A+000E+20.pgm +3 -0
- data/CroppedYaleB/yaleB01/yaleB01_P00A+000E+45.pgm +3 -0
- data/CroppedYaleB/yaleB01/yaleB01_P00A+000E+90.pgm +3 -0
- data/CroppedYaleB/yaleB01/yaleB01_P00A+000E-20.pgm +3 -0
- data/CroppedYaleB/yaleB01/yaleB01_P00A+000E-35.pgm +3 -0
- data/CroppedYaleB/yaleB01/yaleB01_P00A+005E+10.pgm +3 -0
- data/CroppedYaleB/yaleB01/yaleB01_P00A+005E-10.pgm +3 -0
- data/CroppedYaleB/yaleB01/yaleB01_P00A+010E+00.pgm +3 -0
- data/CroppedYaleB/yaleB01/yaleB01_P00A+010E-20.pgm +3 -0
- data/CroppedYaleB/yaleB01/yaleB01_P00A+015E+20.pgm +3 -0
- data/CroppedYaleB/yaleB01/yaleB01_P00A+020E+10.pgm +3 -0
- data/CroppedYaleB/yaleB01/yaleB01_P00A+020E-10.pgm +3 -0
- data/CroppedYaleB/yaleB01/yaleB01_P00A+020E-40.pgm +3 -0
- data/CroppedYaleB/yaleB01/yaleB01_P00A+025E+00.pgm +3 -0
- data/CroppedYaleB/yaleB01/yaleB01_P00A+035E+15.pgm +3 -0
- data/CroppedYaleB/yaleB01/yaleB01_P00A+035E+40.pgm +3 -0
- data/CroppedYaleB/yaleB01/yaleB01_P00A+035E+65.pgm +3 -0
- data/CroppedYaleB/yaleB01/yaleB01_P00A+035E-20.pgm +3 -0
- data/CroppedYaleB/yaleB01/yaleB01_P00A+050E+00.pgm +3 -0
- data/CroppedYaleB/yaleB01/yaleB01_P00A+050E-40.pgm +3 -0
- data/CroppedYaleB/yaleB01/yaleB01_P00A+060E+20.pgm +3 -0
- data/CroppedYaleB/yaleB01/yaleB01_P00A+060E-20.pgm +3 -0
- data/CroppedYaleB/yaleB01/yaleB01_P00A+070E+00.pgm +3 -0
- data/CroppedYaleB/yaleB01/yaleB01_P00A+070E+45.pgm +3 -0
- data/CroppedYaleB/yaleB01/yaleB01_P00A+070E-35.pgm +3 -0
- data/CroppedYaleB/yaleB01/yaleB01_P00A+085E+20.pgm +3 -0
- data/CroppedYaleB/yaleB01/yaleB01_P00A+085E-20.pgm +3 -0
- data/CroppedYaleB/yaleB01/yaleB01_P00A+095E+00.pgm +3 -0
- data/CroppedYaleB/yaleB01/yaleB01_P00A+110E+15.pgm +3 -0
- data/CroppedYaleB/yaleB01/yaleB01_P00A+110E+40.pgm +3 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
*.pgm filter=lfs diff=lfs merge=lfs -text
|
algorithm/__init__.py
ADDED
File without changes
|
algorithm/__pycache__/__init__.cpython-312.pyc
ADDED
Binary file (148 Bytes). View file
|
|
algorithm/__pycache__/datasets.cpython-312.pyc
ADDED
Binary file (5.53 kB). View file
|
|
algorithm/__pycache__/nmf.cpython-312.pyc
ADDED
Binary file (53.1 kB). View file
|
|
algorithm/__pycache__/preprocess.cpython-312.pyc
ADDED
Binary file (11.5 kB). View file
|
|
algorithm/__pycache__/sample.cpython-312.pyc
ADDED
Binary file (1.77 kB). View file
|
|
algorithm/datasets.py
ADDED
@@ -0,0 +1,135 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from typing import Tuple
|
3 |
+
|
4 |
+
import numpy as np
|
5 |
+
from PIL import Image
|
6 |
+
|
7 |
+
def validate_data_directory(root: str) -> None:
|
8 |
+
"""
|
9 |
+
Validate the data directory.
|
10 |
+
|
11 |
+
Parameters:
|
12 |
+
- root (str): Path to the dataset.
|
13 |
+
"""
|
14 |
+
|
15 |
+
# Check 1: root exists.
|
16 |
+
if not os.path.exists(root):
|
17 |
+
raise FileNotFoundError(f'{root} does not exist!')
|
18 |
+
|
19 |
+
# Check 2: data directory is not empty.
|
20 |
+
subdirs = [d for d in os.listdir(root) if os.path.isdir(os.path.join(root, d))]
|
21 |
+
if not subdirs:
|
22 |
+
raise FileNotFoundError(f'{root} is empty!')
|
23 |
+
|
24 |
+
# Check 3: each subdirectory contains at least one image.
|
25 |
+
for subdir in subdirs:
|
26 |
+
pgm_files = [f for f in os.listdir(os.path.join(root, subdir)) if f.endswith('.pgm')]
|
27 |
+
if not pgm_files:
|
28 |
+
raise FileNotFoundError(f'{os.path.join(root, subdir)} does not contain any image!')
|
29 |
+
|
30 |
+
|
31 |
+
def load_data(root: str='data/CroppedYaleB', reduce: int=1, global_centering: bool=True, local_centering: bool=True) -> Tuple[np.ndarray, np.ndarray]:
|
32 |
+
"""
|
33 |
+
Load ORL (or Extended YaleB) dataset into a numpy array.
|
34 |
+
|
35 |
+
Parameters:
|
36 |
+
- root (str): Path to the dataset.
|
37 |
+
- reduce (int): Scale factor for downscaling images.
|
38 |
+
- global_centering (bool): If True, apply global centering.
|
39 |
+
- local_centering (bool): If True, apply local centering.
|
40 |
+
|
41 |
+
Returns:
|
42 |
+
- images (numpy.ndarray): Image data.
|
43 |
+
- labels (numpy.ndarray): Image labels.
|
44 |
+
"""
|
45 |
+
|
46 |
+
# Validate the data directory.
|
47 |
+
validate_data_directory(root)
|
48 |
+
|
49 |
+
images, labels = [], []
|
50 |
+
|
51 |
+
for i, person in enumerate(sorted(os.listdir(root))):
|
52 |
+
|
53 |
+
if not os.path.isdir(os.path.join(root, person)):
|
54 |
+
continue
|
55 |
+
|
56 |
+
for fname in os.listdir(os.path.join(root, person)):
|
57 |
+
|
58 |
+
# Remove background images in Extended YaleB dataset.
|
59 |
+
if fname.endswith('Ambient.pgm'):
|
60 |
+
continue
|
61 |
+
|
62 |
+
if not fname.endswith('.pgm'):
|
63 |
+
continue
|
64 |
+
|
65 |
+
# Load image.
|
66 |
+
img = Image.open(os.path.join(root, person, fname))
|
67 |
+
img = img.convert('L') # grey image.
|
68 |
+
|
69 |
+
# Reduce computation complexity.
|
70 |
+
img = img.resize([s//reduce for s in img.size])
|
71 |
+
|
72 |
+
# Convert image to numpy array.
|
73 |
+
img = np.asarray(img).reshape((-1,1))
|
74 |
+
|
75 |
+
# Collect data and label.
|
76 |
+
images.append(img)
|
77 |
+
labels.append(i)
|
78 |
+
|
79 |
+
# Concatenate all images and labels.
|
80 |
+
images = np.concatenate(images, axis=1)
|
81 |
+
labels = np.array(labels)
|
82 |
+
|
83 |
+
# Convert to float64 for numerical stability
|
84 |
+
images = images.astype(np.float64)
|
85 |
+
|
86 |
+
# Global centering.
|
87 |
+
if global_centering:
|
88 |
+
images -= images.mean(axis=0)
|
89 |
+
|
90 |
+
# Local centering.
|
91 |
+
if local_centering:
|
92 |
+
images -= images.mean(axis=1).reshape(-1, 1)
|
93 |
+
|
94 |
+
return images, labels
|
95 |
+
|
96 |
+
|
97 |
+
def get_image_size(root: str='code/dataCroppedYaleB') -> tuple:
|
98 |
+
"""
|
99 |
+
Get the size of images in the dataset.
|
100 |
+
|
101 |
+
Parameters:
|
102 |
+
- root (str): Path to the dataset.
|
103 |
+
|
104 |
+
Returns:
|
105 |
+
- img_size (tuple): Size of each image as (width, height).
|
106 |
+
"""
|
107 |
+
|
108 |
+
# Validate the data directory.
|
109 |
+
validate_data_directory(root)
|
110 |
+
|
111 |
+
img_size = None # Initialize variable to hold image size
|
112 |
+
|
113 |
+
for person in sorted(os.listdir(root)):
|
114 |
+
|
115 |
+
if not os.path.isdir(os.path.join(root, person)):
|
116 |
+
continue
|
117 |
+
|
118 |
+
for fname in os.listdir(os.path.join(root, person)):
|
119 |
+
|
120 |
+
# Remove background images in Extended YaleB dataset.
|
121 |
+
if fname.endswith('Ambient.pgm'):
|
122 |
+
continue
|
123 |
+
|
124 |
+
if not fname.endswith('.pgm'):
|
125 |
+
continue
|
126 |
+
|
127 |
+
# Load image.
|
128 |
+
img = Image.open(os.path.join(root, person, fname))
|
129 |
+
img = img.convert('L') # Grey image.
|
130 |
+
|
131 |
+
# Reduce computation complexity.
|
132 |
+
img = img.resize([s for s in img.size])
|
133 |
+
|
134 |
+
# Store the image size and return immediately
|
135 |
+
return img.size # (width, height)
|
algorithm/nmf.py
ADDED
@@ -0,0 +1,752 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import time
|
3 |
+
from abc import ABC, abstractmethod
|
4 |
+
from collections import Counter
|
5 |
+
from typing import Union, Dict, Tuple, Generator
|
6 |
+
|
7 |
+
import numpy as np
|
8 |
+
from tqdm import tqdm
|
9 |
+
from scipy.linalg import pinv
|
10 |
+
import matplotlib.pyplot as plt
|
11 |
+
from sklearn.cluster import KMeans, BisectingKMeans
|
12 |
+
from sklearn.metrics import mean_squared_error, accuracy_score, normalized_mutual_info_score
|
13 |
+
|
14 |
+
class BasicNMF(ABC):
|
15 |
+
name = 'Basic'
|
16 |
+
"""
|
17 |
+
A basic framework for Non-negative Matrix Factorization (NMF) algorithms.
|
18 |
+
"""
|
19 |
+
def __init__(self) -> None:
|
20 |
+
"""
|
21 |
+
Initialize the basic NMF algorithm.
|
22 |
+
"""
|
23 |
+
self.loss_list = []
|
24 |
+
|
25 |
+
def __PCA(self, X: np.ndarray, n_components: int) -> np.ndarray:
|
26 |
+
"""
|
27 |
+
Principal Component Analysis (PCA) for dimensionality reduction.
|
28 |
+
|
29 |
+
Parameters:
|
30 |
+
X (numpy.ndarray): Input dataset of shape (n_samples, n_features).
|
31 |
+
n_components (int): Number of principal components to retain.
|
32 |
+
|
33 |
+
Returns:
|
34 |
+
transformed_data (numpy.ndarray): Dataset transformed into principal component space.
|
35 |
+
"""
|
36 |
+
if n_components > X.shape[1]:
|
37 |
+
raise ValueError("n_components must be less than or equal to the number of features")
|
38 |
+
|
39 |
+
# Center the data
|
40 |
+
X_centered = X - np.mean(X, axis=0)
|
41 |
+
# Calculate the covariance matrix and its eigenvalues and eigenvectors
|
42 |
+
cov_mat = np.cov(X_centered, rowvar=False)
|
43 |
+
eigenvalues, eigenvectors = np.linalg.eigh(cov_mat)
|
44 |
+
# Sort the eigenvalues and eigenvectors in descending order
|
45 |
+
sorted_indices = eigenvalues.argsort()[::-1]
|
46 |
+
eigenvectors = eigenvectors[:, sorted_indices]
|
47 |
+
# Projection matrix using the first n_components eigenvectors
|
48 |
+
projection_matrix = eigenvectors[:, :n_components]
|
49 |
+
# Project the data onto the new feature space
|
50 |
+
transformed_data = np.dot(X_centered, projection_matrix)
|
51 |
+
return transformed_data
|
52 |
+
|
53 |
+
def __FastICA(self, X: np.ndarray, max_iter: int=200, random_state: Union[int, np.random.RandomState, None]=None) -> np.ndarray:
|
54 |
+
"""
|
55 |
+
Implementation of FastICA algorithm to separate the independent sources
|
56 |
+
from mixed signals in the input data.
|
57 |
+
|
58 |
+
Parameters:
|
59 |
+
X (numpy.ndarray): Input dataset of shape (n_samples, n_features).
|
60 |
+
max_iter (int, optional): The maximum number of iterations for the convergence of the estimation. Default is 200.
|
61 |
+
|
62 |
+
Return:
|
63 |
+
S (numpy.ndarray): Matrix of shape (n_samples, n_features) representing the estimated independent sources.
|
64 |
+
"""
|
65 |
+
# Set the random state
|
66 |
+
rng = np.random.RandomState(random_state)
|
67 |
+
# Center the data by removing the mean
|
68 |
+
X = X - np.mean(X, axis=1, keepdims=True)
|
69 |
+
n = X.shape[0]
|
70 |
+
# Compute the independent components iteratively
|
71 |
+
W = np.zeros((n, n))
|
72 |
+
for i in range(n):
|
73 |
+
w = rng.rand(n)
|
74 |
+
for j in range(max_iter): # max iterations for convergence
|
75 |
+
w_new = (X * np.dot(w, X)).mean(axis=1) - 2 * w
|
76 |
+
w_new /= np.sqrt((w_new ** 2).sum())
|
77 |
+
# Convergence check based on the weight vector's direction
|
78 |
+
if np.abs(np.abs((w_new * w).sum()) - 1) < 1e-04:
|
79 |
+
break
|
80 |
+
w = w_new
|
81 |
+
W[i, :] = w
|
82 |
+
X -= np.outer(w, np.dot(w, X))
|
83 |
+
# Compute the estimated independent sources
|
84 |
+
S = np.dot(W, X)
|
85 |
+
return S
|
86 |
+
|
87 |
+
def __NICA(self, X: np.ndarray, r: int, random_state: Union[int, np.random.RandomState, None]=None) -> Tuple[np.ndarray, np.ndarray]:
|
88 |
+
"""
|
89 |
+
Implementation of a non-negative Independent Component Analysis (NICA).
|
90 |
+
The process involves obtaining a non-negative basic matrix and a
|
91 |
+
non-negative coefficient matrix from the input data.
|
92 |
+
|
93 |
+
Parameters:
|
94 |
+
- X (numpy.ndarray): The input data matrix of shape (n_features, n_samples)
|
95 |
+
where n_samples is the number of samples, and n_features
|
96 |
+
is the number of features.
|
97 |
+
- r (int): The number of components to be retained after applying PCA.
|
98 |
+
|
99 |
+
Returns:
|
100 |
+
- W_0 (numpy.ndarray): The non-negative dictionary matrix.
|
101 |
+
- H_0 (numpy.ndarray): The non-negative representation matrix.
|
102 |
+
"""
|
103 |
+
# Set A as a pseudoinverse of X
|
104 |
+
A = pinv(X.T)
|
105 |
+
# Apply PCA on the matrix A to generate the basic matrix W
|
106 |
+
W = self.__PCA(A, n_components=r)
|
107 |
+
# Whiten the basic matrix W obtained above by using the eigenvalue decomposition of the covariance matrix of W.
|
108 |
+
eigenvalues, eigenvectors = np.linalg.eigh(np.cov(W, rowvar=False))
|
109 |
+
# Preallocate memory for whitened matrix
|
110 |
+
W_whitened = np.empty_like(W)
|
111 |
+
np.dot(W, eigenvectors, out=W_whitened)
|
112 |
+
W_whitened /= np.sqrt(eigenvalues + 1e-5)
|
113 |
+
# Implement ICA algorithm on the whitened matrix W and obtain the independent basic matrix W_0
|
114 |
+
# Assuming FastICA() returns the transformed matrix
|
115 |
+
W_0 = self.__FastICA(W_whitened, random_state=random_state)
|
116 |
+
# Preallocate memory for H_0 and calculate it
|
117 |
+
H_0 = np.empty((W_0.shape[1], X.shape[1]))
|
118 |
+
np.dot(W_0.T, X, out=H_0)
|
119 |
+
# Take the absolute value in-place
|
120 |
+
np.abs(W_0, out=W_0)
|
121 |
+
np.abs(H_0, out=H_0)
|
122 |
+
return W_0, H_0
|
123 |
+
|
124 |
+
def Kmeans(self, X: np.ndarray, n_components: int, random_state: Union[int, np.random.RandomState, None]=None) -> Tuple[np.ndarray, np.ndarray]:
|
125 |
+
"""
|
126 |
+
Initialize D and R matrices using K-means algorithm.
|
127 |
+
|
128 |
+
Parameters:
|
129 |
+
- X (numpy.ndarray): Input data matrix of shape (n_features, n_samples).
|
130 |
+
- n_components (int): The number of components for matrix factorization.
|
131 |
+
- random_state (int, np.random.RandomState, None): Random state for reproducibility.
|
132 |
+
"""
|
133 |
+
# Intialize
|
134 |
+
kmeans = KMeans(n_clusters=n_components, n_init='auto', random_state=random_state)
|
135 |
+
kmeans.fit(X.T)
|
136 |
+
D = kmeans.cluster_centers_.T
|
137 |
+
labels = kmeans.labels_
|
138 |
+
G = np.zeros(((len(labels)), n_components))
|
139 |
+
for i, label in enumerate(labels):
|
140 |
+
G[i, label] = 1
|
141 |
+
G = G / np.sqrt(np.sum(G, axis=0, keepdims=True))
|
142 |
+
G += 0.2
|
143 |
+
R = G.T
|
144 |
+
return D, R
|
145 |
+
|
146 |
+
def matrix_init(self, X: np.ndarray, n_components: int,
|
147 |
+
random_state: Union[int, np.random.RandomState, None]=None) -> Tuple[np.ndarray, np.ndarray]:
|
148 |
+
"""
|
149 |
+
Initialize D and R matrices using NICA algorithm.
|
150 |
+
|
151 |
+
Parameters:
|
152 |
+
- X (numpy.ndarray): Input data matrix of shape (n_features, n_samples).
|
153 |
+
- n_components (int): The number of components for matrix factorization.
|
154 |
+
- random_state (int, np.random.RandomState, None): Random state for reproducibility.
|
155 |
+
|
156 |
+
Returns:
|
157 |
+
- D (numpy.ndarray): The non-negative dictionary matrix.
|
158 |
+
- R (numpy.ndarray): The non-negative representation matrix.
|
159 |
+
"""
|
160 |
+
# Intialize
|
161 |
+
D, R = self.__NICA(X, n_components, random_state=random_state)
|
162 |
+
return D, R
|
163 |
+
|
164 |
+
def fit(self, X: np.ndarray, n_components: int, max_iter: int=500,
|
165 |
+
random_state: Union[int, np.random.RandomState, None]=None,
|
166 |
+
verbose: bool=True, imshow: bool=False, warm_start: bool=False, **kwargs) -> None:
|
167 |
+
"""
|
168 |
+
Non-negative Matrix Factorization (NMF) algorithm using L2-norm for convergence criterion.
|
169 |
+
|
170 |
+
Parameters:
|
171 |
+
- X (numpy.ndarray): Input data matrix of shape (n_features, n_samples).
|
172 |
+
- n_components (int): The number of components for matrix factorization.
|
173 |
+
- max_iter (int, optional): Maximum number of iterations. Default is 5000.
|
174 |
+
- verbose (bool, optional): Whether to show the progress bar.
|
175 |
+
- random_state (int, np.random.RandomState, None, optional): Random state for reproducibility. Default is None.
|
176 |
+
- imshow (bool, optional): Whether to plot convergence trend. Default is False.
|
177 |
+
- warm_start (bool, optional): Whether to continue from the previous state. Default is False.
|
178 |
+
- kwargs: Additional keyword arguments for the update rule.
|
179 |
+
"""
|
180 |
+
# Record start time
|
181 |
+
start_time = time.time()
|
182 |
+
# Initialize D and R matrices using NICA algorithm by default
|
183 |
+
if not warm_start or (warm_start and not hasattr(self, 'D') and not hasattr(self, 'R')):
|
184 |
+
self.D, self.R = self.matrix_init(X, n_components, random_state)
|
185 |
+
else:
|
186 |
+
if verbose:
|
187 |
+
print('Warm start enabled. Continuing from previous state.')
|
188 |
+
|
189 |
+
# Compute initialization time
|
190 |
+
init_time = time.time() - start_time
|
191 |
+
# Copy D and R matrices for convergence check
|
192 |
+
self.D_prev, self.R_prev = self.D.copy(), self.R.copy()
|
193 |
+
if verbose:
|
194 |
+
print(f'Initialization done. Time elapsed: {init_time:.2f} seconds.')
|
195 |
+
# Iteratively update D and R matrices until convergence
|
196 |
+
for _ in self.conditional_tqdm(range(max_iter), verbose=verbose):
|
197 |
+
# Update D and R matrices
|
198 |
+
flag = self.update(X, **kwargs)
|
199 |
+
# Check convergence
|
200 |
+
if flag:
|
201 |
+
if verbose:
|
202 |
+
print('Converged at iteration', _)
|
203 |
+
break
|
204 |
+
if imshow:
|
205 |
+
self.plot()
|
206 |
+
|
207 |
+
@abstractmethod
|
208 |
+
def update(self, X: np.ndarray, **kwargs: Dict[str, float]) -> bool:
|
209 |
+
"""
|
210 |
+
Update rule for D and R matrices using a specific NMF algorithm, which must be implemented in the derived class.
|
211 |
+
|
212 |
+
Parameters:
|
213 |
+
- X (numpy.ndarray): Input data matrix of shape (n_features, n_samples).
|
214 |
+
- kwargs: Additional keyword arguments for the update rule.
|
215 |
+
|
216 |
+
Returns:
|
217 |
+
- flag (bool): Whether the algorithm has converged.
|
218 |
+
"""
|
219 |
+
# Calculate L2-norm based errors for convergence
|
220 |
+
e_D = np.sqrt(np.sum((self.D - self.D_prev) ** 2, axis=(0, 1))) / self.D.size
|
221 |
+
e_R = np.sqrt(np.sum((self.R - self.R_prev) ** 2, axis=(0, 1))) / self.R.size
|
222 |
+
return (e_D < 1e-6 and e_R < 1e-6)
|
223 |
+
|
224 |
+
def plot(self) -> None:
|
225 |
+
"""
|
226 |
+
Plot the convergence trend of the cost function.
|
227 |
+
"""
|
228 |
+
plt.plot(self.loss_list)
|
229 |
+
plt.xlabel('Iteration')
|
230 |
+
plt.ylabel('Cost function')
|
231 |
+
plt.grid()
|
232 |
+
plt.show()
|
233 |
+
|
234 |
+
def conditional_tqdm(self, iterable, verbose: bool=True) -> Generator[int, None, None]:
|
235 |
+
"""
|
236 |
+
Determine whether to use tqdm or not based on the verbose flag.
|
237 |
+
|
238 |
+
Parameters:
|
239 |
+
- iterable (range): Range of values to iterate over.
|
240 |
+
- verbose (bool, optional): Whether to print progress bar. Default is True.
|
241 |
+
|
242 |
+
Returns:
|
243 |
+
- item (int): Current iteration.
|
244 |
+
"""
|
245 |
+
if verbose:
|
246 |
+
for item in tqdm(iterable):
|
247 |
+
yield item
|
248 |
+
else:
|
249 |
+
for item in iterable:
|
250 |
+
yield item
|
251 |
+
|
252 |
+
def normalize(self, epsilon: float=1e-7) -> None:
|
253 |
+
"""
|
254 |
+
Normalize columns of D and rows of R.
|
255 |
+
|
256 |
+
Parameter:
|
257 |
+
- epsilon (float, optional): Small constant added to denominator to prevent division by zero. Default is 1e-7.
|
258 |
+
"""
|
259 |
+
# Normalize columns of D and rows of R
|
260 |
+
norms = np.sqrt(np.sum(self.D**2, axis=0))
|
261 |
+
self.D /= norms[np.newaxis, :] + epsilon
|
262 |
+
self.R *= norms[:, np.newaxis]
|
263 |
+
|
264 |
+
def evaluate(self, X_clean: np.ndarray, Y_true: np.ndarray, random_state: Union[int, np.random.RandomState, None]=None) -> Tuple[float, float, float]:
|
265 |
+
"""
|
266 |
+
Evaluate the specific NMF algorithm on the specific dataset.
|
267 |
+
|
268 |
+
Parameters:
|
269 |
+
- X_clean (numpy.ndarray): The original clean data matrix of shape (n_features, n_samples).
|
270 |
+
- Y_true (numpy.ndarray): The true labels corresponding to each sample in X of shape (n_samples,).
|
271 |
+
- random_state (int, np.random.RandomState, None, optional): Random state for reproducibility. Default is None.
|
272 |
+
|
273 |
+
Returns:
|
274 |
+
- rmse (float): The root mean squared error of the reconstructed matrix and the original matrix.
|
275 |
+
- acc (float): The accuracy score of the predicted labels based on the clustering results on the reconstructed matrix.
|
276 |
+
- nmi (float): The normalized mutual information score of the predicted labels based on the clustering results on the reconstructed matrix.
|
277 |
+
"""
|
278 |
+
Y_label = self.__labeling(self.R.T, Y_true, random_state=random_state)
|
279 |
+
rmse = np.sqrt(mean_squared_error(X_clean, np.dot(self.D, self.R)))
|
280 |
+
acc = accuracy_score(Y_true, Y_label)
|
281 |
+
nmi = normalized_mutual_info_score(Y_true, Y_label)
|
282 |
+
return rmse, acc, nmi
|
283 |
+
|
284 |
+
def __labeling(self, X: np.ndarray, Y: np.ndarray, random_state: Union[int, np.random.RandomState, None]=None) -> np.ndarray:
|
285 |
+
"""
|
286 |
+
Label data based on clusters obtained from KMeans clustering,
|
287 |
+
by assigning the most frequent label in each cluster.
|
288 |
+
|
289 |
+
Parameters:
|
290 |
+
- X (numpy.ndarray): Input feature matrix of shape (n_samples, n_features).
|
291 |
+
- Y (numpy.ndarray): True labels corresponding to each sample in X of shape (n_samples,).
|
292 |
+
|
293 |
+
Returns:
|
294 |
+
- Y_pred (numpy.ndarray): Predicted labels for each sample based on the clustering results.
|
295 |
+
|
296 |
+
Note:
|
297 |
+
This function works best when the input data is somewhat separated into distinct
|
298 |
+
clusters that align with the true labels.
|
299 |
+
"""
|
300 |
+
cluster = BisectingKMeans(len(set(Y)), random_state=random_state).fit(X)
|
301 |
+
Y_pred = np.zeros(Y.shape)
|
302 |
+
for i in set(cluster.labels_):
|
303 |
+
ind = cluster.labels_ == i
|
304 |
+
Y_pred[ind] = Counter(Y[ind]).most_common(1)[0][0] # assign label.
|
305 |
+
return Y_pred
|
306 |
+
|
307 |
+
def vectorized_armijo_rule(self, f, grad_f, X, alpha, c=1e-4, tau=0.5):
|
308 |
+
"""
|
309 |
+
Vectorized Armijo rule to find the step size for each element in the matrix.
|
310 |
+
|
311 |
+
Parameters:
|
312 |
+
- f: The objective function, which should accept a matrix and return a scalar.
|
313 |
+
- grad_f: The gradient of the objective function, which returns a matrix.
|
314 |
+
- X: Current point, a matrix.
|
315 |
+
- alpha: Initial step size, a scalar or a matrix.
|
316 |
+
- c: A constant in (0, 1), typically a small value (default is 1e-4).
|
317 |
+
- tau: Reduction factor for step size, typically in (0, 1) (default is 0.5).
|
318 |
+
|
319 |
+
Returns:
|
320 |
+
- alpha: Step sizes that satisfy the Armijo condition for each element.
|
321 |
+
"""
|
322 |
+
# Compute the initial objective function value
|
323 |
+
f_x = f(X)
|
324 |
+
# Compute the initial gradient and its norm squared
|
325 |
+
grad_f_x = grad_f(X)
|
326 |
+
norm_grad_f_x_squared = np.square(np.linalg.norm(grad_f_x, axis=(0,1), keepdims=True))
|
327 |
+
|
328 |
+
# Compute the sufficient decrease condition for the entire matrix
|
329 |
+
sufficient_decrease = f_x - c * alpha * norm_grad_f_x_squared
|
330 |
+
|
331 |
+
counter = 0
|
332 |
+
# Check the condition for each element
|
333 |
+
while np.any(f(X - alpha * grad_f_x) > sufficient_decrease) or counter >= 10:
|
334 |
+
# Reduce alpha for elements not satisfying the condition
|
335 |
+
alpha *= tau
|
336 |
+
counter += 1
|
337 |
+
return alpha
|
338 |
+
|
339 |
+
@classmethod
|
340 |
+
def from_pretrained(cls, file_path: str, **kwargs: Dict[str, float]) -> 'BasicNMF':
|
341 |
+
"""
|
342 |
+
Load the model parameters from a file.
|
343 |
+
|
344 |
+
Parameters:
|
345 |
+
- file_path (str): The path to the file where the model parameters are saved.
|
346 |
+
|
347 |
+
Returns:
|
348 |
+
- instance (BasicNMF): An instance of the BasicNMF class with the loaded parameters.
|
349 |
+
"""
|
350 |
+
import pickle
|
351 |
+
with open(os.path.join(file_path), 'rb') as file:
|
352 |
+
params = pickle.load(file)
|
353 |
+
instance = cls(**kwargs)
|
354 |
+
instance.__dict__.update(params)
|
355 |
+
return instance
|
356 |
+
|
357 |
+
def save(self, file_path: str) -> None:
|
358 |
+
"""
|
359 |
+
Save the model parameters to a file.
|
360 |
+
|
361 |
+
Parameters:
|
362 |
+
- file_path (str): The path to the file where the model parameters will be saved.
|
363 |
+
"""
|
364 |
+
import pickle
|
365 |
+
with open(file_path, 'wb') as file:
|
366 |
+
pickle.dump(self.__dict__, file)
|
367 |
+
|
368 |
+
def __call__(self, **kwargs: Dict[str, float]):
|
369 |
+
"""
|
370 |
+
Overwrite the __call__ method to fit the model with the given parameters.
|
371 |
+
"""
|
372 |
+
self.fit(**kwargs)
|
373 |
+
|
374 |
+
class L2NormNMF(BasicNMF):
|
375 |
+
name = 'L2Norm'
|
376 |
+
"""
|
377 |
+
L2-norm NMF algorithm.
|
378 |
+
"""
|
379 |
+
def __init__(self) -> None:
|
380 |
+
super().__init__()
|
381 |
+
|
382 |
+
def update(self, X: np.ndarray, threshold: float=1e-6, epsilon: float=1e-7) -> bool:
|
383 |
+
"""
|
384 |
+
Update rule for D and R matrices using L2-norm NMF algorithm.
|
385 |
+
|
386 |
+
Parameters:
|
387 |
+
- X (numpy.ndarray): Input data matrix of shape (n_features, n_samples).
|
388 |
+
- threshold (float, optional): Convergence threshold based on L2-norm. Default is 1e-6.
|
389 |
+
- epsilon (float, optional): Small constant added to denominator to prevent division by zero. Default is 1e-7.
|
390 |
+
|
391 |
+
Returns:
|
392 |
+
- flag (bool): Whether the algorithm has converged.
|
393 |
+
"""
|
394 |
+
# Multiplicative update rule for D and R matrices
|
395 |
+
self.D *= np.dot(X, self.R.T) / (np.dot(np.dot(self.D, self.R), self.R.T) + epsilon)
|
396 |
+
self.R *= np.dot(self.D.T, X) / (np.dot(np.dot(self.D.T, self.D), self.R) + epsilon)
|
397 |
+
# Calculate the loss function
|
398 |
+
loss = np.linalg.norm(X - np.dot(self.D, self.R), 'fro') ** 2
|
399 |
+
self.loss_list.append(loss)
|
400 |
+
# Calculate L2-norm based errors for convergence
|
401 |
+
e_D = np.sqrt(np.sum((self.D - self.D_prev) ** 2, axis=(0, 1))) / self.D.size
|
402 |
+
e_R = np.sqrt(np.sum((self.R - self.R_prev) ** 2, axis=(0, 1))) / self.R.size
|
403 |
+
# Update previous matrices for next iteration
|
404 |
+
self.D_prev, self.R_prev = self.D.copy(), self.R.copy()
|
405 |
+
return (e_D < threshold and e_R < threshold)
|
406 |
+
|
407 |
+
class KLDivergenceNMF(BasicNMF):
|
408 |
+
name = 'KLDivergence'
|
409 |
+
"""
|
410 |
+
KL-divergence NMF algorithm.
|
411 |
+
"""
|
412 |
+
def __init__(self) -> None:
|
413 |
+
"""
|
414 |
+
Initialize the KL-divergence NMF algorithm.
|
415 |
+
"""
|
416 |
+
super().__init__()
|
417 |
+
self.prev_kl = float('inf')
|
418 |
+
|
419 |
+
def update(self, X: np.ndarray, epsilon: float=1e-7, threshold: float=1e-4) -> bool:
|
420 |
+
"""
|
421 |
+
Update rule for D and R matrices using KL-divergence NMF algorithm.
|
422 |
+
|
423 |
+
Parameters:
|
424 |
+
- X (numpy.ndarray): Input data matrix of shape (n_features, n_samples).
|
425 |
+
- epsilon (float, optional): Small constant added to denominator to prevent division by zero. Default is 1e-7.
|
426 |
+
- threshold (float, optional): Convergence threshold based on KL-divergence. Default is 1e-4.
|
427 |
+
|
428 |
+
Returns:
|
429 |
+
- flag (bool): Whether the algorithm has converged.
|
430 |
+
"""
|
431 |
+
# Multiplicative update rule for D and R matrices
|
432 |
+
self.D *= np.dot(X / (np.dot(self.D, self.R) + epsilon), self.R.T) / (np.dot(np.ones(X.shape), self.R.T) + epsilon)
|
433 |
+
self.R *= np.dot(self.D.T, X / (np.dot(self.D, self.R) + epsilon)) / (np.dot(self.D.T, np.ones(X.shape)) + epsilon)
|
434 |
+
|
435 |
+
# Calculate KL-divergence
|
436 |
+
XR = np.dot(self.D, self.R) + epsilon
|
437 |
+
kl_div = np.sum(X * np.log(np.maximum(epsilon, X / (XR + epsilon))) - X + XR)
|
438 |
+
self.loss_list.append(kl_div)
|
439 |
+
flag = abs(kl_div - self.prev_kl) < threshold
|
440 |
+
self.prev_kl = kl_div # Update previous KL divergence
|
441 |
+
return flag
|
442 |
+
|
443 |
+
class ISDivergenceNMF(BasicNMF):
|
444 |
+
name = 'ISDivergence'
|
445 |
+
"""
|
446 |
+
IS-divergence NMF algorithm.
|
447 |
+
"""
|
448 |
+
def __init__(self) -> None:
|
449 |
+
"""
|
450 |
+
Initialize the IS-divergence NMF algorithm.
|
451 |
+
"""
|
452 |
+
super().__init__()
|
453 |
+
self.prev_is_div = float('inf')
|
454 |
+
|
455 |
+
def update(self, X: np.ndarray, epsilon: float=1e-7, threshold: float=1e-6) -> bool:
|
456 |
+
"""
|
457 |
+
Update rule for D and R matrices using IS-divergence NMF algorithm.
|
458 |
+
|
459 |
+
Parameters:
|
460 |
+
- X (numpy.ndarray): Input data matrix of shape (n_features, n_samples).
|
461 |
+
- epsilon (float, optional): Small constant added to denominator to prevent division by zero. Default is 1e-7.
|
462 |
+
- threshold (float, optional): Convergence threshold based on IS-divergence. Default is 1e-6.
|
463 |
+
|
464 |
+
Returns:
|
465 |
+
- flag (bool): Whether the algorithm has converged.
|
466 |
+
"""
|
467 |
+
# Update R
|
468 |
+
DR = np.dot(self.D, self.R)
|
469 |
+
DR = np.where(DR > 0, DR, epsilon)
|
470 |
+
self.R *= (np.dot(self.D.T, (DR ** (-2) * X))) / (np.dot(self.D.T, DR ** (-1)) + epsilon)
|
471 |
+
# Update D
|
472 |
+
DR = np.dot(self.D, self.R)
|
473 |
+
DR = np.where(DR > 0, DR, epsilon)
|
474 |
+
self.D *= (np.dot((DR ** (-2) * X), self.R.T)) / (np.dot(DR ** (-1), self.R.T) + epsilon)
|
475 |
+
# Normalize D and R
|
476 |
+
self.normalize(epsilon)
|
477 |
+
# Calculate IS-divergence
|
478 |
+
DR = np.dot(self.D, self.R) + epsilon
|
479 |
+
is_div = np.sum(-np.log(np.maximum(epsilon, X / DR)) + X / DR - 1)
|
480 |
+
# Adding L2 regularization terms to the IS-divergence
|
481 |
+
# is_div += lambd * np.linalg.norm(self.D, 'fro') ** 2 + lambd * np.linalg.norm(self.R, 'fro')**2
|
482 |
+
self.loss_list.append(is_div)
|
483 |
+
flag = np.abs(is_div - self.prev_is_div) < threshold
|
484 |
+
self.prev_is_div = is_div
|
485 |
+
return flag
|
486 |
+
|
487 |
+
class L21NormNMF(BasicNMF):
|
488 |
+
name = 'L21Norm'
|
489 |
+
"""
|
490 |
+
L21 Norm NMF algorithm.
|
491 |
+
"""
|
492 |
+
def __init__(self) -> None:
|
493 |
+
"""
|
494 |
+
Initialize the L21 Norm NMF algorithm.
|
495 |
+
"""
|
496 |
+
super().__init__()
|
497 |
+
|
498 |
+
def update(self, X: np.ndarray, epsilon: float=1e-7, threshold: float=1e-4) -> bool:
|
499 |
+
"""
|
500 |
+
Update rule for D and R matrices using L21 Norm NMF algorithm.
|
501 |
+
|
502 |
+
Parameters:
|
503 |
+
- X (numpy.ndarray): Input data matrix of shape (n_features, n_samples).
|
504 |
+
- epsilon (float, optional): Small constant added to denominator to prevent division by zero. Default is 1e-7.
|
505 |
+
- threshold (float, optional): Convergence threshold based on L21 Norm. Default is 1e-4.
|
506 |
+
|
507 |
+
Returns:
|
508 |
+
- flag (bool): Whether the algorithm has converged.
|
509 |
+
"""
|
510 |
+
# Multiplicative update rule for D and R matrices
|
511 |
+
residual = X - np.dot(self.D, self.R) # residual.shape = (n_features, n_samples)
|
512 |
+
norm_values = np.sqrt(np.sum(residual ** 2, axis=1))
|
513 |
+
diagonal = np.diag(1.0 / (norm_values + epsilon)) # diagonal.shape = (n_features, n_features)
|
514 |
+
# Update rule for D
|
515 |
+
self.D *= (np.dot(np.dot(diagonal, X), self.R.T) / (np.dot(np.dot(np.dot(diagonal, self.D), self.R), self.R.T) + epsilon))
|
516 |
+
# Update rule for R
|
517 |
+
self.R *= (np.dot(np.dot(self.D.T, diagonal), X) / (np.dot(np.dot(np.dot(self.D.T, diagonal), self.D), self.R) + epsilon))
|
518 |
+
# Calculate the loss function
|
519 |
+
loss = np.linalg.norm(X - np.dot(self.D, self.R), 'fro')
|
520 |
+
self.loss_list.append(loss)
|
521 |
+
# Calculate L2,1-norm based errors for convergence
|
522 |
+
e_D = np.linalg.norm(self.D - self.D_prev, 'fro') / np.linalg.norm(self.D, 'fro')
|
523 |
+
e_R = np.linalg.norm(self.R - self.R_prev, 'fro') / np.linalg.norm(self.R, 'fro')
|
524 |
+
# Update previous matrices for next iteration
|
525 |
+
self.D_prev, self.R_prev = self.D.copy(), self.R.copy()
|
526 |
+
return (e_D < threshold and e_R < threshold)
|
527 |
+
|
528 |
+
class L1NormRegularizedNMF(BasicNMF):
|
529 |
+
name = 'L1NormRegularized'
|
530 |
+
"""
|
531 |
+
L1 Norm Regularized NMF algorithm.
|
532 |
+
"""
|
533 |
+
def __init__(self) -> None:
|
534 |
+
"""
|
535 |
+
Initialize the L1 Norm Regularized NMF algorithm.
|
536 |
+
"""
|
537 |
+
super().__init__()
|
538 |
+
|
539 |
+
# Helper function
|
540 |
+
def soft_thresholding(self, x: np.ndarray, lambd: float) -> np.ndarray:
|
541 |
+
"""
|
542 |
+
Soft thresholding operator.
|
543 |
+
|
544 |
+
Parameters:
|
545 |
+
- x (numpy.ndarray): Input data matrix of shape (n_features, n_samples).
|
546 |
+
- lambd (float): Threshold value.
|
547 |
+
|
548 |
+
Returns:
|
549 |
+
- y (numpy.ndarray): The updated matrix after applying the soft thresholding operator.
|
550 |
+
"""
|
551 |
+
return np.where(x > lambd, x - lambd, np.where(x < -lambd, x + lambd, 0))
|
552 |
+
|
553 |
+
def update(self, X: np.ndarray, lambd: float=0.2, epsilon: float=1e-7, threshold: float=1e-8) -> bool:
|
554 |
+
"""
|
555 |
+
Update rule for D and R matrices using L1 Norm Regularized NMF algorithm.
|
556 |
+
|
557 |
+
Parameters:
|
558 |
+
- X (numpy.ndarray): Input data matrix of shape (n_features, n_samples).
|
559 |
+
- lambd (float): Threshold value.
|
560 |
+
- epsilon (float, optional): Small constant added to denominator to prevent division by zero. Default is 1e-7.
|
561 |
+
- threshold (float, optional): Convergence threshold based on L1 Norm Regularized. Default is 1e-8.
|
562 |
+
|
563 |
+
Returns:
|
564 |
+
- flag (bool): Whether the algorithm has converged.
|
565 |
+
"""
|
566 |
+
# Compute the error matrix
|
567 |
+
S = X - np.dot(self.D, self.R)
|
568 |
+
# Soft thresholding operator
|
569 |
+
S = self.soft_thresholding(S, lambd/2)
|
570 |
+
# Multiplicative update rule for D and R matrices
|
571 |
+
update_D = np.dot(S - X, self.R.T)
|
572 |
+
self.D *= (np.abs(update_D) - update_D) / (2 * np.dot(np.dot(self.D, self.R), self.R.T) + epsilon)
|
573 |
+
update_R = np.dot(self.D.T, S - X)
|
574 |
+
self.R *= (np.abs(update_R) - update_R) / (2 * np.dot(np.dot(self.D.T, self.D), self.R) + epsilon)
|
575 |
+
self.normalize(epsilon)
|
576 |
+
# Calculate the loss function
|
577 |
+
loss = np.linalg.norm(X - np.dot(self.D, self.R) - S, 'fro') ** 2 + lambd * np.sum(np.abs(S))
|
578 |
+
self.loss_list.append(loss)
|
579 |
+
# Calculate L2-norm based errors for convergence
|
580 |
+
e_D = np.sqrt(np.sum((self.D - self.D_prev) ** 2, axis=(0, 1))) / self.D.size
|
581 |
+
e_R = np.sqrt(np.sum((self.R - self.R_prev) ** 2, axis=(0, 1))) / self.R.size
|
582 |
+
# Update previous matrices for next iteration
|
583 |
+
self.D_prev, self.R_prev = self.D.copy(), self.R.copy()
|
584 |
+
return (e_D < threshold and e_R < threshold)
|
585 |
+
|
586 |
+
def matrix_init(self, X: np.ndarray, n_components: int,
|
587 |
+
random_state: Union[int, np.random.RandomState, None]=None) -> None:
|
588 |
+
return self.Kmeans(X, n_components, random_state)
|
589 |
+
|
590 |
+
class CauchyNMF(BasicNMF):
|
591 |
+
name = 'Cauchy'
|
592 |
+
"""
|
593 |
+
Cauchy NMF algorithm.
|
594 |
+
"""
|
595 |
+
def __init__(self) -> None:
|
596 |
+
"""
|
597 |
+
Initialize the Cauchy NMF algorithm.
|
598 |
+
"""
|
599 |
+
super().__init__()
|
600 |
+
|
601 |
+
# Helper function
|
602 |
+
def compute(self, A: np.ndarray, B: np.ndarray, epsilon: float) -> np.ndarray:
|
603 |
+
"""
|
604 |
+
Update rule for Cauchy divergence.
|
605 |
+
|
606 |
+
Parameters:
|
607 |
+
A (numpy.ndarray): The first matrix, which is noted as A.
|
608 |
+
B (numpy.ndarray): The second matrix, which is noted as B.
|
609 |
+
epsilon (float): Small constant added to denominator to prevent division by zero.
|
610 |
+
|
611 |
+
Returns:
|
612 |
+
C (numpy.ndarray): The updated matrix.
|
613 |
+
"""
|
614 |
+
temp = A ** 2 + 2 * B * A
|
615 |
+
temp = np.where(temp > 0, temp, epsilon)
|
616 |
+
return B / (A + np.sqrt(temp))
|
617 |
+
|
618 |
+
def update(self, X: np.ndarray, epsilon: float=1e-7, threshold: float=1e-4) -> bool:
|
619 |
+
"""
|
620 |
+
Update rule for D and R matrices using Cauchy NMF algorithm.
|
621 |
+
|
622 |
+
Parameters:
|
623 |
+
- X (numpy.ndarray): Input data matrix of shape (n_features, n_samples).
|
624 |
+
- epsilon (float, optional): Small constant added to denominator to prevent division by zero. Default is 1e-7.
|
625 |
+
- threshold (float, optional): Convergence threshold based on Cauchy. Default is 1e-4.
|
626 |
+
|
627 |
+
Returns:
|
628 |
+
- flag (bool): Whether the algorithm has converged.
|
629 |
+
"""
|
630 |
+
if not hasattr(self, 'prev_cauchy_div'):
|
631 |
+
DR = np.dot(self.D, self.R)
|
632 |
+
log_residual = np.log(DR + epsilon) - np.log(X + epsilon)
|
633 |
+
residual = X - DR
|
634 |
+
self.prev_cauchy_div = np.sum(log_residual + residual / (DR + epsilon))
|
635 |
+
# Update rule for D
|
636 |
+
DR = np.dot(self.D, self.R)
|
637 |
+
A = 3 / 4 * np.dot((DR / (DR ** 2 + X + epsilon)), self.R.T)
|
638 |
+
B = np.dot(1 / (DR + epsilon), self.R.T)
|
639 |
+
self.D *= self.compute(A, B, epsilon)
|
640 |
+
# Update rule for R
|
641 |
+
DR = np.dot(self.D, self.R)
|
642 |
+
A = 3 / 4 * np.dot(self.D.T, (DR / (DR ** 2 + X + epsilon)))
|
643 |
+
B = np.dot(self.D.T, 1 / (DR + epsilon))
|
644 |
+
self.R *= self.compute(A, B, epsilon)
|
645 |
+
# Calculate Cauchy divergence
|
646 |
+
DR = np.dot(self.D, self.R)
|
647 |
+
cauchy_div = np.sum(np.log(DR + epsilon) - np.log(X + epsilon) + (X - DR) / (DR + epsilon))
|
648 |
+
self.loss_list.append(cauchy_div)
|
649 |
+
flag = abs(cauchy_div - self.prev_cauchy_div) < threshold
|
650 |
+
self.prev_cauchy_div = cauchy_div # Update previous Cauchy divergence
|
651 |
+
return flag
|
652 |
+
|
653 |
+
class CappedNormNMF(BasicNMF):
|
654 |
+
name = 'CappedNorm'
|
655 |
+
"""
|
656 |
+
Capped Norm NMF algorithm.
|
657 |
+
"""
|
658 |
+
def __init__(self) -> None:
|
659 |
+
"""
|
660 |
+
Initialize Capped Norm NMF algorithm.
|
661 |
+
"""
|
662 |
+
super().__init__()
|
663 |
+
self.loss_prev = float('inf')
|
664 |
+
|
665 |
+
# Helper function
|
666 |
+
def matrix_init(self, X: np.ndarray, n_components: int,
|
667 |
+
random_state: Union[int, np.random.RandomState, None]=None) -> None:
|
668 |
+
return self.Kmeans(X, n_components, random_state)
|
669 |
+
|
670 |
+
def update(self, X, theta: float=0.2, threshold: float=1e-3, epsilon: float=1e-7) -> bool:
|
671 |
+
"""
|
672 |
+
Update rule for D and R matrices using Capped Norm NMF algorithm.
|
673 |
+
|
674 |
+
Parameters:
|
675 |
+
- X (numpy.ndarray): Input data matrix of shape (n_features, n_samples).
|
676 |
+
- theta (float, optional): Outlier parameter. Default is 0.2.
|
677 |
+
- threshold (float, optional): Convergence threshold based on L2,1-norm. Default is 1e-4.
|
678 |
+
- epsilon (float, optional): Small constant added to denominator to prevent division by zero. Default is 1e-7.
|
679 |
+
"""
|
680 |
+
if not hasattr(self, 'I'):
|
681 |
+
self.n_samples = X.shape[1]
|
682 |
+
self.I = np.identity(self.n_samples)
|
683 |
+
# Multiplicative update rule for D and R matrices
|
684 |
+
G = self.R.T
|
685 |
+
self.D *= np.dot(np.dot(X, self.I), G) / (np.dot(np.dot(np.dot(self.D, G.T), self.I), G) + epsilon)
|
686 |
+
G *= np.sqrt((np.dot(np.dot(self.I, X.T), self.D)) / (np.dot(np.dot(np.dot(np.dot(self.I, G), G.T), X.T), self.D) + epsilon))
|
687 |
+
self.R = G.T
|
688 |
+
# Update rule for I
|
689 |
+
diff = X - np.dot(self.D, self.R)
|
690 |
+
norms = np.linalg.norm(diff, axis=0)
|
691 |
+
norms /= np.max(norms)
|
692 |
+
I = np.full_like(norms, epsilon)
|
693 |
+
indices = np.where(norms < theta)
|
694 |
+
I[indices] = 1 / (2 * norms[indices])
|
695 |
+
self.I = np.diagflat(I)
|
696 |
+
# Calculate the loss function
|
697 |
+
loss = np.linalg.norm(X - np.dot(self.D, self.R), 'fro') ** 2
|
698 |
+
flag = abs(loss - self.loss_prev) < threshold
|
699 |
+
self.loss_list.append(loss)
|
700 |
+
self.loss_prev = loss
|
701 |
+
return flag
|
702 |
+
|
703 |
+
class HSCostNMF(BasicNMF):
|
704 |
+
name = 'HSCost'
|
705 |
+
"""
|
706 |
+
Hypersurface Cost NMF algorithm.
|
707 |
+
"""
|
708 |
+
def __init__(self) -> None:
|
709 |
+
"""
|
710 |
+
Initialize Hypersurface Cost NMF algorithm.
|
711 |
+
"""
|
712 |
+
super().__init__()
|
713 |
+
self.loss_prev = float('inf')
|
714 |
+
# Objective function and its gradient
|
715 |
+
self.obj_func = lambda X, D, R: np.linalg.norm(X - np.dot(D, R), 'fro')
|
716 |
+
self.grad_D = lambda X, D, R: (np.dot((np.dot(D, R) - X), R.T)) / np.sqrt(1 + np.linalg.norm(X - np.dot(D, R), 'fro'))
|
717 |
+
self.grad_R = lambda X, D, R: (np.dot(D.T, (np.dot(D, R) - X))) / np.sqrt(1 + np.linalg.norm(X - np.dot(D, R), 'fro'))
|
718 |
+
|
719 |
+
def update(self, X: np.ndarray, threshold: float=1e-8, alpha: float=0.1, beta: float=0.1, c: float=1e-4, tau: float=0.5) -> bool:
|
720 |
+
"""
|
721 |
+
Update rule for D and R matrices using Hypersurface Cost NMF algorithm.
|
722 |
+
|
723 |
+
Parameters:
|
724 |
+
- X (numpy.ndarray): Input data matrix of shape (n_features, n_samples).
|
725 |
+
- alpha (float, optional): Learning rate for gradient descent. Default is 0.1.
|
726 |
+
- beta (float, optional): Learning rate for gradient descent. Default is 0.1.
|
727 |
+
- c (float, optional): A constant in (0, 1), typically a small value. Default is 1e-4.
|
728 |
+
- tau (float, optional): A reduction factor for step size, typically in (0, 1). Default is 0.5.
|
729 |
+
|
730 |
+
Returns:
|
731 |
+
- flag (bool): Whether the algorithm has converged.
|
732 |
+
"""
|
733 |
+
if not hasattr(self, 'alpha'):
|
734 |
+
self.alpha = np.full_like(self.D, alpha)
|
735 |
+
self.beta = np.full_like(self.R, beta)
|
736 |
+
# Vectorized Armijo rule to update alpha and beta
|
737 |
+
self.alpha = self.vectorized_armijo_rule(lambda D: self.obj_func(X, D, self.R), lambda D: self.grad_D(X, D, self.R), self.D, self.alpha, c, tau)
|
738 |
+
self.beta = self.vectorized_armijo_rule(lambda R: self.obj_func(X, self.D, R), lambda R: self.grad_R(X, self.D, R), self.R, self.beta, c, tau)
|
739 |
+
self.alpha = np.maximum(self.alpha, threshold)
|
740 |
+
self.beta = np.maximum(self.beta, threshold)
|
741 |
+
# Update rule for D and R
|
742 |
+
self.D -= self.alpha * (np.dot((np.dot(self.D, self.R) - X), self.R.T)) / np.sqrt(1 + np.linalg.norm(X - np.dot(self.D, self.R), 'fro'))
|
743 |
+
self.R -= self.beta * (np.dot(self.D.T, (np.dot(self.D, self.R) - X))) / np.sqrt(1 + np.linalg.norm(X - np.dot(self.D, self.R), 'fro'))
|
744 |
+
self.D[np.where(self.D < 0)] = 0
|
745 |
+
self.R[np.where(self.R < 0)] = 0
|
746 |
+
# Calculate loss
|
747 |
+
loss_current = np.sqrt(1 + np.linalg.norm(X - np.dot(self.D, self.R), 'fro')) - 1
|
748 |
+
self.loss_list.append(loss_current)
|
749 |
+
flag = abs(loss_current - self.loss_prev) < threshold
|
750 |
+
# Update previous loss for next iteration
|
751 |
+
self.loss_prev = loss_current
|
752 |
+
return flag
|
algorithm/pipeline.py
ADDED
@@ -0,0 +1,371 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import csv
|
3 |
+
import logging
|
4 |
+
from typing import Union, List, Tuple, Generator
|
5 |
+
|
6 |
+
import numpy as np
|
7 |
+
import pandas as pd
|
8 |
+
|
9 |
+
from algorithm.datasets import load_data, get_image_size
|
10 |
+
from algorithm.preprocess import NoiseAdder, MinMaxScaler, StandardScaler
|
11 |
+
from algorithm.sample import random_sample
|
12 |
+
from algorithm.nmf import BasicNMF, L2NormNMF, KLDivergenceNMF, ISDivergenceNMF, L21NormNMF, HSCostNMF, L1NormRegularizedNMF, CappedNormNMF, CauchyNMF
|
13 |
+
from algorithm.user_evaluate import evaluate
|
14 |
+
|
15 |
+
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
16 |
+
|
17 |
+
def summary(log_file_name: str) -> pd.DataFrame:
|
18 |
+
"""
|
19 |
+
Parameter:
|
20 |
+
log_file_name (str): The name of the log file to read.
|
21 |
+
|
22 |
+
Return:
|
23 |
+
result (pandas.DataFrame): The summary of the log file.
|
24 |
+
"""
|
25 |
+
df = pd.read_csv(log_file_name)
|
26 |
+
result = df.groupby(by=['dataset', 'noise_type', 'noise_level'])[['rmse', 'nmi', 'acc']].mean()
|
27 |
+
return result
|
28 |
+
|
29 |
+
class BasicBlock(object):
|
30 |
+
"""
|
31 |
+
Basic block for the pipeline.
|
32 |
+
"""
|
33 |
+
def basic_info(self, nmf: Union[BasicNMF, str], dataset: str, scaler: str) -> Tuple[str, Union[MinMaxScaler, StandardScaler], BasicNMF]:
|
34 |
+
"""
|
35 |
+
Get the basic information for the pipeline.
|
36 |
+
|
37 |
+
Parameters:
|
38 |
+
- nmf (Union[BasicNMF, str]): NMF algorithm to use.
|
39 |
+
- dataset (str): Name of the dataset to use.
|
40 |
+
- scaler (str): Name of the scaler to use.
|
41 |
+
|
42 |
+
Returns:
|
43 |
+
- folder (str): Folder of the dataset.
|
44 |
+
- scaler (MinMaxScaler or StandardScaler): Scaler to use.
|
45 |
+
- nmf (BasicNMF): NMF algorithm to use.
|
46 |
+
"""
|
47 |
+
# Create mappings for the NMF algorithms, datasets, and scalers
|
48 |
+
# Store NMF algorithms in a dictionary
|
49 |
+
nmf_dict = {
|
50 |
+
'L2NormNMF': L2NormNMF,
|
51 |
+
'KLDivergenceNMF': KLDivergenceNMF,
|
52 |
+
'ISDivergenceNMF': ISDivergenceNMF,
|
53 |
+
'L21NormNMF': L21NormNMF,
|
54 |
+
'HSCostNMF': HSCostNMF,
|
55 |
+
'L1NormRegularizedNMF': L1NormRegularizedNMF,
|
56 |
+
'CappedNormNMF': CappedNormNMF,
|
57 |
+
'CauchyNMF': CauchyNMF
|
58 |
+
}
|
59 |
+
# Store datasets in a dictionary
|
60 |
+
dataset_dict = {
|
61 |
+
'ORL': 'data/ORL',
|
62 |
+
'YaleB': 'data/CroppedYaleB'
|
63 |
+
}
|
64 |
+
# Store scalers in a dictionary
|
65 |
+
scaler_dict = {
|
66 |
+
'MinMax': MinMaxScaler(),
|
67 |
+
'Standard': StandardScaler()
|
68 |
+
}
|
69 |
+
folder = dataset_dict.get(dataset, 'data/ORL')
|
70 |
+
# Scale the data
|
71 |
+
scaler = scaler_dict.get(scaler, MinMaxScaler())
|
72 |
+
# Choose an NMF algorithm
|
73 |
+
if isinstance(nmf, BasicNMF):
|
74 |
+
nmf = nmf
|
75 |
+
else:
|
76 |
+
# Choose an NMF algorithm
|
77 |
+
nmf = nmf_dict.get(nmf, L1NormRegularizedNMF)()
|
78 |
+
return folder, scaler, nmf
|
79 |
+
|
80 |
+
def load_data(self, folder: str, reduce: int=1, random_state: Union[int, np.random.RandomState, None]=None) -> Tuple[np.ndarray, np.ndarray, Tuple[int, int]]:
|
81 |
+
"""
|
82 |
+
Load the data.
|
83 |
+
|
84 |
+
Parameters:
|
85 |
+
- folder (str): Folder of the dataset.
|
86 |
+
- reduce (int): Factor by which the image size is reduced for visualization.
|
87 |
+
- random_state (Union[int, np.random.RandomState, None]): Random state to use for sampling.
|
88 |
+
|
89 |
+
Returns:
|
90 |
+
- X_hat (np.ndarray): The data matrix.
|
91 |
+
- Y_hat (np.ndarray): The label matrix.
|
92 |
+
- img_size (Tuple[int, int]): Size of the images.
|
93 |
+
"""
|
94 |
+
# Load ORL dataset
|
95 |
+
X_hat, Y_hat = load_data(folder, reduce=reduce)
|
96 |
+
# Randomly sample 90% of the data
|
97 |
+
X_hat, Y_hat = random_sample(X_hat, Y_hat, 0.9, random_state=random_state)
|
98 |
+
# Get the size of images
|
99 |
+
img_size = get_image_size(folder)
|
100 |
+
return X_hat, Y_hat, img_size
|
101 |
+
|
102 |
+
def add_noise(self, X_hat: np.ndarray, noise_type: str, noise_level: float, random_state: Union[int, np.random.RandomState, None], reduce: int) -> np.ndarray:
|
103 |
+
"""
|
104 |
+
Add noise to the data.
|
105 |
+
|
106 |
+
Parameters:
|
107 |
+
- X_hat (np.ndarray): The data matrix.
|
108 |
+
- noise_type (str): Type of noise to add to the data.
|
109 |
+
- noise_level (float): Level of noise to add to the data.
|
110 |
+
- random_state (Union[int, np.random.RandomState, None]): Random state to use for adding noise.
|
111 |
+
- reduce (int): Factor by which the image size is reduced for visualization.
|
112 |
+
|
113 |
+
Returns:
|
114 |
+
- X_noise (np.ndarray): The noisy data matrix.
|
115 |
+
"""
|
116 |
+
# Set random state and noise adder
|
117 |
+
noise_adder = NoiseAdder(random_state=random_state)
|
118 |
+
# Create a dictionary of noise functions
|
119 |
+
noise_dict = {
|
120 |
+
'uniform': (noise_adder.add_uniform_noise, {'X_hat': X_hat, 'noise_level': noise_level}),
|
121 |
+
'gaussian': (noise_adder.add_gaussian_noise, {'X_hat': X_hat, 'noise_level': noise_level}),
|
122 |
+
'laplacian': (noise_adder.add_laplacian_noise, {'X_hat': X_hat, 'noise_level': noise_level}),
|
123 |
+
'salt_and_pepper': (noise_adder.add_salt_and_pepper_noise, {'X_hat': X_hat, 'noise_level': noise_level}),
|
124 |
+
'block': (noise_adder.add_block_noise, {'X_hat': X_hat, 'block_size': noise_level, 'img_width': self.img_size[0]//reduce})
|
125 |
+
}
|
126 |
+
# Map the noise type to the noise function
|
127 |
+
noise_func, args = noise_dict.get(noise_type, (noise_adder.add_uniform_noise, {'X_hat': X_hat, 'noise_level': noise_level}))
|
128 |
+
# Add noise to the data
|
129 |
+
_, X_noise = noise_func(**args)
|
130 |
+
return X_noise
|
131 |
+
|
132 |
+
def scale(self, X_hat: np.ndarray, X_noise: np.ndarray, scaler: Union[MinMaxScaler, StandardScaler]) -> Tuple[np.ndarray, np.ndarray]:
|
133 |
+
"""
|
134 |
+
Scale the data.
|
135 |
+
|
136 |
+
Parameters:
|
137 |
+
- X_hat (np.ndarray): The data matrix.
|
138 |
+
- X_noise (np.ndarray): The noisy data matrix.
|
139 |
+
- scaler (MinMaxScaler or StandardScaler): Scaler to use for scaling the data.
|
140 |
+
|
141 |
+
Returns:
|
142 |
+
- X_hat_scaled (np.ndarray): The scaled data matrix.
|
143 |
+
- X_noise_scaled (np.ndarray): The scaled noisy data matrix.
|
144 |
+
"""
|
145 |
+
# Scale the data
|
146 |
+
X_hat_scaled = scaler.fit_transform(X_hat)
|
147 |
+
X_noise_scaled = scaler.transform(X_noise)
|
148 |
+
# Ensure that the scaled noisy data is non-negative
|
149 |
+
X_noise_scaled += np.abs(np.min(X_noise_scaled)) * np.abs(np.min(X_noise_scaled)) * int(np.min(X_noise_scaled) < 0)
|
150 |
+
return X_hat_scaled, X_noise_scaled
|
151 |
+
|
152 |
+
class Pipeline(BasicBlock):
|
153 |
+
def __init__(self, nmf: Union[str, BasicNMF], dataset: str='ORL', reduce: int=1, noise_type: str='uniform',
|
154 |
+
noise_level: float=0.02, random_state: int=3407, scaler: str='MinMax') -> None:
|
155 |
+
"""
|
156 |
+
Initialize the pipeline.
|
157 |
+
|
158 |
+
Parameters:
|
159 |
+
- nmf (str or BasicNMF): Name of the NMF algorithm to use.
|
160 |
+
- dataset (str): Name of the dataset to use.
|
161 |
+
- reduce (int): Factor by which the image size is reduced for visualization.
|
162 |
+
- noise_type (str): Type of noise to add to the data.
|
163 |
+
- noise_level (float): Level of noise to add to the data.
|
164 |
+
- random_state (int): Random state to use for the NMF algorithm.
|
165 |
+
- scaler (str): Name of the scaler to use for scaling the data.
|
166 |
+
|
167 |
+
Returns:
|
168 |
+
None. The function will initialize the pipeline.
|
169 |
+
"""
|
170 |
+
# Get the basic information for the pipeline
|
171 |
+
folder, scaler, self.nmf = self.basic_info(nmf, dataset, scaler)
|
172 |
+
# Load the data
|
173 |
+
X_hat, self.__Y_hat, self.img_size = self.load_data(folder, reduce=reduce, random_state=random_state)
|
174 |
+
# Add noise to the data
|
175 |
+
X_noise = self.add_noise(X_hat, noise_type, noise_level, random_state, reduce)
|
176 |
+
# Scale the data
|
177 |
+
self.__X_hat_scaled, self.__X_noise_scaled = self.scale(X_hat, X_noise, scaler)
|
178 |
+
self.reduce = reduce
|
179 |
+
self.random_state = random_state
|
180 |
+
# Delete the attributes that might occupy significant memory
|
181 |
+
del X_hat, X_noise, folder, scaler, noise_type, noise_level, random_state, dataset, reduce, nmf
|
182 |
+
|
183 |
+
def execute(self, max_iter: int, convergence_trend: bool=False, matrix_size: bool=False, verbose: bool=False) -> Tuple[float, float, float]:
|
184 |
+
"""
|
185 |
+
Run the pipeline.
|
186 |
+
|
187 |
+
Parameters:
|
188 |
+
- max_iter (int): Maximum number of iterations to run the NMF algorithm.
|
189 |
+
- convergence_trend (bool): Whether to display the convergence trend of the NMF algorithm.
|
190 |
+
- matrix_size (bool): Whether to display the size of the basis and coefficient matrices.
|
191 |
+
- verbose (bool): Whether to display the verbose output of the NMF algorithm.
|
192 |
+
"""
|
193 |
+
# Run NMF
|
194 |
+
self.nmf.fit(self.__X_noise_scaled, len(set(self.__Y_hat)), max_iter=max_iter,
|
195 |
+
random_state=self.random_state, imshow=convergence_trend, verbose=verbose)
|
196 |
+
# Get the dictionary and representation matrices
|
197 |
+
self.D, self.R = self.nmf.D, self.nmf.R
|
198 |
+
if matrix_size:
|
199 |
+
print('D.shape={}, R.shape={}'.format(self.D.shape, self.R.shape))
|
200 |
+
self.metrics = self.nmf.evaluate(self.__X_hat_scaled, self.__Y_hat, random_state=self.random_state)
|
201 |
+
return self.metrics
|
202 |
+
|
203 |
+
def evaluate(self, idx: int=2, imshow: bool=False) -> None:
|
204 |
+
"""
|
205 |
+
Evaluate the NMF algorithm.
|
206 |
+
|
207 |
+
Parameters:
|
208 |
+
- idx (int): Index of the image to evaluate.
|
209 |
+
- imshow (bool): Whether to display the images.
|
210 |
+
"""
|
211 |
+
evaluate(self.nmf, self.metrics, self.__X_hat_scaled, self.__X_noise_scaled,
|
212 |
+
self.img_size, self.reduce, idx, imshow)
|
213 |
+
|
214 |
+
def visualization(self, idx: int=2) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
|
215 |
+
"""
|
216 |
+
Visualize the NMF algorithm.
|
217 |
+
|
218 |
+
Parameters:
|
219 |
+
- idx (int): Index of the image to visualize.
|
220 |
+
|
221 |
+
Returns:
|
222 |
+
- X_i (np.ndarray): The original image.
|
223 |
+
- X_noise_i (np.ndarray): The noisy image.
|
224 |
+
- DR_i (np.ndarray): The reconstructed image.
|
225 |
+
"""
|
226 |
+
DR = np.dot(self.D, self.R).reshape(self.__X_hat_scaled.shape[0], self.__X_hat_scaled.shape[1])
|
227 |
+
# Calculate reduced image size based on the 'reduce' factor
|
228 |
+
img_size = [i//self.reduce for i in self.img_size]
|
229 |
+
# Retrieve the specified image from the data
|
230 |
+
X_i = self.__X_hat_scaled[:,idx].reshape(img_size[1],img_size[0])
|
231 |
+
X_noise_i = self.__X_noise_scaled[:,idx].reshape(img_size[1],img_size[0])
|
232 |
+
DR_i = DR[:,idx].reshape(img_size[1],img_size[0])
|
233 |
+
return X_i, X_noise_i, DR_i
|
234 |
+
|
235 |
+
def cleanup(self) -> None:
|
236 |
+
"""
|
237 |
+
Cleanup method to release resources and delete instances.
|
238 |
+
"""
|
239 |
+
# Delete attributes that might occupy significant memory
|
240 |
+
if hasattr(self, 'nmf'):
|
241 |
+
del self.nmf, self.__X_hat_scaled, self.__X_noise_scaled, self.D, self.R, self.metrics
|
242 |
+
|
243 |
+
class Experiment:
|
244 |
+
"""
|
245 |
+
Set up the experiment.
|
246 |
+
"""
|
247 |
+
data_dirs = ['data/ORL', 'data/CroppedYaleB']
|
248 |
+
data_container = [[], []]
|
249 |
+
noises = {
|
250 |
+
'uniform': [0.1, 0.3],
|
251 |
+
'gaussian': [0.05, 0.08],
|
252 |
+
'laplacian': [0.04, 0.06],
|
253 |
+
'salt_and_pepper': [0.02, 0.1],
|
254 |
+
'block': [10, 15],}
|
255 |
+
|
256 |
+
nmf_dict = {
|
257 |
+
'L2NormNMF': L2NormNMF,
|
258 |
+
'KLDivergenceNMF': KLDivergenceNMF,
|
259 |
+
'ISDivergenceNMF': ISDivergenceNMF,
|
260 |
+
'L21NormNMF': L21NormNMF,
|
261 |
+
'HSCostNMF': HSCostNMF,
|
262 |
+
'L1NormRegularizedNMF': L1NormRegularizedNMF,
|
263 |
+
'CappedNormNMF': CappedNormNMF,
|
264 |
+
'CauchyNMF': CauchyNMF,}
|
265 |
+
|
266 |
+
def __init__(self,
|
267 |
+
seeds: List[int]=None) -> None:
|
268 |
+
"""
|
269 |
+
Initialize the experiment.
|
270 |
+
|
271 |
+
Parameters:
|
272 |
+
- seeds (List[int]): Random seeds to use for the experiment.
|
273 |
+
"""
|
274 |
+
self.seeds = [0, 42, 99, 512, 3407] if seeds is None else seeds
|
275 |
+
|
276 |
+
def choose(self, nmf: Union[str, BasicNMF]) -> None:
|
277 |
+
"""
|
278 |
+
Choose an NMF algorithm. Essentially, this method sets the NMF algorithm to use for the experiment.
|
279 |
+
|
280 |
+
nmf (Union[str, BasicNMF]): NMF algorithm to use.
|
281 |
+
"""
|
282 |
+
if isinstance(nmf, BasicNMF):
|
283 |
+
self.nmf = nmf
|
284 |
+
else:
|
285 |
+
# Choose an NMF algorithm
|
286 |
+
self.nmf = self.nmf_dict.get(nmf, L1NormRegularizedNMF)()
|
287 |
+
|
288 |
+
def data_loader(self) -> Generator[Tuple[str, int, np.ndarray, np.ndarray, np.ndarray, str, float], None, None]:
|
289 |
+
"""
|
290 |
+
Construct a generator to load the data.
|
291 |
+
|
292 |
+
Returns:
|
293 |
+
- data_file (str): Name of the dataset.
|
294 |
+
- seed (int): Random seed to use for the experiment.
|
295 |
+
- X_hat_scaled (np.ndarray): The scaled data matrix.
|
296 |
+
- Y_hat (np.ndarray): The label matrix.
|
297 |
+
- X_noise_scaled (np.ndarray): The scaled noisy data matrix.
|
298 |
+
- noise_type (str): Type of noise to add to the data.
|
299 |
+
- noise_level (float): Level of noise to add to the data.
|
300 |
+
"""
|
301 |
+
scaler = MinMaxScaler()
|
302 |
+
# Data file loop
|
303 |
+
for data_file in self.data_dirs:
|
304 |
+
reduce = 1 if data_file.endswith('ORL') else 3
|
305 |
+
image_size = get_image_size(data_file)
|
306 |
+
X_hat_, Y_hat_ = load_data(root=data_file, reduce=reduce)
|
307 |
+
# Random seed loop
|
308 |
+
for seed in self.seeds:
|
309 |
+
noise_adder = NoiseAdder(random_state=seed)
|
310 |
+
X_hat, Y_hat = random_sample(X_hat_, Y_hat_, 0.9, random_state=seed)
|
311 |
+
X_hat_scaled = scaler.fit_transform(X_hat)
|
312 |
+
# Noise type loop
|
313 |
+
for noise_type in self.noises:
|
314 |
+
add_noise_ = getattr(noise_adder, f'add_{noise_type}_noise')
|
315 |
+
# Noise level loop
|
316 |
+
for noise_level in self.noises[noise_type]:
|
317 |
+
_, X_noise = add_noise_(X_hat, noise_level=noise_level) if noise_type != 'block' else add_noise_(X_hat, image_size[0]//reduce, noise_level)
|
318 |
+
X_noise_scaled = scaler.transform(X_noise)
|
319 |
+
X_noise_scaled += np.abs(np.min(X_noise_scaled)) * int(np.min(X_noise_scaled) < 0)
|
320 |
+
yield data_file.split("/")[-1], seed, X_hat_scaled, Y_hat, X_noise_scaled, noise_type, noise_level
|
321 |
+
|
322 |
+
def sync_fit(self, dataset: str, seed: int, X_hat_scaled: np.ndarray, Y_hat: np.ndarray, X_noise_scaled: np.ndarray, noise_type: str, noise_level: float) -> Tuple[str, str, float, int, float, float, float]:
|
323 |
+
"""
|
324 |
+
Fit the NMF algorithm on the dataset with noise synchronously.
|
325 |
+
|
326 |
+
Parameters:
|
327 |
+
- dataset (str): Name of the dataset.
|
328 |
+
- seed (int): Random seed to use for the experiment.
|
329 |
+
- X_hat_scaled (np.ndarray): The scaled data matrix.
|
330 |
+
- Y_hat (np.ndarray): The label matrix.
|
331 |
+
- X_noise_scaled (np.ndarray): The scaled noisy data matrix.
|
332 |
+
- noise_type (str): Type of noise to add to the data.
|
333 |
+
- noise_level (float): Level of noise to add to the data.
|
334 |
+
|
335 |
+
Returns:
|
336 |
+
- dataset (str): Name of the dataset.
|
337 |
+
- noise_type (str): Type of noise to add to the data.
|
338 |
+
- noise_level (float): Level of noise to add to the data.
|
339 |
+
- seed (int): Random seed to use for the experiment.
|
340 |
+
- rmse (float): Root mean squared error of the NMF algorithm.
|
341 |
+
- acc (float): Accuracy of the NMF algorithm.
|
342 |
+
- nmi (float): Normalized mutual information of the NMF algorithm.
|
343 |
+
"""
|
344 |
+
self.nmf.fit(X_noise_scaled, len(set(Y_hat)), random_state=seed, verbose=False)
|
345 |
+
# Display the current experiment information
|
346 |
+
logging.info(f'Dataset: {dataset} Random seed: {seed} - Test on {noise_type} with {noise_level} ended.')
|
347 |
+
return dataset, noise_type, noise_level, seed, *self.nmf.evaluate(X_hat_scaled, Y_hat, random_state=seed)
|
348 |
+
|
349 |
+
def execute(self) -> None:
|
350 |
+
"""
|
351 |
+
Execute the experiments.
|
352 |
+
"""
|
353 |
+
# Lazy import to avoid multiprocessing error
|
354 |
+
import multiprocessing
|
355 |
+
results = []
|
356 |
+
# Define the multiprocessing pool
|
357 |
+
with multiprocessing.Pool(10) as pool:
|
358 |
+
for result in pool.starmap(self.sync_fit, self.data_loader()):
|
359 |
+
# Append the result to the list
|
360 |
+
results.append(result)
|
361 |
+
# Write the results to a csv file
|
362 |
+
if not os.path.exists(f'{self.nmf.name}_log.csv'):
|
363 |
+
mode = 'w'
|
364 |
+
else:
|
365 |
+
mode = 'a'
|
366 |
+
with open(f'{self.nmf.name}_log.csv', mode) as f:
|
367 |
+
writer = csv.writer(f)
|
368 |
+
if mode == 'w':
|
369 |
+
writer.writerow(['dataset', 'noise_type', 'noise_level', 'seed', 'rmse', 'acc', 'nmi'])
|
370 |
+
for result in results:
|
371 |
+
writer.writerow(result)
|
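Usage note (not part of the commit itself): a minimal sketch of how the Pipeline and Experiment classes above are driven, assuming the data/ORL and data/CroppedYaleB folders shipped in this commit are present and that algorithm.nmf defines the listed NMF classes.

from algorithm.pipeline import Pipeline, Experiment

# Single run: one NMF variant, one dataset, one noise setting.
pipeline = Pipeline(nmf='L1NormRegularizedNMF', dataset='ORL', reduce=1,
                    noise_type='salt_and_pepper', noise_level=0.02,
                    random_state=3407, scaler='MinMax')
rmse, acc, nmi = pipeline.execute(max_iter=500, verbose=True)
pipeline.evaluate(idx=2, imshow=False)   # prints RMSE, accuracy and NMI
pipeline.cleanup()

# Full sweep over datasets, seeds, noise types and levels; results land in <name>_log.csv.
experiment = Experiment(seeds=[0, 42])
experiment.choose('L1NormRegularizedNMF')
experiment.execute()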
algorithm/preprocess.py
ADDED
@@ -0,0 +1,234 @@
1 |
+
from typing import Union, Tuple
|
2 |
+
|
3 |
+
import numpy as np
|
4 |
+
|
5 |
+
class MinMaxScaler:
|
6 |
+
"""
|
7 |
+
This class scales and transforms features to [0, 1].
|
8 |
+
"""
|
9 |
+
def fit(self, X: np.ndarray) -> None:
|
10 |
+
"""
|
11 |
+
Compute the minimum and the range of the data for later scaling.
|
12 |
+
|
13 |
+
Parameters:
|
14 |
+
- X: numpy array-like, shape (n_samples, n_features)
|
15 |
+
The data used to compute the minimum and range used for later scaling.
|
16 |
+
"""
|
17 |
+
self.min_ = np.min(X, axis=0)
|
18 |
+
self.range_ = np.max(X, axis=0) - self.min_
|
19 |
+
|
20 |
+
|
21 |
+
def transform(self, X: np.ndarray) -> np.ndarray:
|
22 |
+
"""
|
23 |
+
Scale the data using the values computed during the fit method.
|
24 |
+
|
25 |
+
Parameters:
|
26 |
+
- X: numpy array-like, shape (n_samples, n_features)
|
27 |
+
Input data that needs to be scaled.
|
28 |
+
|
29 |
+
Returns:
|
30 |
+
- numpy array, shape (n_samples, n_features)
|
31 |
+
Transformed data.
|
32 |
+
"""
|
33 |
+
return (X - self.min_) / self.range_
|
34 |
+
|
35 |
+
def fit_transform(self, X: np.ndarray) -> np.ndarray:
|
36 |
+
"""
|
37 |
+
Fit to the data and then transform it.
|
38 |
+
|
39 |
+
Parameters:
|
40 |
+
- X: numpy array-like, shape (n_samples, n_features)
|
41 |
+
Input data that needs to be scaled and transformed.
|
42 |
+
|
43 |
+
Returns:
|
44 |
+
- numpy array, shape (n_samples, n_features)
|
45 |
+
Transformed data.
|
46 |
+
"""
|
47 |
+
self.fit(X)
|
48 |
+
return self.transform(X)
|
49 |
+
|
50 |
+
class StandardScaler:
|
51 |
+
"""
|
52 |
+
This class standardizes features by removing the mean and scaling to unit variance.
|
53 |
+
"""
|
54 |
+
def fit(self, X: np.ndarray) -> None:
|
55 |
+
"""
|
56 |
+
Compute the mean and standard deviation of the data for later standardization.
|
57 |
+
|
58 |
+
Parameters:
|
59 |
+
- X: numpy array-like, shape (n_samples, n_features)
|
60 |
+
The data used to compute the mean and standard deviation used for later standardization.
|
61 |
+
"""
|
62 |
+
self.mean_ = np.mean(X, axis=0)
|
63 |
+
self.std_ = np.std(X, axis=0)
|
64 |
+
|
65 |
+
def transform(self, X: np.ndarray) -> np.ndarray:
|
66 |
+
"""
|
67 |
+
Standardize the data using the values computed during the fit method.
|
68 |
+
|
69 |
+
Parameters:
|
70 |
+
- X: numpy array-like, shape (n_samples, n_features)
|
71 |
+
Input data that needs to be standardized.
|
72 |
+
|
73 |
+
Returns:
|
74 |
+
- numpy array, shape (n_samples, n_features)
|
75 |
+
Transformed data.
|
76 |
+
"""
|
77 |
+
return (X - self.mean_) / self.std_
|
78 |
+
|
79 |
+
def fit_transform(self, X: np.ndarray) -> np.ndarray:
|
80 |
+
"""
|
81 |
+
Fit to the data and then transform it.
|
82 |
+
|
83 |
+
Parameters:
|
84 |
+
- X: numpy array-like, shape (n_samples, n_features)
|
85 |
+
Input data that needs to be standardized and transformed.
|
86 |
+
|
87 |
+
Returns:
|
88 |
+
- numpy array, shape (n_samples, n_features)
|
89 |
+
Transformed data.
|
90 |
+
"""
|
91 |
+
self.fit(X)
|
92 |
+
return self.transform(X)
|
93 |
+
|
94 |
+
class NoiseAdder:
|
95 |
+
"""
|
96 |
+
This class adds noise to data.
|
97 |
+
"""
|
98 |
+
def __init__(self, random_state: Union[int, np.random.RandomState, None]=None) -> None:
|
99 |
+
"""
|
100 |
+
Initializes the NoiseAdder with a random state and noise parameters.
|
101 |
+
|
102 |
+
Parameters:
|
103 |
+
- random_state (int or RandomState instance or None): Controls the randomness. If int, is used as seed for RandomState.
|
104 |
+
- noise_params (dict): Additional noise parameters.
|
105 |
+
"""
|
106 |
+
self.rng = np.random.RandomState(random_state)
|
107 |
+
|
108 |
+
def add_uniform_noise(self, X_hat: np.ndarray, noise_level: float=0.1) -> Tuple[np.ndarray, np.ndarray]:
|
109 |
+
"""
|
110 |
+
Add uniform random noise to data.
|
111 |
+
|
112 |
+
Parameters:
|
113 |
+
- X_hat (numpy array): Original data.
- noise_level (float): Scale of the uniform noise, relative to the data range.
|
114 |
+
|
115 |
+
Returns:
|
116 |
+
- Numpy array of uniform noise.
|
117 |
+
- Numpy array with added uniform noise.
|
118 |
+
"""
|
119 |
+
a, b = 0, 1
|
120 |
+
# Generate noise
|
121 |
+
X_noise = self.rng.uniform(a, b, size=X_hat.shape) * noise_level * (np.max(X_hat) - np.min(X_hat))
|
122 |
+
return X_noise, X_hat + X_noise
|
123 |
+
|
124 |
+
def add_gaussian_noise(self, X_hat, noise_level=0.1):
|
125 |
+
"""
|
126 |
+
Add Gaussian noise to data.
|
127 |
+
|
128 |
+
Parameters:
|
129 |
+
- X_hat (numpy array): Original data.
|
130 |
+
- noise_level (float): Scale of the Gaussian noise, relative to the data range.
|
131 |
+
  The underlying Gaussian has mean 0 and standard deviation 1.
|
132 |
+
|
133 |
+
Returns:
|
134 |
+
- Numpy array of Gaussian noise.
|
135 |
+
- Numpy array with added Gaussian noise.
|
136 |
+
"""
|
137 |
+
mean, std = 0, 1
|
138 |
+
# Generate noise
|
139 |
+
X_noise = self.rng.normal(mean, std, size=X_hat.shape) * noise_level * (np.max(X_hat) - np.min(X_hat))
|
140 |
+
return X_noise, X_hat + X_noise
|
141 |
+
|
142 |
+
def add_laplacian_noise(self, X_hat, noise_level=0.1):
|
143 |
+
"""
|
144 |
+
Add Laplacian noise to data.
|
145 |
+
|
146 |
+
Parameters:
|
147 |
+
- X_hat (numpy array): Original data.
|
148 |
+
- noise_level (float): Scale of the Laplacian noise, relative to the data maximum.
|
149 |
+
  The underlying Laplacian has location 0 and scale 1.
|
150 |
+
|
151 |
+
Returns:
|
152 |
+
- Numpy array of Laplacian noise.
|
153 |
+
- Numpy array with added Laplacian noise.
|
154 |
+
"""
|
155 |
+
# Initialize parameters
|
156 |
+
mu, lambd = 0, 1
|
157 |
+
# Generate noise
|
158 |
+
X_noise = self.rng.laplace(mu, lambd, size=X_hat.shape) * noise_level * np.max(X_hat)
|
159 |
+
return X_noise, X_hat + X_noise
|
160 |
+
|
161 |
+
def add_block_noise(self, X_hat: np.ndarray, img_width: int, block_size: int=10) -> Tuple[np.ndarray, np.ndarray]:
|
162 |
+
"""
|
163 |
+
Add block noise to multiple flattened image samples.
|
164 |
+
|
165 |
+
Parameters:
|
166 |
+
- X_hat (numpy array): Array of shape (m, n) where m is the flattened image length and n is the number of samples
|
167 |
+
- img_width (int): width of the original image
|
168 |
+
- block_size (int): size of the block to occlude
|
169 |
+
|
170 |
+
Returns:
|
171 |
+
- Numpy array of noise added to each sample
|
172 |
+
- Numpy array with added block noise for all samples
|
173 |
+
"""
|
174 |
+
# Initialize parameters
|
175 |
+
X = X_hat.copy()
|
176 |
+
m, n_samples = X.shape
|
177 |
+
X_noise = np.zeros((m, n_samples), dtype=np.uint8)
|
178 |
+
# For each sample in X
|
179 |
+
for i in range(n_samples):
|
180 |
+
sample = X[:, i]
|
181 |
+
# Reshape the flattened array to 2D
|
182 |
+
img_2d = sample.reshape(-1, img_width)
|
183 |
+
height, width = img_2d.shape
|
184 |
+
# Ensure the block size isn't larger than the image dimensions
|
185 |
+
block_size = min(block_size, width, height)
|
186 |
+
# Generate a random starting point for the block
|
187 |
+
x_start = self.rng.randint(0, width - block_size)
|
188 |
+
y_start = self.rng.randint(0, height - block_size)
|
189 |
+
# Add block noise
|
190 |
+
img_2d[y_start:y_start+block_size, x_start:x_start+block_size] = 255
|
191 |
+
# Store the noise block to noise array
|
192 |
+
noise_2d = np.zeros((height, width), dtype=np.uint8)
|
193 |
+
noise_2d[y_start:y_start+block_size, x_start:x_start+block_size] = 255
|
194 |
+
X_noise[:, i] = noise_2d.ravel()
|
195 |
+
# Flatten the array back to 1D and store back in X
|
196 |
+
X[:, i] = img_2d.ravel()
|
197 |
+
return X_noise, X
|
198 |
+
|
199 |
+
def add_salt_and_pepper_noise(self, X_hat, noise_level=0.02, salt_ratio=0.5) -> Tuple[np.ndarray, np.ndarray]:
|
200 |
+
"""
|
201 |
+
Add "salt and pepper" noise to data.
|
202 |
+
|
203 |
+
Parameters:
|
204 |
+
- X_hat (numpy array): Original data.
|
205 |
+
- noise_level (float): Proportion of image pixels to be replaced.
|
206 |
+
- salt_ratio (float): Proportion of replaced pixels that are "salt".
|
207 |
+
|
208 |
+
Returns:
|
209 |
+
- Numpy array of salt and pepper noise.
|
210 |
+
- Numpy array with added salt and pepper noise.
|
211 |
+
"""
|
212 |
+
# Initialize parameters
|
213 |
+
X = X_hat.copy()
|
214 |
+
X_noise = np.zeros_like(X)
|
215 |
+
# Get the total number of pixels that should be replaced by noise
|
216 |
+
total_pixels = X.size
|
217 |
+
num_noise_pixels = int(total_pixels * noise_level)
|
218 |
+
# Separate the number of salt and pepper pixels based on the salt_ratio
|
219 |
+
num_salt = int(num_noise_pixels * salt_ratio)
|
220 |
+
num_pepper = num_noise_pixels - num_salt
|
221 |
+
# Directly generate the noise coordinates without overlap
|
222 |
+
noise_coords = self.rng.choice(total_pixels, num_noise_pixels, replace=False)
|
223 |
+
salt_coords = noise_coords[:num_salt]
|
224 |
+
pepper_coords = noise_coords[num_salt:]
|
225 |
+
# Convert the 1D noise coordinates back to tuple of N-dim coordinates
|
226 |
+
salt_coords = np.unravel_index(salt_coords, X.shape)
|
227 |
+
pepper_coords = np.unravel_index(pepper_coords, X.shape)
|
228 |
+
# Set salt and pepper pixels in the image
|
229 |
+
max_pixel_val = np.max(X)
|
230 |
+
X_noise[salt_coords] = max_pixel_val
|
231 |
+
X_noise[pepper_coords] = 0
|
232 |
+
X[salt_coords] = max_pixel_val
|
233 |
+
X[pepper_coords] = 0
|
234 |
+
return X_noise, X
|
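A quick, self-contained sketch of the preprocessing utilities above, mirroring how the pipeline scales clean and noisy data (the array shapes are synthetic and purely illustrative):

import numpy as np
from algorithm.preprocess import MinMaxScaler, NoiseAdder

rng = np.random.RandomState(0)
X = rng.rand(64, 10) * 255                        # ten flattened 8x8 'images' stored as columns

adder = NoiseAdder(random_state=0)
_, X_noisy = adder.add_salt_and_pepper_noise(X, noise_level=0.02, salt_ratio=0.5)
_, X_blocked = adder.add_block_noise(X, img_width=8, block_size=3)

scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)                # each column scaled to [0, 1]
X_noisy_scaled = scaler.transform(X_noisy)        # noisy data scaled with the same statistics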
algorithm/sample.py
ADDED
@@ -0,0 +1,37 @@
1 |
+
from typing import Tuple, Union
|
2 |
+
|
3 |
+
import numpy as np
|
4 |
+
|
5 |
+
def random_sample(X: np.ndarray, Y: np.ndarray, fraction: float=0.90, random_state: Union[int, np.random.RandomState, None]=None) -> Tuple[np.ndarray, np.ndarray]:
|
6 |
+
"""
|
7 |
+
Randomly sample a fraction of the data.
|
8 |
+
|
9 |
+
Parameters:
|
10 |
+
- X (numpy.ndarray): The input data matrix of shape (n_features, n_samples)
|
11 |
+
where n_samples is the number of samples, and n_features
|
12 |
+
is the number of features.
|
13 |
+
- Y (numpy.ndarray): The output data matrix of shape (n_samples, )
|
14 |
+
- fraction (float): The fraction of the data to be sampled.
|
15 |
+
- random_state (int): The seed for the random number generator.
|
16 |
+
|
17 |
+
Returns:
|
18 |
+
- X_sample (numpy.ndarray): The sampled data matrix of shape (n_features, n_samples)
|
19 |
+
where n_samples is the number of retained samples and n_features is the number of features.
- Y_sample (numpy.ndarray): The corresponding sampled labels of shape (n_samples, ).
|
20 |
+
|
21 |
+
"""
|
22 |
+
|
23 |
+
# Create a random number generator
|
24 |
+
rng = np.random.default_rng(random_state)
|
25 |
+
|
26 |
+
# Compute the number of samples to be drawn
|
27 |
+
n_samples = X.shape[1]
|
28 |
+
sample_size = int(fraction * n_samples)
|
29 |
+
|
30 |
+
# Randomly sample the indices
|
31 |
+
sampled_indices = rng.choice(n_samples, sample_size, replace=False)
|
32 |
+
|
33 |
+
# Use the sampled indices to extract columns from the original data
|
34 |
+
X_sample = X[:, sampled_indices]
|
35 |
+
Y_sample = Y[sampled_indices]
|
36 |
+
|
37 |
+
return X_sample, Y_sample
|
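A tiny check of random_sample above (synthetic values; samples are stored column-wise, as in the rest of the codebase):

import numpy as np
from algorithm.sample import random_sample

X = np.arange(20, dtype=float).reshape(4, 5)      # 4 features, 5 samples
Y = np.array([0, 0, 1, 1, 2])
X_sub, Y_sub = random_sample(X, Y, fraction=0.8, random_state=42)
print(X_sub.shape, Y_sub.shape)                   # (4, 4) (4,)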
algorithm/user_evaluate.py
ADDED
@@ -0,0 +1,32 @@
1 |
+
from typing import Tuple
|
2 |
+
|
3 |
+
import numpy as np
|
4 |
+
|
5 |
+
from algorithm.nmf import BasicNMF
|
6 |
+
from algorithm.visualize import origin_versus_dictrep
|
7 |
+
|
8 |
+
def evaluate(nmf: BasicNMF, metrics: Tuple, X: np.ndarray, X_noise: np.ndarray,
|
9 |
+
image_size: tuple, reduce: int, idx=2, imshow: bool=False) -> None:
|
10 |
+
|
11 |
+
"""
|
12 |
+
Evaluate the performance of NMF algorithms.
|
13 |
+
|
14 |
+
Parameters:
|
15 |
+
- nmf (BasicNMF): The NMF algorithm.
|
16 |
+
- metrics (tuple): The evaluation metrics, (rmse, acc, nmi).
|
17 |
+
- X (numpy.ndarray): The original data matrix, shape (n_samples, n_features).
|
18 |
+
- X_noise (numpy.ndarray): The noisy data matrix, shape (n_samples, n_features).
|
19 |
+
- image_size (tuple): The size of images.
|
20 |
+
- reduce (int): The reduction ratio of images.
|
21 |
+
- idx (int): The index of the image to be visualized.
|
22 |
+
- imshow (bool): Whether to display the original, noisy, and reconstructed images.
|
23 |
+
"""
|
24 |
+
# Start to evaluate
|
25 |
+
print('Evaluating...')
|
26 |
+
rmse, acc, nmi = metrics
|
27 |
+
# Visualize
|
28 |
+
print('RMSE = {:.4f}'.format(rmse))
|
29 |
+
print('Accuracy = {:.4f}'.format(acc))
|
30 |
+
print('NMI = {:.4f}'.format(nmi))
|
31 |
+
if imshow:
|
32 |
+
origin_versus_dictrep(X, nmf.D, nmf.R, X_noise, image_size=image_size, reduce=reduce, idx=idx)
|
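In context, evaluate is called after an NMF model has been fitted; below is a hedged sketch on synthetic data (the L1NormRegularizedNMF class and its fit/evaluate signatures are assumed from how they are used elsewhere in this commit; nmf.py itself is not shown here):

import numpy as np
from algorithm.nmf import L1NormRegularizedNMF
from algorithm.user_evaluate import evaluate

rng = np.random.RandomState(0)
X = rng.rand(40 * 30, 20)                         # 20 synthetic 'images' of 30x40 pixels
X_noise = X + 0.05 * rng.rand(*X.shape)
Y = np.repeat(np.arange(4), 5)                    # 4 fake classes

model = L1NormRegularizedNMF()
model.fit(X_noise, len(set(Y)), max_iter=100, random_state=0, verbose=False)
metrics = model.evaluate(X, Y, random_state=0)    # (rmse, acc, nmi)
evaluate(model, metrics, X, X_noise, image_size=(30, 40), reduce=1, idx=2, imshow=True)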
algorithm/visualize.py
ADDED
@@ -0,0 +1,161 @@
1 |
+
import numpy as np
|
2 |
+
import matplotlib.pyplot as plt
|
3 |
+
|
4 |
+
def origin_plus_noise(X_hat: np.ndarray, X_noise: np.ndarray, X: np.ndarray, image_size: tuple, reduce: int, idx: int=2) -> None:
|
5 |
+
"""
|
6 |
+
Display the original image, the noise, and the image with added noise side by side.
|
7 |
+
|
8 |
+
Parameters:
|
9 |
+
- X_hat (numpy.ndarray): Original image data.
|
10 |
+
- X_noise (numpy.ndarray): Noise data added to the original image.
- X (numpy.ndarray): Original image data with the noise added.
|
11 |
+
- image_size (tuple): Size of the original image as (height, width).
|
12 |
+
- reduce (int): Factor to downscale the image dimensions.
|
13 |
+
- idx (int, optional): Index of the image to be displayed. Default is 2.
|
14 |
+
"""
|
15 |
+
|
16 |
+
# Calculate reduced image size based on the 'reduce' factor
|
17 |
+
img_size = [i//reduce for i in image_size]
|
18 |
+
|
19 |
+
# Retrieve the specified image from the data
|
20 |
+
X_hat_i = X_hat[:,idx].reshape(img_size[1],img_size[0])
|
21 |
+
X_noise_i = X_noise[:,idx].reshape(img_size[1],img_size[0])
|
22 |
+
X_i = X[:,idx].reshape(img_size[1],img_size[0])
|
23 |
+
|
24 |
+
# Set up the figure for displaying images
|
25 |
+
plt.figure(figsize=(12,3)) # Adjusted size for better visualization
|
26 |
+
|
27 |
+
# Display the original image
|
28 |
+
plt.subplot(151) # 1x5 grid to make space for the '+' and '=' symbols
|
29 |
+
plt.imshow(X_hat_i, cmap=plt.cm.gray)
|
30 |
+
plt.title('Image(Original)')
|
31 |
+
plt.axis('off') # Hide axis for a cleaner look
|
32 |
+
|
33 |
+
# Place '+' symbol between images
|
34 |
+
plt.subplot(152)
|
35 |
+
plt.text(0.5, 0.5, '+', fontsize=20, ha='center', va='center')
|
36 |
+
plt.axis('off') # Hide axis
|
37 |
+
|
38 |
+
# Display the noise
|
39 |
+
plt.subplot(153)
|
40 |
+
plt.imshow(X_noise_i, cmap=plt.cm.gray)
|
41 |
+
plt.title('Noise')
|
42 |
+
plt.axis('off') # Hide axis for a cleaner look
|
43 |
+
|
44 |
+
# Place '=' symbol between images
|
45 |
+
plt.subplot(154)
|
46 |
+
plt.text(0.5, 0.5, '=', fontsize=20, ha='center', va='center')
|
47 |
+
plt.axis('off') # Hide axis
|
48 |
+
|
49 |
+
# Display the image with added noise
|
50 |
+
plt.subplot(155)
|
51 |
+
plt.imshow(X_i, cmap=plt.cm.gray)
|
52 |
+
plt.title('Image(Noise)')
|
53 |
+
plt.axis('off') # Hide axis for a cleaner look
|
54 |
+
|
55 |
+
# Render the figure
|
56 |
+
plt.tight_layout() # Ensure no overlap between subplots
|
57 |
+
plt.show()
|
58 |
+
|
59 |
+
def origin_versus_dictrep(X: np.ndarray, D: np.ndarray, R: np.ndarray, X_noise: np.ndarray, image_size: tuple, reduce: int, idx: int) -> tuple:
|
60 |
+
"""
|
61 |
+
Display the original, noise-added, and dictionary-reconstructed images side by side.
|
62 |
+
|
63 |
+
Parameters:
|
64 |
+
- X (numpy.ndarray): Original data matrix of shape (n_samples, n_features).
|
65 |
+
- D (numpy.ndarray): Basis matrix obtained from dictionary learning.
|
66 |
+
- R (numpy.ndarray): Coefficient matrix.
|
67 |
+
- X_noise (numpy.ndarray): Noise-added version of the original data matrix.
|
68 |
+
- image_size (tuple): Tuple containing the height and width of the image.
|
69 |
+
- reduce (int): Factor by which the image size is reduced for visualization.
|
70 |
+
- idx (int): Index of the image to display.
|
71 |
+
|
72 |
+
Returns:
|
73 |
+
The displayed original, noisy, and reconstructed images (X_i, X_noise_i, DR_i). The function also plots them using matplotlib.
|
74 |
+
"""
|
75 |
+
|
76 |
+
DR = np.dot(D, R).reshape(X.shape[0], X.shape[1])
|
77 |
+
# Calculate reduced image size based on the 'reduce' factor
|
78 |
+
img_size = [i//reduce for i in image_size]
|
79 |
+
|
80 |
+
# Retrieve the specified image from the data
|
81 |
+
X_i = X[:,idx].reshape(img_size[1],img_size[0])
|
82 |
+
X_noise_i = X_noise[:,idx].reshape(img_size[1],img_size[0])
|
83 |
+
DR_i = DR[:,idx].reshape(img_size[1],img_size[0])
|
84 |
+
|
85 |
+
# Set up the figure for displaying images
|
86 |
+
plt.figure(figsize=(12,3)) # Adjusted size for better visualization
|
87 |
+
|
88 |
+
# Display the original image
|
89 |
+
plt.subplot(131)
|
90 |
+
plt.imshow(X_i, cmap=plt.cm.gray)
|
91 |
+
plt.title('Image(Original)')
|
92 |
+
plt.axis('off')
|
93 |
+
|
94 |
+
# Display the reconstructed image
|
95 |
+
plt.subplot(132)
|
96 |
+
plt.imshow(X_noise_i, cmap=plt.cm.gray)
|
97 |
+
plt.title('Image(Noise)')
|
98 |
+
plt.axis('off')
|
99 |
+
|
100 |
+
# Display the reconstructed image
|
101 |
+
plt.subplot(133)
|
102 |
+
plt.imshow(DR_i, cmap=plt.cm.gray)
|
103 |
+
plt.title('Image(Reconstructed)')
|
104 |
+
plt.axis('off')
|
105 |
+
|
106 |
+
# Render the figure
|
107 |
+
plt.tight_layout()
|
108 |
+
plt.show()
|
109 |
+
|
110 |
+
return X_i, X_noise_i, DR_i
|
111 |
+
|
112 |
+
def origin_noise_dictrep(X: np.ndarray, X_noise: np.ndarray, D: np.ndarray, R: np.ndarray, image_size: tuple, reduce: int, idx: int) -> None:
|
113 |
+
"""
|
114 |
+
Display the original image, its noise version, and its dictionary-reconstructed representation side by side.
|
115 |
+
|
116 |
+
Parameters:
|
117 |
+
- X (numpy.ndarray): Original data matrix of shape (n_samples, n_features).
|
118 |
+
- X_noise (numpy.ndarray): Noise-added version of the original data matrix.
|
119 |
+
- D (numpy.ndarray): Basis matrix obtained from dictionary learning.
|
120 |
+
- R (numpy.ndarray): Coefficient matrix.
|
121 |
+
- image_size (tuple): Tuple containing the height and width of the image.
|
122 |
+
- reduce (int): Factor by which the image size is reduced for visualization.
|
123 |
+
- idx (int): Index of the image to display.
|
124 |
+
|
125 |
+
Returns:
|
126 |
+
None. The function will plot and display the images using matplotlib.
|
127 |
+
"""
|
128 |
+
|
129 |
+
DR = np.dot(D, R).reshape(X.shape[0], X.shape[1])
|
130 |
+
# Calculate reduced image size based on the 'reduce' factor
|
131 |
+
img_size = [i//reduce for i in image_size]
|
132 |
+
|
133 |
+
# Retrieve the specified image from the data
|
134 |
+
X_i = X[:,idx].reshape(img_size[1],img_size[0])
|
135 |
+
X_noise_i = X_noise[:,idx].reshape(img_size[1],img_size[0])
|
136 |
+
DR_i = DR[:,idx].reshape(img_size[1],img_size[0])
|
137 |
+
|
138 |
+
# Set up the figure for displaying images
|
139 |
+
plt.figure(figsize=(12,3)) # Adjusted size for better visualization
|
140 |
+
|
141 |
+
# Display the original image
|
142 |
+
plt.subplot(131)
|
143 |
+
plt.imshow(X_i, cmap=plt.cm.gray)
|
144 |
+
plt.title('Image(Original)')
|
145 |
+
plt.axis('off')
|
146 |
+
|
147 |
+
# Display the noisy image
|
148 |
+
plt.subplot(132)
|
149 |
+
plt.imshow(X_noise_i, cmap=plt.cm.gray)
|
150 |
+
plt.title('Image(Noise)')
|
151 |
+
plt.axis('off')
|
152 |
+
|
153 |
+
# Display the reconstructed image
|
154 |
+
plt.subplot(133)
|
155 |
+
plt.imshow(DR_i, cmap=plt.cm.gray)
|
156 |
+
plt.title('Image(Reconstructed)')
|
157 |
+
plt.axis('off')
|
158 |
+
|
159 |
+
# Render the figure
|
160 |
+
plt.tight_layout()
|
161 |
+
plt.show()
|
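A self-contained sketch exercising the plotting helpers above with random arrays (purely to show the expected shapes; in the pipeline these come from the real image data):

import numpy as np
from algorithm.visualize import origin_plus_noise, origin_versus_dictrep

rng = np.random.RandomState(0)
h, w, n = 20, 16, 5                               # image height, width, sample count
X_hat = rng.rand(h * w, n)                        # clean images, one per column
noise = 0.1 * rng.rand(h * w, n)
X = X_hat + noise                                 # noisy images

origin_plus_noise(X_hat, noise, X, image_size=(w, h), reduce=1, idx=2)

D, R = rng.rand(h * w, 4), rng.rand(4, n)         # stand-ins for a fitted basis and coefficients
origin_versus_dictrep(X_hat, D, R, X, image_size=(w, h), reduce=1, idx=2)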
app.py
ADDED
@@ -0,0 +1,196 @@
1 |
+
import PIL
|
2 |
+
import numpy as np
|
3 |
+
import gradio as gr
|
4 |
+
|
5 |
+
from algorithm.pipeline import Pipeline
|
6 |
+
|
7 |
+
class App:
|
8 |
+
def __init__(self,
|
9 |
+
nmf='L1NormRegularizedNMF',
|
10 |
+
dataset='YaleB',
|
11 |
+
reduce=3,
|
12 |
+
noise_type='salt_and_pepper',
|
13 |
+
noise_level=0.10,
|
14 |
+
random_state=99,
|
15 |
+
scaler='MinMax'):
|
16 |
+
self.pipeline = Pipeline(nmf=nmf,
|
17 |
+
dataset=dataset,
|
18 |
+
reduce=reduce,
|
19 |
+
noise_type=noise_type,
|
20 |
+
noise_level=noise_level,
|
21 |
+
random_state=random_state,
|
22 |
+
scaler=scaler)
|
23 |
+
|
24 |
+
def align_reduce(self, dataset_name):
|
25 |
+
return 1 if dataset_name == 'ORL' else 3
|
26 |
+
|
27 |
+
def reset_pipeline(self, nmf, dataset, reduce, noise_type, noise_level, random_state, scaler):
|
28 |
+
noise_type, noise_level = self.convert_level_to_number(noise_type, noise_level)
|
29 |
+
self.pipeline = Pipeline(nmf=nmf,
|
30 |
+
dataset=dataset,
|
31 |
+
reduce=reduce,
|
32 |
+
noise_type=noise_type,
|
33 |
+
noise_level=noise_level,
|
34 |
+
random_state=random_state,
|
35 |
+
scaler=scaler)
|
36 |
+
|
37 |
+
def convert_level_to_number(self, type, level):
|
38 |
+
map_dict = {"Uniform": {"Low": 0.1, "High": 0.3},
|
39 |
+
"Gaussian": {"Low": 0.05, "High": 0.08},
|
40 |
+
"Laplacian": {"Low": 0.04, "High": 0.06},
|
41 |
+
"Salt & Pepper": {"Low": 0.02, "High": 0.1},
|
42 |
+
"Block": {"Low": 10, "High": 15}}
|
43 |
+
type_name = type.lower() if type != "Salt & Pepper" else "salt_and_pepper"
|
44 |
+
return type_name, map_dict[type][level]
|
45 |
+
|
46 |
+
def execute(self, max_iter=500, idx=9):
|
47 |
+
self.pipeline.execute(max_iter=max_iter)
|
48 |
+
return *self.visualize(idx), *self.metrics()
|
49 |
+
|
50 |
+
def visualize(self, idx=9):
|
51 |
+
image_raw, image_noise, image_recon = self.pipeline.visualization(idx=idx)
|
52 |
+
return self.array2image(image_raw), self.array2image(image_noise), self.array2image(image_recon)
|
53 |
+
|
54 |
+
def metrics(self):
|
55 |
+
return self.pipeline.metrics
|
56 |
+
|
57 |
+
def array2image(self, array):
|
58 |
+
image_size = self.pipeline.img_size
|
59 |
+
return PIL.Image.fromarray(self.scale_pixel(array)).resize((image_size))
|
60 |
+
|
61 |
+
def scale_pixel(self, image):
|
62 |
+
return ((image - image.min()) / (image.max() - image.min()) * 255).astype(np.uint8)
|
63 |
+
|
64 |
+
def clear_params(self):
|
65 |
+
self.pipeline = Pipeline(nmf='L1NormRegularizedNMF', dataset='YaleB', reduce=3, noise_type='salt_and_pepper', noise_level=0.02, random_state=99, scaler='MinMax')
|
66 |
+
return 'L1NormRegularizedNMF', 'YaleB', 3, 'Salt & Pepper', 'Low', 99, 'MinMax'
|
67 |
+
|
68 |
+
app = App()
|
69 |
+
image_size = app.pipeline.img_size
|
70 |
+
|
71 |
+
with gr.Blocks() as demo:
|
72 |
+
gr.Markdown("# NMF Image Reconstruction")
|
73 |
+
with gr.Row():
|
74 |
+
with gr.Group():
|
75 |
+
with gr.Row():
|
76 |
+
nmf = gr.Dropdown(
|
77 |
+
label="NMF Algorithm",
|
78 |
+
choices=['L1NormRegularizedNMF', 'L2NormNMF', 'KLDivergenceNMF',
|
79 |
+
'ISDivergenceNMF', 'L21NormNMF', 'HSCostNMF',
|
80 |
+
'CappedNormNMF', 'CauchyNMF'],
|
81 |
+
value='L1NormRegularizedNMF',
|
82 |
+
info="Choose the NMF algorithm.")
|
83 |
+
|
84 |
+
dataset = gr.Dropdown(
|
85 |
+
label="Dataset",
|
86 |
+
choices=['ORL', 'YaleB'],
|
87 |
+
value='YaleB',
|
88 |
+
info="Choose the dataset.")
|
89 |
+
|
90 |
+
reduce = gr.Number(
|
91 |
+
value=3,
|
92 |
+
label="Reduce",
|
93 |
+
info="Choose the reduce.")
|
94 |
+
|
95 |
+
with gr.Row():
|
96 |
+
noise_type = gr.Dropdown(
|
97 |
+
label="Noise Type",
|
98 |
+
choices=['Uniform', 'Gaussian', 'Laplacian', 'Salt & Pepper', 'Block'],
|
99 |
+
value='Salt & Pepper',
|
100 |
+
info="Choose the noise type.")
|
101 |
+
|
102 |
+
noise_level = gr.Radio(
|
103 |
+
choices=['Low', 'High'],
|
104 |
+
value='Low',
|
105 |
+
label="Noise Level",
|
106 |
+
info="Choose the noise level."
|
107 |
+
)
|
108 |
+
|
109 |
+
with gr.Row():
|
110 |
+
random_state = gr.Number(
|
111 |
+
value=99,
|
112 |
+
label="Random State",
|
113 |
+
info="Choose the random state.",)
|
114 |
+
|
115 |
+
scaler = gr.Dropdown(
|
116 |
+
label="Scaler",
|
117 |
+
choices=['MinMax', 'Standard'],
|
118 |
+
value='MinMax',
|
119 |
+
info="Choose the scaler.")
|
120 |
+
|
121 |
+
with gr.Row():
|
122 |
+
max_iter= gr.Number(
|
123 |
+
value=500,
|
124 |
+
label="Max Iteration",
|
125 |
+
info="Choose the max iteration.")
|
126 |
+
idx = gr.Number(
|
127 |
+
value=9,
|
128 |
+
label="Image Index",
|
129 |
+
info="Choose the image index.")
|
130 |
+
|
131 |
+
with gr.Row():
|
132 |
+
execute_bt = gr.Button(value="Execute Algorithm",)
|
133 |
+
clear_params_bt = gr.Button(
|
134 |
+
value="Clear Parameters")
|
135 |
+
|
136 |
+
with gr.Group():
|
137 |
+
with gr.Row():
|
138 |
+
|
139 |
+
output_image_raw = gr.Image(
|
140 |
+
height=image_size[1],
|
141 |
+
width=image_size[0],
|
142 |
+
image_mode="L",
|
143 |
+
label="Original Image",
|
144 |
+
show_download_button=True,
|
145 |
+
show_share_button=True,)
|
146 |
+
output_image_noise = gr.Image(
|
147 |
+
height=image_size[1],
|
148 |
+
width=image_size[0],
|
149 |
+
label="Noisy Image",
|
150 |
+
image_mode="L",
|
151 |
+
show_download_button=True,
|
152 |
+
show_share_button=True,)
|
153 |
+
output_image_recon = gr.Image(
|
154 |
+
height=image_size[1],
|
155 |
+
width=image_size[0],
|
156 |
+
label="Reconstructed Image",
|
157 |
+
image_mode="L",
|
158 |
+
show_download_button=True,
|
159 |
+
show_share_button=True,)
|
160 |
+
|
161 |
+
with gr.Row():
|
162 |
+
rmse = gr.Number(
|
163 |
+
label="RMSE",
|
164 |
+
info="Average root mean square error",
|
165 |
+
precision=4,)
|
166 |
+
acc = gr.Number(
|
167 |
+
label="Acc",
|
168 |
+
info="Accuracy",
|
169 |
+
precision=4,)
|
170 |
+
nmi = gr.Number(
|
171 |
+
label="NMI",
|
172 |
+
info="Normalized mutual information",
|
173 |
+
precision=4,)
|
174 |
+
|
175 |
+
clear_output_bt = gr.ClearButton(
|
176 |
+
value="Clear Output",
|
177 |
+
components=[output_image_raw, output_image_noise, output_image_recon, rmse, acc, nmi],)
|
178 |
+
|
179 |
+
nmf.input(app.reset_pipeline, inputs=[nmf, dataset, reduce, noise_type, noise_level, random_state, scaler])
|
180 |
+
dataset.input(app.reset_pipeline, inputs=[nmf, dataset, reduce, noise_type, noise_level, random_state, scaler])
|
181 |
+
dataset.input(app.align_reduce, inputs=[dataset], outputs=[reduce])
|
182 |
+
reduce.input(app.reset_pipeline, inputs=[nmf, dataset, reduce, noise_type, noise_level, random_state, scaler])
|
183 |
+
noise_type.input(app.reset_pipeline, inputs=[nmf, dataset, reduce, noise_type, noise_level, random_state, scaler])
|
184 |
+
noise_level.input(app.reset_pipeline, inputs=[nmf, dataset, reduce, noise_type, noise_level, random_state, scaler])
|
185 |
+
random_state.input(app.reset_pipeline, inputs=[nmf, dataset, reduce, noise_type, noise_level, random_state, scaler])
|
186 |
+
scaler.input(app.reset_pipeline, inputs=[nmf, dataset, reduce, noise_type, noise_level, random_state, scaler])
|
187 |
+
idx.input(app.visualize, inputs=[idx], outputs=[output_image_raw, output_image_noise, output_image_recon])
|
188 |
+
execute_bt.click(app.execute, inputs=[max_iter, idx], outputs=[output_image_raw, output_image_noise, output_image_recon, rmse, acc, nmi])
|
189 |
+
clear_params_bt.click(app.clear_params, outputs=[nmf, dataset, reduce, noise_type, noise_level, random_state, scaler])
|
190 |
+
|
191 |
+
if __name__ == '__main__':
|
192 |
+
demo.queue()
|
193 |
+
demo.launch(inbrowser=True,
|
194 |
+
share=True,
|
195 |
+
server_name="0.0.0.0",
|
196 |
+
server_port=8080)
|
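The Gradio demo above is normally launched with python app.py (it serves on port 8080), but the App wrapper can also be driven programmatically; a hedged sketch, assuming the data folders from this commit are in place (note that importing app also constructs the default App and the Blocks UI at import time):

from app import App

app = App(nmf='L1NormRegularizedNMF', dataset='YaleB', reduce=3,
          noise_type='salt_and_pepper', noise_level=0.10,
          random_state=99, scaler='MinMax')
raw, noisy, recon, rmse, acc, nmi = app.execute(max_iter=500, idx=9)
print(rmse, acc, nmi)                             # raw / noisy / recon are PIL images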
data/.DS_Store
ADDED
Binary file (6.15 kB). View file
|
|
data/CroppedYaleB/.DS_Store
ADDED
Binary file (6.15 kB). View file
|
|
data/CroppedYaleB/yaleB01/DEADJOE
ADDED
@@ -0,0 +1,3 @@
1 |
+
|
2 |
+
*** Modified files in JOE when it aborted on Fri May 18 01:57:34 2001
|
3 |
+
*** JOE was aborted by signal 1
|
data/CroppedYaleB/yaleB01/yaleB01_P00.info
ADDED
@@ -0,0 +1,23 @@
1 |
+
yaleB01_P00_Ambient.pgm
|
2 |
+
yaleB01_P00A+000E+00.pgm
|
3 |
+
yaleB01_P00A+010E-20.pgm
|
4 |
+
yaleB01_P00A+020E-10.pgm
|
5 |
+
yaleB01_P00A+025E+00.pgm
|
6 |
+
yaleB01_P00A+020E+10.pgm
|
7 |
+
yaleB01_P00A+015E+20.pgm
|
8 |
+
yaleB01_P00A+000E+20.pgm
|
9 |
+
yaleB01_P00A-015E+20.pgm
|
10 |
+
yaleB01_P00A-020E+10.pgm
|
11 |
+
yaleB01_P00A-025E+00.pgm
|
12 |
+
yaleB01_P00A-020E-10.pgm
|
13 |
+
yaleB01_P00A-010E-20.pgm
|
14 |
+
yaleB01_P00A+000E-20.pgm
|
15 |
+
yaleB01_P00A-035E-20.pgm
|
16 |
+
yaleB01_P00A-035E+15.pgm
|
17 |
+
yaleB01_P00A+000E-35.pgm
|
18 |
+
yaleB01_P00A-005E-10.pgm
|
19 |
+
yaleB01_P00A-010E+00.pgm
|
20 |
+
yaleB01_P00A-005E+10.pgm
|
21 |
+
yaleB01_P00A+005E+10.pgm
|
22 |
+
yaleB01_P00A+010E+00.pgm
|
23 |
+
yaleB01_P00A+005E-10.pgm
|
data/CroppedYaleB/yaleB01/yaleB01_P00A+000E+00.pgm
ADDED
Git LFS Details
|
data/CroppedYaleB/yaleB01/yaleB01_P00A+000E+20.pgm
ADDED
Git LFS Details
|
data/CroppedYaleB/yaleB01/yaleB01_P00A+000E+45.pgm
ADDED
Git LFS Details
|
data/CroppedYaleB/yaleB01/yaleB01_P00A+000E+90.pgm
ADDED
Git LFS Details
|
data/CroppedYaleB/yaleB01/yaleB01_P00A+000E-20.pgm
ADDED
Git LFS Details
|
data/CroppedYaleB/yaleB01/yaleB01_P00A+000E-35.pgm
ADDED
Git LFS Details
|
data/CroppedYaleB/yaleB01/yaleB01_P00A+005E+10.pgm
ADDED
Git LFS Details
|
data/CroppedYaleB/yaleB01/yaleB01_P00A+005E-10.pgm
ADDED
Git LFS Details
|
data/CroppedYaleB/yaleB01/yaleB01_P00A+010E+00.pgm
ADDED
Git LFS Details
|
data/CroppedYaleB/yaleB01/yaleB01_P00A+010E-20.pgm
ADDED
Git LFS Details
|
data/CroppedYaleB/yaleB01/yaleB01_P00A+015E+20.pgm
ADDED
Git LFS Details
|
data/CroppedYaleB/yaleB01/yaleB01_P00A+020E+10.pgm
ADDED
Git LFS Details
|
data/CroppedYaleB/yaleB01/yaleB01_P00A+020E-10.pgm
ADDED
Git LFS Details
|
data/CroppedYaleB/yaleB01/yaleB01_P00A+020E-40.pgm
ADDED
Git LFS Details
|
data/CroppedYaleB/yaleB01/yaleB01_P00A+025E+00.pgm
ADDED
Git LFS Details
|
data/CroppedYaleB/yaleB01/yaleB01_P00A+035E+15.pgm
ADDED
Git LFS Details
|
data/CroppedYaleB/yaleB01/yaleB01_P00A+035E+40.pgm
ADDED
Git LFS Details
|
data/CroppedYaleB/yaleB01/yaleB01_P00A+035E+65.pgm
ADDED
Git LFS Details
|
data/CroppedYaleB/yaleB01/yaleB01_P00A+035E-20.pgm
ADDED
Git LFS Details
|
data/CroppedYaleB/yaleB01/yaleB01_P00A+050E+00.pgm
ADDED
Git LFS Details
|
data/CroppedYaleB/yaleB01/yaleB01_P00A+050E-40.pgm
ADDED
Git LFS Details
|
data/CroppedYaleB/yaleB01/yaleB01_P00A+060E+20.pgm
ADDED
Git LFS Details
|
data/CroppedYaleB/yaleB01/yaleB01_P00A+060E-20.pgm
ADDED
Git LFS Details
|
data/CroppedYaleB/yaleB01/yaleB01_P00A+070E+00.pgm
ADDED
Git LFS Details
|
data/CroppedYaleB/yaleB01/yaleB01_P00A+070E+45.pgm
ADDED
Git LFS Details
|
data/CroppedYaleB/yaleB01/yaleB01_P00A+070E-35.pgm
ADDED
Git LFS Details
|
data/CroppedYaleB/yaleB01/yaleB01_P00A+085E+20.pgm
ADDED
Git LFS Details
|
data/CroppedYaleB/yaleB01/yaleB01_P00A+085E-20.pgm
ADDED
Git LFS Details
|
data/CroppedYaleB/yaleB01/yaleB01_P00A+095E+00.pgm
ADDED
Git LFS Details
|
data/CroppedYaleB/yaleB01/yaleB01_P00A+110E+15.pgm
ADDED
Git LFS Details
|
data/CroppedYaleB/yaleB01/yaleB01_P00A+110E+40.pgm
ADDED
Git LFS Details
|