mervenoyan committed
Commit 3338479 · 1 Parent(s): 3f87473

initial commit

Files changed (1)
  1. app.py +199 -0
app.py ADDED
@@ -0,0 +1,199 @@
import time
from functools import partial

import gradio as gr
import matplotlib.pyplot as plt
import numpy as np

from sklearn import svm
from sklearn.covariance import EllipticEnvelope
from sklearn.datasets import make_blobs, make_moons
from sklearn.ensemble import IsolationForest
from sklearn.kernel_approximation import Nystroem
from sklearn.linear_model import SGDOneClassSVM
from sklearn.neighbors import LocalOutlierFactor
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

### DATASETS

def normalize(X):
    return StandardScaler().fit_transform(X)

# Example settings
n_samples = 300
outliers_fraction = 0.15
n_outliers = int(outliers_fraction * n_samples)
n_inliers = n_samples - n_outliers

#### MODELS

def get_groundtruth_model(X, labels):
    # dummy model to show the true label distribution
    class Dummy:
        def __init__(self, y):
            self.labels_ = y

    return Dummy(labels)
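# Note: `normalize` and `get_groundtruth_model` are small helpers that the
# anomaly-detection demo below does not call; `get_groundtruth_model` only
# mimics a fitted estimator by exposing a `labels_` attribute.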
############
# Define datasets
blobs_params = dict(random_state=0, n_samples=n_inliers, n_features=2)
DATA_MAPPING = {
    "Central Blob": make_blobs(centers=[[0, 0], [0, 0]], cluster_std=0.5, **blobs_params)[0],
    "Two Blobs": make_blobs(centers=[[2, 2], [-2, -2]], cluster_std=[0.5, 0.5], **blobs_params)[0],
    "Blob with Noise": make_blobs(centers=[[2, 2], [-2, -2]], cluster_std=[1.5, 0.3], **blobs_params)[0],
    "Moons": 4.0
    * (
        make_moons(n_samples=n_samples, noise=0.05, random_state=0)[0]
        - np.array([0.5, 0.25])
    ),
    "Noise": 14.0 * (np.random.RandomState(42).rand(n_samples, 2) - 0.5),
}
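# The "Moons" and "Noise" entries are shifted and rescaled so that every dataset
# roughly fills the [-7, 7] x [-7, 7] window used for plotting below.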

NAME_CLF_MAPPING = {
    "Robust covariance": EllipticEnvelope(contamination=outliers_fraction),
    "One-Class SVM": svm.OneClassSVM(nu=outliers_fraction, kernel="rbf", gamma=0.1),
    "One-Class SVM (SGD)": make_pipeline(
        Nystroem(gamma=0.1, random_state=42, n_components=150),
        SGDOneClassSVM(
            nu=outliers_fraction,
            shuffle=True,
            fit_intercept=True,
            random_state=42,
            tol=1e-6,
        ),
    ),
    "Isolation Forest": IsolationForest(contamination=outliers_fraction, random_state=42),
    "Local Outlier Factor": LocalOutlierFactor(n_neighbors=35, contamination=outliers_fraction),
}
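# Each detector is tuned to flag roughly `outliers_fraction` of the samples:
# EllipticEnvelope, IsolationForest and LocalOutlierFactor through `contamination`,
# and the One-Class SVM variants through `nu` (an upper bound on the fraction of
# training errors and a lower bound on the fraction of support vectors).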

###########################################################

# Compare the given outlier detectors under the settings defined above

#### PLOT
FIGSIZE = (7, 7)


def train_models(selected_data, clf_name):
    # grid on which the decision boundary is evaluated
    xx, yy = np.meshgrid(np.linspace(-7, 7, 150), np.linspace(-7, 7, 150))
    clf = NAME_CLF_MAPPING[clf_name]
    fig = plt.figure(figsize=FIGSIZE)

    rng = np.random.RandomState(42)
    X = DATA_MAPPING[selected_data]
    # append uniformly distributed outliers to the selected dataset
    X = np.concatenate([X, rng.uniform(low=-6, high=6, size=(n_outliers, 2))], axis=0)

    # fit the data and tag outliers
    t0 = time.time()
    if clf_name == "Local Outlier Factor":
        y_pred = clf.fit_predict(X)
    else:
        y_pred = clf.fit(X).predict(X)
    t1 = time.time()

    # plot the level line and the points
    if clf_name != "Local Outlier Factor":  # LOF does not implement predict
        Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
        Z = Z.reshape(xx.shape)
        plt.contour(xx, yy, Z, levels=[0], linewidths=2, colors="black")

    colors = np.array(["#377eb8", "#ff7f00"])
    plt.scatter(X[:, 0], X[:, 1], s=10, color=colors[(y_pred + 1) // 2])

    plt.xlim(-7, 7)
    plt.ylim(-7, 7)
    plt.xticks(())
    plt.yticks(())
    # report the fit/predict time in the lower-right corner
    plt.text(
        0.99,
        0.01,
        ("%.2fs" % (t1 - t0)).lstrip("0"),
        transform=plt.gca().transAxes,
        size=15,
        horizontalalignment="right",
    )

    return fig
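# Optional local sanity check (a sketch, not wired into the Gradio app):
# uncomment to render a single dataset/detector combination without launching
# the UI; the output filename is arbitrary.
# fig = train_models("Moons", "Isolation Forest")
# fig.savefig("moons_isolation_forest.png")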

description = "Learn how different anomaly detection algorithms perform on different datasets."

def iter_grid(n_rows, n_cols):
    # lay out a grid of rows and columns with Gradio Blocks
    for _ in range(n_rows):
        with gr.Row():
            for _ in range(n_cols):
                with gr.Column():
                    yield

title = "🕵️‍♀️ Compare anomaly detection algorithms 🕵️‍♀️"
with gr.Blocks() as demo:
    gr.Markdown(f"## {title}")
    gr.Markdown(description)

    input_models = list(NAME_CLF_MAPPING)
    input_data = gr.Radio(
        choices=["Central Blob", "Two Blobs", "Blob with Noise", "Moons", "Noise"],
        value="Moons",
    )
    counter = 0

    # one plot per detector; each re-renders whenever the dataset selection changes
    for _ in iter_grid(5, 5):
        if counter >= len(input_models):
            break

        input_model = input_models[counter]
        plot = gr.Plot(label=input_model)
        fn = partial(train_models, clf_name=input_model)
        input_data.change(fn=fn, inputs=[input_data], outputs=plot)
        counter += 1

demo.launch(enable_queue=True, debug=True)