Spaces: Runtime error
Commit 15afd18 · Parent: 1aa5ac2 · Upload 2 files

app.py CHANGED
@@ -1,475 +1,21 @@
 import gradio as gr
-import
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    def __init__(self, model):
-        self.clf = model
-        self.scaler = None
-        self.label_encoder = None
-
-    def train(self, X, Y):
-        # Encode the labels
-        self.label_encoder = LabelEncoder()
-        Y = self.label_encoder.fit_transform(Y)
-
-        # Standardize the features
-        self.scaler = StandardScaler()
-        X = self.scaler.fit_transform(X)
-
-        # Split into training and test sets
-        X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3)
-
-        # Train the model
-        self.clf.fit(X_train, Y_train)
-
-    def predict_samples(self, samples):
-        # Apply the same preprocessing steps to the samples
-        samples = self.scaler.transform(samples)
-
-        # Predict with the model
-        predictions = self.clf.predict(samples)
-
-        # Decode the predicted labels back to the original values
-        predictions = self.label_encoder.inverse_transform(predictions)
-
-        return predictions
-
-
-def add_max_score_to_list(temp_scores, current_score, selected_indices, selected_indices_list):
-    max_score_index = np.argmax(np.array(temp_scores))
-    current_score.append(temp_scores[max_score_index])
-    selected_indices.add(max_score_index)
-    selected_indices_list.append(max_score_index)
-
-
-def fs(data, method, num_fea_int, clf, testsample):
-    num_fea_int = int(num_fea_int)
-    if method == 'MRMR_FCD':
-        data = pd.read_csv(data.name)
-        X = data.iloc[:, :-1].values
-        y = data['Label'].values
-        num_features = len(X[0])
-        f_test_scores = [f_oneway(X[:, i], y)[0] for i in range(num_features)]
-        # Add the score of the starting feature to current_score
-        current_score = [max(f_test_scores)]
-        # Start from the highest-scoring feature
-        start_feature_index = f_test_scores.index(max(f_test_scores))
-        selected_indices = set()
-        selected_indices_list = []
-        selected_indices.add(start_feature_index)
-        selected_indices_list.append(start_feature_index)
-        pearson_score_matrix = np.zeros((num_features, num_features))
-        for _ in range(num_fea_int - 1):
-            temp_scores = []
-            for i in range(num_features):
-                if i in selected_indices:
-                    temp_scores.append(-float('inf'))
-                else:
-                    f_test_score = f_test_scores[i]
-                    diff = 0
-                    for j in selected_indices:
-                        # pearson score
-                        if j > i:
-                            if pearson_score_matrix[i][j] == 0:
-                                pearson_score_matrix[i][j] = np.corrcoef(X[:, i], X[:, j])[0, 1]
-                            diff += pearson_score_matrix[i][j]
-                        else:
-                            if pearson_score_matrix[j][i] == 0:
-                                pearson_score_matrix[j][i] = np.corrcoef(X[:, i], X[:, j])[0, 1]
-                            diff += pearson_score_matrix[j][i]
-                    temp_scores.append(f_test_score - diff / len(selected_indices))
-            add_max_score_to_list(temp_scores, current_score, selected_indices, selected_indices_list)
-        combined = list(zip(selected_indices_list, current_score))
-        # Sort the combined list with sorted(); key sorts by score, reverse=True means descending order
-        sorted_combined = sorted(combined, key=lambda x: x[1], reverse=True)
-        # Write the indices and feature scores to a CSV file
-        with open('index-score.csv', 'w', newline='') as file:
-            writer = csv.writer(file)
-            writer.writerow(["Index", "Score"])  # Write the column names
-            writer.writerows(sorted_combined)
-
-        inde = []
-        scores = []
-        for indy in sorted_combined:
-            inde.append(str(indy[0] + 1))
-            scores.append(indy[1])
-        # Create the first figure: the index-score plot
-        plt.figure(1, figsize=(24, 10))
-        plt.title("mRMR-FCD()")
-        plt.plot(inde, scores)
-        plt.xlabel("Feature Index")
-        plt.ylabel("Feature Score")
-        plt.savefig('Index_Score.png')
-
-        ff = []  # Convert the string indices back to integers
-        for fire in inde:
-            ff.append(int(fire) - 1)
-        # Choose the classifier
-        if clf == 'RF':
-            clf = RandomForestClassifier(n_jobs=-1)
-        elif clf == 'KNN':
-            clf = KNeighborsClassifier()
-        elif clf == 'DT':
-            clf = DecisionTreeClassifier()
-        elif clf == 'SVM':
-            clf = SVC(C=1.0, kernel='rbf')
-        elif clf == 'Naive Bayes':
-            clf = GaussianNB()
-
-        acc = []
-        # For each feature index in the index list
-        for i in range(len(ff)):
-            # Cross-validate with the first i features
-            selected_features = X[:, [int(j) - 1 for j in ff[:i + 1]]]
-            scores = cross_val_score(clf, selected_features, y, cv=5)
-            # Compute the mean accuracy and append it to acc
-            acc.append(scores.mean())
-        max_acc = max(acc)
-        max_index = acc.index(max_acc) + 1
-
-        # Create the second figure: the IFS accuracy plot
-        plt.figure(2, figsize=(24, 10))
-        plt.title("IFS_" + str(method) + "_Accuracy")
-        plt.plot(max_index, max_acc, 'ro')
-        plt.plot(acc)
-        plt.annotate(f'({max_index}, {max_acc})', (max_index, max_acc), textcoords="offset points", xytext=(-5, 20),
-                     ha='center')
-        # Set the x-axis and y-axis labels
-        plt.xlabel("Top n features")
-        plt.ylabel('Accuracy')
-        plt.savefig('acc.png')
-
-        # Load the test samples and labels from test.csv
-        testsample = pd.read_csv(testsample.name)
-        test_samples = testsample.iloc[:, :-1].values
-        test_labels = testsample.iloc[:, -1].values
-
-        # Load the model
-        models = SVC(C=1.0, kernel='rbf')
-        my_model = MyModel(models)
-        my_model.train(X, y)
-
-        # Predict the labels of the test samples
-        predictions = my_model.predict_samples(test_samples)
-        # Compute the confusion matrix
-        cm = confusion_matrix(test_labels, predictions)
-
-        # Plot the confusion-matrix heatmap with seaborn
-        plt.figure(figsize=(24, 10))
-        sns.heatmap(cm, annot=True, fmt='d')
-        plt.xlabel('predict labels')
-        plt.ylabel('True labels')
-        plt.grid(True)
-        plt.savefig('confusion_matrix.png')
-
-        return 'Index_Score.png', 'acc.png', "confusion_matrix.png", "index-score.csv"
-
-    elif method == 'MRMR_FCQ':
-        data = pd.read_csv(data.name)
-        X = data.iloc[:, :-1].values
-        y = data['Label'].values
-        num_fea_inttures = len(X[0])
-        f_test_scores = [f_oneway(X[:, i], y)[0] for i in range(num_fea_inttures)]
-
-        # Add the score of the starting feature to current_score
-        current_score = [max(f_test_scores)]
-
-        # Index starts from 0
-        # start_feature_index = random.randint(0, num_features - 1)
-        # Start from the highest-scoring feature
-        start_feature_index = f_test_scores.index(max(f_test_scores))
-
-        selected_indices = set()
-        selected_indices_list = []
-        selected_indices.add(start_feature_index)
-        selected_indices_list.append(start_feature_index)
-        pearson_score_matrix = np.zeros((num_fea_inttures, num_fea_inttures))
-        for _ in range(num_fea_int - 1):
-            temp_scores = []
-            for i in range(num_fea_inttures):
-                if i in selected_indices:
-                    temp_scores.append(-float('inf'))
-                else:
-                    f_test_score = f_test_scores[i]
-                    q = 0
-                    for j in selected_indices:
-                        # pearson score
-                        if j > i:
-                            if pearson_score_matrix[i][j] == 0:
-                                pearson_score_matrix[i][j] = np.corrcoef(X[:, i], X[:, j])[0, 1]
-                            q += pearson_score_matrix[i][j]
-                        else:
-                            if pearson_score_matrix[j][i] == 0:
-                                pearson_score_matrix[j][i] = np.corrcoef(X[:, i], X[:, j])[0, 1]
-                            q += pearson_score_matrix[j][i]
-                    temp_scores.append(f_test_score / (q / len(selected_indices)))
-            add_max_score_to_list(temp_scores, current_score, selected_indices, selected_indices_list)
-        combined = list(zip(selected_indices_list, current_score))
-
-        # Sort the combined list with sorted(); key sorts by score, reverse=True means descending order
-        sorted_combined = sorted(combined, key=lambda x: x[1], reverse=True)
-        # Write the indices and feature scores to a CSV file
-        with open('index-score.csv', 'w', newline='') as file:
-            writer = csv.writer(file)
-            writer.writerow(["Index", "Score"])  # Write the column names
-            writer.writerows(sorted_combined)
-
-        inde = []
-        scores = []
-        for indy in sorted_combined:
-            inde.append(str(indy[0] + 1))
-            scores.append(indy[1])
-        # Create the first figure: the index-score plot
-        plt.figure(1, figsize=(24, 10))
-        plt.title("mRMR-FCD()")
-        plt.plot(inde, scores)
-        plt.xlabel("Feature Index")
-        plt.ylabel("Feature Score")
-        plt.savefig('Index_Score.png')
-
-        ff = []  # Convert the string indices back to integers
-        for fire in inde:
-            ff.append(int(fire) - 1)
-        # Choose the classifier
-        if clf == 'RF':
-            clf = RandomForestClassifier(n_jobs=-1)
-        elif clf == 'KNN':
-            clf = KNeighborsClassifier()
-        elif clf == 'DT':
-            clf = DecisionTreeClassifier()
-        elif clf == 'SVM':
-            clf = SVC(C=1.0, kernel='rbf')
-        elif clf == 'Naive Bayes':
-            clf = GaussianNB()
-
-        acc = []
-        # For each feature index in the index list
-        for i in range(len(ff)):
-            # Cross-validate with the first i features
-            selected_features = X[:, [int(j) - 1 for j in ff[:i + 1]]]
-            scores = cross_val_score(clf, selected_features, y, cv=5)
-            # Compute the mean accuracy and append it to acc
-            acc.append(scores.mean())
-        max_acc = max(acc)
-        max_index = acc.index(max_acc) + 1
-
-        # Create the second figure: the IFS accuracy plot
-        plt.figure(2, figsize=(24, 10))
-        plt.title("IFS_" + str(method) + "_Accuracy")
-        plt.plot(max_index, max_acc, 'ro')
-        plt.plot(acc)
-        plt.annotate(f'({max_index}, {max_acc})', (max_index, max_acc), textcoords="offset points", xytext=(-5, 20),
-                     ha='center')
-        # Set the x-axis and y-axis labels
-        plt.xlabel("Top n features")
-        plt.ylabel('Accuracy')
-        plt.savefig('acc.png')
-
-        # Load the test samples and labels from test.csv
-        testsample = pd.read_csv(testsample.name)
-        test_samples = testsample.iloc[:, :-1].values
-        test_labels = testsample.iloc[:, -1].values
-
-        # Load the model
-        models = SVC(C=1.0, kernel='rbf')
-        my_model = MyModel(models)
-        my_model.train(X, y)
-
-        # Predict the labels of the test samples
-        predictions = my_model.predict_samples(test_samples)
-        # Compute the confusion matrix
-        cm = confusion_matrix(test_labels, predictions)
-
-        # Plot the confusion-matrix heatmap with seaborn
-        plt.figure(figsize=(24, 10))
-        sns.heatmap(cm, annot=True, fmt='d')
-        plt.xlabel('predict labels')
-        plt.ylabel('True labels')
-        plt.grid(True)
-        plt.savefig('confusion_matrix.png')
-
-        return 'Index_Score.png', 'acc.png', "confusion_matrix.png", "index-score.csv"
-
-
-    # Add your code here. Let's write everything as functions first and wrap them into classes later; the main goal is to get a working prototype.
-    # For now the final result returns images: one plot of the feature indices with their scores, and one plot of the incremental feature selection (IFS) accuracy.
-    # A lot of my code above can still be optimized (plotting, classifier selection, and so on), but don't worry about that yet; finish the remaining elif branches below first, and then we can discuss optimizing the code.
-    elif method == 'Lasso':
-        data = pd.read_csv(data.name)
-        X = data.iloc[:, :-1]
-        y = data.iloc[:, -1:].values.flatten()
-
-        cl = LassoLarsCV(cv=20, max_iter=80000).fit(X, y)
-
-        importance = np.abs(cl.coef_)
-        feature_names = list(X)
-        a = len(feature_names)
-
-        idx_features = (-importance).argsort()[:a]
-        name_features = np.array(feature_names)[idx_features]
-        result = pd.DataFrame({'index': idx_features, 'Score': importance[idx_features]})
-        result_rank = result.sort_values(by='Score', ascending=False, ignore_index=True)
-        result_rank.to_csv("index-score.csv")
-        inde = result_rank['index'].tolist()
-        score = result_rank['Score'].tolist()
-
-        index = []
-        for i in inde:
-            index.append(str(i))
-        plt.figure(1, figsize=(24, 12))
-        plt.title(str(method))
-        plt.plot(index[:num_fea_int], score[:num_fea_int])
-
-        # Set the x-axis and y-axis labels
-        plt.xlabel('Feature Index')
-        plt.ylabel('Feature Score')
-        plt.savefig('Index_Score.png')
-        if clf == 'RF':
-            clf = RandomForestClassifier(n_jobs=-1)
-        elif clf == 'KNN':
-            clf = KNeighborsClassifier()
-        elif clf == 'DT':
-            clf = DecisionTreeClassifier()
-        elif clf == 'SVM':
-            clf = SVC()
-        elif clf == 'Naive Bayes':
-            clf = GaussianNB()
-
-        inde = inde[:num_fea_int]
-        index = index[:num_fea_int]
-        acc = []
-        # For each feature index in the index list
-
-        X = data.iloc[:, :-1].values
-        for i in range(len(index)):
-            # Cross-validate with the first i features
-            selected_features = X[:, [int(j) - 1 for j in inde[:i + 1]]]
-            scores = cross_val_score(clf, selected_features, y, cv=5)
-            # Compute the mean accuracy and append it to acc
-            acc.append(scores.mean())
-        max_acc = max(acc)
-        max_index = acc.index(max_acc) + 1
-
-        # ax2 = fig.add_subplot(212)
-        # ax2.set_title("IFS_" + str(method) + "_Accuracy")
-        plt.figure(2, figsize=(24, 10))
-        plt.plot(max_index, max_acc, 'ro')
-        plt.plot(acc)
-        plt.annotate(f'({max_index}, {max_acc})', (max_index, max_acc), textcoords="offset points", xytext=(-5, -5),
-                     ha='center')
-        # Set the x-axis and y-axis labels
-        # ax2.set_xlabel()
-        # ax2.set_ylabel('Accuracy')
-        plt.xlabel('Top n features')
-        plt.ylabel('Accuracy')
-        plt.grid(True)
-        plt.savefig('acc.png')
-
-        testsample = pd.read_csv(testsample.name)
-        test_samples = testsample.iloc[:, :-1].values
-        test_labels = testsample.iloc[:, -1].values
-        models = SVC(C=1.0, kernel='rbf')
-        my_model = MyModel(models)
-        my_model.train(X, y)
-
-        # Predict the labels of the test samples and compute the accuracy
-        predictions = my_model.predict_samples(test_samples)
-        # Compute the confusion matrix
-        cm = confusion_matrix(test_labels, predictions)
-
-        # Plot the confusion-matrix heatmap with seaborn
-        plt.figure(figsize=(24, 10))
-        sns.heatmap(cm, annot=True, fmt='d')
-        plt.xlabel('predict labels')
-        plt.ylabel('True labels')
-        plt.grid(True)
-        plt.savefig('confusion_matrix.png')
-
-        return 'Index_Score.png', 'acc.png', "confusion_matrix.png", 'index-score.csv'
-
-    elif method == 'Ensemble':
-        pass
-    elif method == 'CI':
-        pass
-
-
-title = "FSALs: Robust Feature selection framework"
-description = r"""<center><img src='https://raw.githubusercontent.com/Justin-12138/bio_if/d1fdf085f8e679dcceecc2c05014b1d4a237e033/assets/favicon.svg' alt='FSALs logo'></center>
-<b>Official Gradio demo</b> for <a href='https://huggingface.co/spaces/Justin-12138/FSALA' target='_blank'><b>Application of Causal Inference in Alzheimer's Disease(CCFC2023)</b></a>.<br>
-🔥 Fsals is a Robust feature selection framework based on causal inference. <br>
-🤗 Try using fsals in different data sets.!<br>
-"""
-article = r"""
-If FSALs is helpful, please help to ⭐ the <a href='https://github.com/Justin-12138/bio_if' target='_blank'>Github Repo</a>. Thanks!
-[](https://github.com/Justin-12138/bio_if)
-
----
-
-📝 **Citation**
-
-If our work is useful for your research, please consider citing:
-```bibtex
-@article{zlhl2023,
-    author = {Xiaolong Zhou, Zhao Liu, Yuchen Huang, Kun Lin},
-    title = {A Novel Ensemble Feature Selection Method for Biomarkers of Alzheimer's disease},
-    booktitle = {GUET Publisher},
-    year = {2023}
-}
-```
-
-📋 **License**
-
-This project is licensed under <a rel="license" href="https://github.com/Justin-12138/bio_if/blob/main/LICENSE">GPL License 2.0</a>.
-Redistribution and use for non-commercial purposes should follow this license.
-
-📧 **Contact**
-
-If you have any questions, please feel free to reach me out at <b>[email protected]</b>.
-
-<div>
-    🤗 Find Me:
-    <a href="https://github.com/Justin-12138"><img style="margin-top:0.5em; margin-bottom:2em" src="https://img.shields.io/github/followers/Justin-12138?style=social" alt="Github Follow"></a>
-</div>
-"""
-
-iface = gr.Interface(
-    fn=fs,
-    title=title,
-    description=description,
-
-    inputs=["file",
-            gr.inputs.Radio(['MRMR_FCD', 'MRMR_FCQ', 'CFS', 'Lasso', 'Ensemble', 'CI']),
-            gr.inputs.Number(),
-            gr.inputs.Radio(['RF', 'SVM', 'KNN', 'DT', 'Naive Bayes']),
-            "file"
-            ],
-    outputs=["image", "image", "image", "file"],
-    article=article,
-    examples=[
-        ["example_data.csv", 'MRMR_FCQ', 20, 'RF', "test.csv"],
-        ["example_data.csv", 'MRMR_FCD', 10, 'SVM', "test.csv"],
-        ["example_data.csv", 'MRMR_FCD', 30, 'KNN', "test.csv"],
-        ["example_data.csv", 'Lasso', 50, 'DT', "test.csv"],
-        ["example_data.csv", 'Lasso', 40, 'Naive Bayes', "test.csv"],
-    ],
-    allow_flagging="never"
-)
-
-iface.launch()
+from src import des, fs
+
+if __name__ == '__main__':
+    iface = gr.Interface(
+        fn=fs,
+        title=des("title"),
+        description=des("description"),
+        article=des("article"),
+        inputs=des("inputs"),
+        outputs=des("outputs"),
+        examples=[
+            ["example_data.csv", 'MRMR_FCQ', 20, 'RF', "test.csv"],
+            ["example_data.csv", 'MRMR_FCD', 10, 'SVM', "test.csv"],
+            ["example_data.csv", 'MRMR_FCD', 30, 'KNN', "test.csv"],
+            ["example_data.csv", 'Lasso', 30, 'DT', "test.csv"],
+            ["example_data.csv", 'Lasso', 20, 'Naive Bayes', "test.csv"],
+        ],
+        allow_flagging="never"
+    )
+    iface.launch()
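The examples above point at example_data.csv and test.csv, which are not included in this commit. Below is a minimal, hypothetical sketch of the layout the code appears to expect: feature columns followed by a single label column in the last position (the earlier app.py read it as data['Label'], and load_data in src.py simply takes the last column). The column names and values are illustrative assumptions, not files from the repository.

```python
import pandas as pd

# Hypothetical layout for example_data.csv / test.csv (not part of this commit):
# every column except the last is treated as a numeric feature,
# and the last column is the class label.
df = pd.DataFrame({
    "feat_1": [0.12, 0.40, 0.88, 0.35],
    "feat_2": [1.05, 0.97, 0.33, 0.60],
    "Label":  ["AD", "CN", "AD", "CN"],
})
df.to_csv("example_data.csv", index=False)
```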
src.py ADDED
@@ -0,0 +1,407 @@
+import csv
+import gradio as gr
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+import seaborn as sns
+from scipy.stats import f_oneway
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.linear_model import LassoLarsCV
+from sklearn.model_selection import cross_val_score
+from sklearn.model_selection import train_test_split
+from sklearn.naive_bayes import GaussianNB
+from sklearn.neighbors import KNeighborsClassifier
+from sklearn.preprocessing import LabelEncoder
+from sklearn.preprocessing import StandardScaler
+from sklearn.svm import SVC
+from sklearn.tree import DecisionTreeClassifier
+from sklearn.metrics import confusion_matrix
+
+
+class MyModel:
+    def __init__(self, model):
+        self.clf = model
+        self.scaler = None
+        self.label_encoder = None
+
+    def train(self, X, Y):
+        # Encode the labels
+        self.label_encoder = LabelEncoder()
+        Y = self.label_encoder.fit_transform(Y)
+
+        # Standardize the features
+        self.scaler = StandardScaler()
+        X = self.scaler.fit_transform(X)
+
+        # Split into training and test sets
+        X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3)
+
+        # Train the model
+        self.clf.fit(X_train, Y_train)
+
+    def predict_samples(self, samples):
+        # Apply the same preprocessing steps to the samples
+        samples = self.scaler.transform(samples)
+
+        # Predict with the model
+        predictions = self.clf.predict(samples)
+
+        # Decode the predicted labels back to the original values
+        predictions = self.label_encoder.inverse_transform(predictions)
+
+        return predictions
+
+
+# choose classifier
+def setclf(clf_name):
+    if clf_name == 'RF':
+        return RandomForestClassifier(n_jobs=-1)
+    elif clf_name == 'KNN':
+        return KNeighborsClassifier(n_jobs=-1)
+    elif clf_name == 'DT':
+        return DecisionTreeClassifier()
+    elif clf_name == 'SVM':
+        return SVC(C=1.0, kernel='rbf')
+    elif clf_name == 'Naive Bayes':
+        return GaussianNB()
+
+
+# cal score
+def add_max_score_to_list(temp_scores, current_score, selected_indices, selected_indices_list):
+    max_score_index = np.argmax(np.array(temp_scores))
+    current_score.append(temp_scores[max_score_index])
+    selected_indices.add(max_score_index)
+    selected_indices_list.append(max_score_index)
+
+
+# load data
+def load_data(data, out_name):
+    # global X, y
+    data = pd.read_csv(data.name)
+    if not out_name:
+        X = data.iloc[:, :-1].values
+        y = data.iloc[:, -1].values
+    elif out_name:
+        X = data.iloc[:, :-1]
+        y = data.iloc[:, -1].values.flatten()
+    return X, y
+
+
+def MRMR_FCD(data, testsample, num_fea_int):
+    X, y = load_data(data, False)
+    # Load the test samples and labels from test.csv
+    test_samples, test_labels = load_data(testsample, False)
+    # Get the number of features
+    # max_fea_num = X.shape[1]
+    num_features = len(X[0])
+    f_test_scores = [f_oneway(X[:, i], y)[0] for i in range(num_features)]
+    # Add the score of the starting feature to current_score
+    current_score = [max(f_test_scores)]
+    # Start from the highest-scoring feature
+    start_feature_index = f_test_scores.index(max(f_test_scores))
+    selected_indices = set()
+    selected_indices_list = []
+    selected_indices.add(start_feature_index)
+    selected_indices_list.append(start_feature_index)
+    pearson_score_matrix = np.zeros((num_features, num_features))
+    for _ in range(num_fea_int - 1):
+        temp_scores = []
+        for i in range(num_features):
+            if i in selected_indices:
+                temp_scores.append(-float('inf'))
+            else:
+                f_test_score = f_test_scores[i]
+                diff = 0
+                for j in selected_indices:
+                    # pearson score
+                    if j > i:
+                        if pearson_score_matrix[i][j] == 0:
+                            pearson_score_matrix[i][j] = np.corrcoef(X[:, i], X[:, j])[0, 1]
+                        diff += pearson_score_matrix[i][j]
+                    else:
+                        if pearson_score_matrix[j][i] == 0:
+                            pearson_score_matrix[j][i] = np.corrcoef(X[:, i], X[:, j])[0, 1]
+                        diff += pearson_score_matrix[j][i]
+                temp_scores.append(f_test_score - diff / len(selected_indices))
+        add_max_score_to_list(temp_scores, current_score, selected_indices, selected_indices_list)
+    combined = list(zip(selected_indices_list, current_score))
+    return combined, X, y, test_samples, test_labels
+
+
+def MRMR_FCQ(data, testsample, num_fea_int):
+    X, y = load_data(data, False)
+    # Load the test samples and labels from test.csv
+    test_samples, test_labels = load_data(testsample, False)
+    # Get the number of features
+    # max_fea_num = X.shape[1]
+
+    num_fea_inttures = len(X[0])
+    f_test_scores = [f_oneway(X[:, i], y)[0] for i in range(num_fea_inttures)]
+
+    # Add the score of the starting feature to current_score
+    current_score = [max(f_test_scores)]
+
+    # Index starts from 0
+    # start_feature_index = random.randint(0, num_features - 1)
+    # Start from the highest-scoring feature
+    start_feature_index = f_test_scores.index(max(f_test_scores))
+
+    selected_indices = set()
+    selected_indices_list = []
+    selected_indices.add(start_feature_index)
+    selected_indices_list.append(start_feature_index)
+    pearson_score_matrix = np.zeros((num_fea_inttures, num_fea_inttures))
+    for _ in range(num_fea_int - 1):
+        temp_scores = []
+        for i in range(num_fea_inttures):
+            if i in selected_indices:
+                temp_scores.append(-float('inf'))
+            else:
+                f_test_score = f_test_scores[i]
+                q = 0
+                for j in selected_indices:
+                    # pearson score
+                    if j > i:
+                        if pearson_score_matrix[i][j] == 0:
+                            pearson_score_matrix[i][j] = np.corrcoef(X[:, i], X[:, j])[0, 1]
+                        q += pearson_score_matrix[i][j]
+                    else:
+                        if pearson_score_matrix[j][i] == 0:
+                            pearson_score_matrix[j][i] = np.corrcoef(X[:, i], X[:, j])[0, 1]
+                        q += pearson_score_matrix[j][i]
+                temp_scores.append(f_test_score / (q / len(selected_indices)))
+        add_max_score_to_list(temp_scores, current_score, selected_indices, selected_indices_list)
+    combined = list(zip(selected_indices_list, current_score))
+    return combined, X, y, test_samples, test_labels
+
+
+def index_score_csv(sorted_combined, filename):
+    with open(filename, 'w', newline='') as file:
+        writer = csv.writer(file)
+        writer.writerow(["Index", "Score"])  # Write the column names
+        writer.writerows(sorted_combined)
+
+
+def isplot(num, width, height, title_gr, x, y, xlabbel, ylabel, filename):
+    plt.figure(num=num, figsize=(width, height))
+    plt.title(title_gr, fontsize=30)
+    plt.plot(x, y)
+    plt.xlabel(xlabel=xlabbel, fontsize=30)
+    plt.ylabel(ylabel=ylabel, fontsize=30)
+    plt.savefig(filename)
+
+
+def ifsplot(num, width, height, title_gr, max_index, max_acc, acc, xlabbel, ylabel, filename):
+    plt.figure(num=num, figsize=(width, height))
+    plt.title("IFS_" + title_gr + "_Accuracy", fontsize=40)
+    plt.plot(max_index, max_acc, 'ro')
+    plt.plot(acc)
+    plt.annotate(f'({max_index}, {max_acc})', (max_index, max_acc), textcoords="offset points", xytext=(-5, 20),
+                 ha='center', fontsize=40)
+    # Set the x-axis and y-axis labels
+    plt.xlabel(xlabel=xlabbel, fontsize=40)
+    plt.ylabel(ylabel=ylabel, fontsize=40)
+    plt.savefig(filename)
+
+
+def cmplot(num, width, height, cm, xlabbel, ylabel, filename):
+    plt.figure(num=num, figsize=(width, height))
+    sns.heatmap(cm, annot=True, fmt='d')
+    plt.xlabel(xlabel=xlabbel, fontsize=40)
+    plt.plot(ylabel=ylabel, fontsize=40)
+    plt.grid(True)
+    plt.savefig(filename)
+
+    pass
+
+
+def des(choicce):
+    title = "FSALs: Robust Feature selection framework"
+    description = r"""<center><img src='https://raw.githubusercontent.com/Justin-12138/bio_if/d1fdf085f8e679dcceecc2c05014b1d4a237e033/assets/favicon.svg' alt='FSALs logo'></center>
+    <b>Official Gradio demo</b> for <a href='https://huggingface.co/spaces/Justin-12138/FSALA' target='_blank'><b>Application of Causal Inference in Alzheimer's Disease(CCFC2023)</b></a>.<br>
+    🔥 Fsals is a Robust feature selection framework based on causal inference. <br>
+    🤗 Try using fsals in different data sets.!<br>
+    """
+    article = r"""
+    If FSALs is helpful, please help to ⭐ the <a href='https://github.com/Justin-12138/bio_if' target='_blank'>Github Repo</a>. Thanks!
+    [](https://github.com/Justin-12138/bio_if)
+
+    ---
+
+    📝 **Citation**
+
+    If our work is useful for your research, please consider citing:
+    ```bibtex
+    @article{zlhl2023,
+        author = {Xiaolong Zhou, Zhao Liu, Yuchen Huang, Kun Lin},
+        title = {A Novel Ensemble Feature Selection Method for Biomarkers of Alzheimer's disease},
+        booktitle = {GUET Publisher},
+        year = {2023}
+    }
+    ```
+    📋 **License**
+
+    This project is licensed under <a rel="license" href="https://github.com/Justin-12138/bio_if/blob/main/LICENSE">GPL License 2.0</a>.
+    Redistribution and use for non-commercial purposes should follow this license.
+
+    📧 **Contact**
+
+    If you have any questions, please feel free to reach me out at <b>[email protected]</b>.
+
+    <div>
+        🤗 Find Me:
+        <a href="https://github.com/Justin-12138"><img style="margin-top:0.5em; margin-bottom:2em" src="https://img.shields.io/github/followers/Justin-12138?style=social" alt="Github Follow"></a>
+    </div>
+    """
+    if choicce == "title":
+        return title
+    elif choicce == "description":
+        return description
+    elif choicce == "article":
+        return article
+    elif choicce == 'inputs':
+        inputs = [gr.inputs.File(label="Training data"),
+                  gr.inputs.Radio(['MRMR_FCD', 'MRMR_FCQ', 'CFS', 'Lasso', 'Ensemble', 'CI'], label="method"),
+                  gr.inputs.Number(label="Num_feature(int)"),
+                  gr.inputs.Radio(['RF', 'SVM', 'KNN', 'DT', 'Naive Bayes'], label="classifier for CV"),
+                  gr.inputs.File(label="Testing data")
+                  ]
+        return inputs
+    elif choicce == 'outputs':
+        output = [gr.Image(label="Index_score"),
+                  gr.Image(label="IFS_Acc"),
+                  gr.Image(label="Confusion_matrix"),
+                  gr.File(label='Index_score.csv')]
+        return output
+
+
+def cv(X, y, index_0, clf, n_fold):
+    acc = []
+    for i in range(len(index_0)):
+        # Cross-validate with the first i features
+        selected_features = X[:, [int(j) - 1 for j in index_0[:i + 1]]]
+        scores = cross_val_score(clf, selected_features, y, cv=n_fold)
+        # Compute the mean accuracy and append it to acc
+        acc.append(scores.mean())
+    max_acc = round(max(acc), 4)
+    max_index = acc.index(max(acc)) + 1
+    return acc, max_acc, max_index
+
+
+def getindex_1(sorted_combined):
+    index_1 = []
+    index_0 = []
+    scores = []
+    for indy in sorted_combined:
+        index_1.append(str(indy[0] + 1))
+        scores.append(indy[1])
+    for item in index_1:
+        index_0.append(int(item) - 1)
+    return index_1, index_0, scores
+
+
+def load_model(X, y, test_samples, test_labels):
+    models = SVC(C=1.0, kernel='rbf')
+    my_model = MyModel(models)
+    my_model.train(X, y)
+    # Predict the labels of the test samples and compute the accuracy
+    predictions = my_model.predict_samples(test_samples)
+    # Compute the confusion matrix
+    cm = confusion_matrix(test_labels, predictions)
+    return cm
+
+
+def lasso(data, testsample, num_fea_int):
+    X, y = load_data(data, True)
+    test_samples, test_labels = load_data(testsample, False)
+    cl = LassoLarsCV(cv=20, max_iter=80000).fit(X, y)
+    importance = np.abs(cl.coef_)
+    feature_names = list(X)
+    a = len(feature_names)
+    idx_features = (-importance).argsort()[:a]
+    # name_features = np.array(feature_names)[idx_features]
+    result = pd.DataFrame({'index': idx_features, 'Score': importance[idx_features]})
+    result_rank = result.sort_values(by='Score', ascending=False, ignore_index=True)
+    result_rank.to_csv("index-score.csv")
+    inde = result_rank['index'].tolist()
+    score = result_rank['Score'].tolist()
+    return X, y, inde, score, test_samples, test_labels, num_fea_int
+
+
+def fs(data, method, num_fea_int, clf, testsample):
+    num_fea_int = int(num_fea_int)
+    if method == 'MRMR_FCD':
+        combined, X, y, test_samples, test_labels = MRMR_FCD(data=data, testsample=testsample, num_fea_int=num_fea_int)
+        # Sort the combined list with sorted(); key sorts by score, reverse=True means descending order
+        sorted_combined = sorted(combined, key=lambda x: x[1], reverse=True)
+        index_score_csv(sorted_combined=sorted_combined, filename='ab.csv')
+        index_1, index_0, scores = getindex_1(sorted_combined=sorted_combined)
+        # Plot score.png
+        isplot(1, 24, 10,
+               title_gr=str(method), x=index_1, y=scores,
+               xlabbel="index", ylabel="scores", filename="index-score.png")
+        # Choose the classifier
+        clf = setclf(clf)
+        acc, max_acc, max_index = cv(X=X, y=y, index_0=index_0, clf=clf, n_fold=10)
+        # Plot acc.png
+        ifsplot(2, 24, 10,
+                title_gr=str(method), max_index=max_index, max_acc=max_acc,
+                acc=acc, xlabbel="top n features", ylabel="acc", filename="acc.png")
+        cm = load_model(X=X, y=y, test_samples=test_samples, test_labels=test_labels)
+        cmplot(3, 24, 10, cm=cm,
+               xlabbel="predicted labels", ylabel="true labels", filename='confusion_matrix.png')
+        return 'index-score.png', 'acc.png', "confusion_matrix.png", "ab.csv"
+
+    elif method == 'MRMR_FCQ':
+        combined, X, y, test_samples, test_labels = MRMR_FCQ(data=data, testsample=testsample, num_fea_int=num_fea_int)
+        # Sort the combined list with sorted(); key sorts by score, reverse=True means descending order
+        sorted_combined = sorted(combined, key=lambda x: x[1], reverse=True)
+        index_score_csv(sorted_combined=sorted_combined, filename='ab.csv')
+        # inde index start 1
+        index_1, index_0, scores = getindex_1(sorted_combined=sorted_combined)
+        # index-score.png
+        isplot(1, 24, 10, title_gr=str(method), x=index_1, y=scores,
+               xlabbel="index", ylabel="scores", filename="index-score.png")
+        # Choose the classifier
+        clf = setclf(clf)
+        acc, max_acc, max_index = cv(X=X, y=y, index_0=index_0, clf=clf, n_fold=5)
+        # acc.png
+        ifsplot(2, 24, 10, title_gr=str(method), max_index=max_index,
+                max_acc=max_acc, acc=acc, xlabbel="top n features", ylabel="acc",
+                filename="acc.png")
+        # cal cm
+        cm = load_model(X=X, y=y, test_samples=test_samples, test_labels=test_labels)
+        cmplot(3, 24, 10,
+               cm=cm, xlabbel="predicted labels", ylabel="true labels", filename='confusion_matrix.png')
+        return 'index-score.png', 'acc.png', "confusion_matrix.png", "ab.csv"
+
+    elif method == 'Lasso':
+        X, y, inde, score, test_samples, test_labels, num_fea_int = lasso(data, testsample, num_fea_int)
+        index = []
+        for i in inde:
+            index.append(str(i))
+        plt.figure(1, figsize=(24, 12))
+        plt.title(str(method))
+        plt.plot(index[:num_fea_int], score[:num_fea_int])
+
+        # Set the x-axis and y-axis labels
+        plt.xlabel('Feature Index', fontsize=40)
+        plt.ylabel('Feature Score', fontsize=40)
+        plt.savefig('Index_Score.png')
+        clf = setclf(clf)
+
+        inde = inde[:num_fea_int]
+        X = X.values
+        acc, max_acc, max_index = cv(X=X, y=y, index_0=inde, clf=clf, n_fold=5)
+        ifsplot(2, 24, 10, title_gr=str(method), max_index=max_index,
+                max_acc=max_acc, acc=acc, xlabbel="top n features", ylabel="acc",
+                filename="acc.png")
+
+        cm = load_model(X=X, y=y, test_samples=test_samples, test_labels=test_labels)
+        cmplot(3, 24, 10,
+               cm=cm, xlabbel="predicted labels", ylabel="true labels", filename='confusion_matrix.png')
+
+        return 'Index_Score.png', 'acc.png', "confusion_matrix.png", 'index-score.csv'
+
+    elif method == 'CFS':
+        pass
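For readers following MRMR_FCD and MRMR_FCQ above: both run the same greedy loop and differ only in how relevance and redundancy are combined. A sketch of the two criteria as implemented in this code, where F(i) is the one-way ANOVA F statistic of feature i against the labels, rho(i, j) the Pearson correlation between features i and j, and S the set of already selected features:

```latex
% Reading of the greedy step in MRMR_FCD / MRMR_FCQ above (not an addition to the code):
% FCD selects the next feature by the difference criterion, FCQ by the quotient criterion.
\[
\text{FCD:}\quad \max_{i \notin S}\; \Bigl[\, F(i) \;-\; \frac{1}{|S|}\sum_{j \in S} \rho(i,j) \Bigr]
\qquad
\text{FCQ:}\quad \max_{i \notin S}\; \frac{F(i)}{\frac{1}{|S|}\sum_{j \in S} \rho(i,j)}
\]
```

Note that the code accumulates the signed Pearson correlation rather than its absolute value; classic mRMR formulations often use the absolute correlation or mutual information instead.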