Justin-12138 committed on
Commit
024e270
·
1 Parent(s): 32b30f4

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +245 -0
  2. example_data.csv +0 -0
  3. requirements.txt +6 -0
app.py ADDED
@@ -0,0 +1,245 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import matplotlib.pyplot as plt
3
+ import numpy as np
4
+ import pandas as pd
5
+ from scipy.stats import f_oneway
6
+ from sklearn.ensemble import RandomForestClassifier
7
+ from sklearn.model_selection import cross_val_score
8
+ from sklearn.naive_bayes import GaussianNB
9
+ from sklearn.neighbors import KNeighborsClassifier
10
+ from sklearn.svm import SVC
11
+ from sklearn.tree import DecisionTreeClassifier
12
+
13
+
14
def add_max_score_to_list(temp_scores, current_score, selected_indices, selected_indices_list):
    """Record the best-scoring candidate feature of one selection round.

    The index of the highest value in ``temp_scores`` is added to
    ``selected_indices`` and appended to ``selected_indices_list``, and the
    score itself is appended to ``current_score``.  All three containers are
    mutated in place; nothing is returned.

    Args:
        temp_scores: One score per feature.  Features that must not be picked
            are expected to carry ``-inf``.
        current_score: List collecting the score of every selected feature.
        selected_indices: Set of already selected feature indices.
        selected_indices_list: Selected feature indices in selection order.

    If there is nothing left to pick (``temp_scores`` is empty, or every entry
    is ``-inf``/NaN) the containers are left untouched instead of re-adding
    index 0 with a meaningless score.
    """
    candidates = np.asarray(temp_scores, dtype=float)
    if candidates.size == 0:
        return

    # NaN compares as the maximum for argmax; an undefined score must never win.
    candidates = np.where(np.isnan(candidates), -np.inf, candidates)
    best_index = int(np.argmax(candidates))
    if candidates[best_index] == -np.inf:
        # Every feature is already selected (or unusable): nothing to add.
        return

    current_score.append(temp_scores[best_index])
    selected_indices.add(best_index)
    selected_indices_list.append(best_index)
19
+
20
+
21
def _relevance_scores(X, y):
    """Return the one-way ANOVA F-statistic of every feature against the classes."""
    groups = [X[y == label] for label in np.unique(y)]
    if len(groups) < 2:
        raise ValueError("The 'Label' column must contain at least two classes.")
    # f_oneway works column-wise on 2-D samples, giving one F value per feature.
    f_stats = np.asarray(f_oneway(*groups)[0], dtype=float)
    # Constant features yield NaN; treat them as carrying no relevance.
    return np.nan_to_num(f_stats, nan=0.0)


def _mrmr_rank(X, relevance, num_select, quotient, min_redundancy=1e-5):
    """Greedily pick ``num_select`` features by the mRMR criterion.

    Redundancy of a candidate is its mean absolute Pearson correlation with
    the features selected so far.  The score is ``relevance - redundancy``
    (FCD) or ``relevance / redundancy`` (FCQ, when ``quotient`` is true).

    Returns the selected 0-based feature indices and their scores, both in
    selection order.  The first feature is the one with the highest relevance
    and its score is that relevance.
    """
    n_features = X.shape[1]
    centered = X - X.mean(axis=0)
    norms = np.sqrt((centered ** 2).sum(axis=0))

    newest = int(np.argmax(relevance))
    selected = [newest]
    scores = [float(relevance[newest])]
    available = np.ones(n_features, dtype=bool)
    available[newest] = False
    redundancy_sum = np.zeros(n_features)

    while len(selected) < num_select:
        # Correlation of every feature with the most recently selected one;
        # accumulating it avoids recomputing pairs in later rounds.
        with np.errstate(divide='ignore', invalid='ignore'):
            corr = centered.T @ centered[:, newest] / (norms * norms[newest])
        redundancy_sum += np.abs(np.nan_to_num(corr, nan=0.0))
        mean_redundancy = redundancy_sum / len(selected)

        if quotient:
            # Floor the denominator so uncorrelated features do not divide by zero.
            candidate = relevance / np.maximum(mean_redundancy, min_redundancy)
        else:
            candidate = relevance - mean_redundancy
        candidate = np.where(available, candidate, -np.inf)

        newest = int(np.argmax(candidate))
        selected.append(newest)
        scores.append(float(candidate[newest]))
        available[newest] = False

    return selected, scores


def _build_classifier(name):
    """Map a UI classifier name to a fresh scikit-learn estimator."""
    factories = {
        'RF': lambda: RandomForestClassifier(n_jobs=-1),
        'KNN': KNeighborsClassifier,
        'DT': DecisionTreeClassifier,
        'SVM': SVC,
        'Naive Bayes': GaussianNB,
    }
    if name not in factories:
        raise ValueError(f"Unknown classifier {name!r}; choose one of {sorted(factories)}.")
    return factories[name]()


def _plot_results(score_title, acc_title, labels, scores, acc, path='output.png'):
    """Draw the feature-score and accuracy curves and save them to ``path``."""
    fig = plt.figure(figsize=(24, 12))
    try:
        ax1 = fig.add_subplot(211)
        ax1.set_title(score_title)
        ax1.plot(labels, scores)
        ax1.set_xlabel('Feature Index')
        ax1.set_ylabel('Feature Score')

        max_acc = max(acc)
        max_index = acc.index(max_acc) + 1

        ax2 = fig.add_subplot(212)
        ax2.set_title(acc_title)
        ax2.plot(max_index, max_acc, 'ro')
        # x starts at 1 so the curve lines up with the 1-based "top n" marker.
        ax2.plot(range(1, len(acc) + 1), acc)
        ax2.annotate(f'({max_index}, {max_acc})', (max_index, max_acc), textcoords="offset points",
                     xytext=(-5, -5), ha='center')
        ax2.set_xlabel('Top n features')
        ax2.set_ylabel('Accuracy')
        ax2.grid(True)
        fig.savefig(path)
    finally:
        # Figures created through pyplot stay alive until closed explicitly.
        plt.close(fig)
    return path


def fs(data, method, num_fea_int, clf):
    """Run mRMR feature selection plus incremental feature selection (IFS).

    Args:
        data: Uploaded file object; its ``name`` attribute is the path of a CSV
            file with a ``Label`` column and numeric feature columns.
        method: ``'MRMR_FCD'`` or ``'MRMR_FCQ'``.
        num_fea_int: Number of features to select; clamped to the range
            ``1..number of features``.
        clf: Classifier name: ``'RF'``, ``'KNN'``, ``'DT'``, ``'SVM'`` or
            ``'Naive Bayes'``.

    Returns:
        ``'output.png'``, the path of the saved figure.  The upper panel shows
        the score of each selected feature, the lower panel the 5-fold
        cross-validated accuracy using the top ``n`` features.

    Raises:
        NotImplementedError: ``method`` is not one of the implemented methods.
        ValueError: The CSV lacks a ``Label`` column or features, the feature
            count is missing, or ``clf`` is unknown.
    """
    # (score plot title, accuracy plot title) per implemented method.
    titles = {
        'MRMR_FCD': ("mRMR-FCD()", "IFS_mRMR_FCD_Accuracy"),
        'MRMR_FCQ': ("MRMR_FCQ", "IFS_MRMR_FCQ_Accuracy"),
    }
    if method not in titles:
        raise NotImplementedError(f"Feature selection method {method!r} is not implemented yet.")
    if num_fea_int is None:
        raise ValueError("Please provide the number of features to select.")

    frame = pd.read_csv(data.name)
    if 'Label' not in frame.columns:
        raise ValueError("The CSV file must contain a 'Label' column.")
    y = frame['Label'].values
    # Drop the label by name so X and y stay consistent wherever 'Label' sits.
    X = frame.drop(columns='Label').to_numpy(dtype=float)
    num_features = X.shape[1]
    if num_features == 0:
        raise ValueError("The CSV file contains no feature columns.")
    num_select = max(1, min(int(num_fea_int), num_features))

    model = _build_classifier(clf)

    relevance = _relevance_scores(X, y)
    selected, selected_scores = _mrmr_rank(X, relevance, num_select, quotient=(method == 'MRMR_FCQ'))

    # Rank the selected features by score, highest first.
    ranked_pairs = sorted(zip(selected, selected_scores), key=lambda pair: pair[1], reverse=True)
    ranked = [index for index, _ in ranked_pairs]
    ranked_scores = [score for _, score in ranked_pairs]
    labels = [str(index + 1) for index in ranked]  # 1-based labels for display

    # IFS: cross-validate with the top 1, top 2, ... top n ranked features.
    acc = [
        cross_val_score(model, X[:, ranked[:top_n]], y, cv=5).mean()
        for top_n in range(1, len(ranked) + 1)
    ]

    score_title, acc_title = titles[method]
    return _plot_results(score_title, acc_title, labels, ranked_scores, acc)
224
+
225
+
226
# Gradio UI: upload a CSV, pick a feature-selection method, the number of
# features and a classifier; the result is shown as an image.
# Components come from the top-level ``gr`` namespace because ``gr.inputs`` is
# deprecated in Gradio 3.x and removed in Gradio 4.
iface = gr.Interface(
    fn=fs,
    inputs=[
        "file",
        gr.Radio(['MRMR_FCD', 'MRMR_FCQ', 'CFS', 'Lasso', 'Ensemble', 'CI']),
        gr.Number(),
        gr.Radio(['RF', 'SVM', 'KNN', 'DT', 'Naive Bayes']),
    ],
    outputs="image",
    examples=[
        ["example_data.csv", 'MRMR_FCQ', 20, 'RF'],
        ["example_data.csv", 'MRMR_FCD', 10, 'SVM'],
        ["example_data.csv", 'MRMR_FCD', 30, 'KNN'],
        ["example_data.csv", 'MRMR_FCQ', 50, 'DT'],
        ["example_data.csv", 'MRMR_FCQ', 40, 'Naive Bayes'],
    ],
)

iface.launch()
example_data.csv ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ gradio==3.44.4
2
+ matplotlib==3.7.2
3
+ numpy==1.24.4
4
+ pandas==2.1.1
5
+ scikit_learn==1.3.1
6
+ scipy==1.11.2