Justin-12138 committed
Commit 194cf1f · Parent(s): 77bd380

Upload app.py

Files changed (1):
  1. app.py +190 -75
app.py CHANGED
@@ -2,13 +2,11 @@ import gradio as gr
2
  import matplotlib.pyplot as plt
3
  import numpy as np
4
  import pandas as pd
5
- from collections import Counter
6
  from scipy.stats import pointbiserialr
7
  from math import sqrt
8
- import copy
9
  import math
10
- import warnings
11
- # from pandas.core.common import SettingWithCopyWarning
12
  from scipy.stats import f_oneway
13
  from sklearn.ensemble import RandomForestClassifier
14
  from sklearn.model_selection import cross_val_score
@@ -17,9 +15,45 @@ from sklearn.neighbors import KNeighborsClassifier
17
  from sklearn.svm import SVC
18
  from sklearn.tree import DecisionTreeClassifier
19
  from sklearn.linear_model import LassoLarsCV
20
 
21
 
22
- # warnings.simplefilter(action="ignore", category=SettingWithCopyWarning)
23
 
24
  def add_max_score_to_list(temp_scores, current_score, selected_indices, selected_indices_list):
25
  max_score_index = np.argmax(np.array(temp_scores))
@@ -28,7 +62,7 @@ def add_max_score_to_list(temp_scores, current_score, selected_indices, selected
28
  selected_indices_list.append(max_score_index)
29
 
30
 
31
- def fs(data, method, num_fea_int, clf):
32
  num_fea_int = int(num_fea_int)
33
  if method == 'MRMR_FCD':
34
  data = pd.read_csv(data.name)
@@ -68,25 +102,29 @@ def fs(data, method, num_fea_int, clf):
68
  combined = list(zip(selected_indices_list, current_score))
69
  # Sort the combined list with sorted(); the key argument sorts by score and reverse=True gives descending order
70
  sorted_combined = sorted(combined, key=lambda x: x[1], reverse=True)
71
  inde = []
72
  scores = []
73
-
74
  for indy in sorted_combined:
75
  inde.append(str(indy[0] + 1))
76
  scores.append(indy[1])
77
- fig = plt.figure(figsize=(24, 12))
78
- ax1 = fig.add_subplot(211)
79
- ax1.set_title("mRMR-FCD()")
80
- ax1.plot(inde, scores)
81
-
82
- # Set the x-axis and y-axis labels
83
- ax1.set_xlabel('Feature Index')
84
- ax1.set_ylabel('Feature Score')
85
-
86
- ff = []
87
  for fire in inde:
88
  ff.append(int(fire) - 1)
89
-
90
  if clf == 'RF':
91
  clf = RandomForestClassifier(n_jobs=-1)
92
  elif clf == 'KNN':
@@ -94,7 +132,7 @@ def fs(data, method, num_fea_int, clf):
94
  elif clf == 'DT':
95
  clf = DecisionTreeClassifier()
96
  elif clf == 'SVM':
97
- clf = SVC()
98
  elif clf == 'Naive Bayes':
99
  clf = GaussianNB()
100
 
@@ -109,18 +147,42 @@ def fs(data, method, num_fea_int, clf):
109
  max_acc = max(acc)
110
  max_index = acc.index(max_acc) + 1
111
 
112
- ax2 = fig.add_subplot(212)
113
- ax2.set_title("IFS_mRMR_FCD_Accuracy")
114
- ax2.plot(max_index, max_acc, 'ro')
115
- ax2.plot(acc)
116
- ax2.annotate(f'({max_index}, {max_acc})', (max_index, max_acc), textcoords="offset points", xytext=(-5, -5),
 
117
  ha='center')
118
  # Set the x-axis and y-axis labels
119
- ax2.set_xlabel('Top n features')
120
- ax2.set_ylabel('Accuracy')
121
  plt.grid(True)
122
- plt.savefig('output.png')
123
- return 'output.png'
 
124
 
125
  elif method == 'MRMR_FCQ':
126
  data = pd.read_csv(data.name)
@@ -166,25 +228,29 @@ def fs(data, method, num_fea_int, clf):
166
 
167
  # Sort the combined list with sorted(); the key argument sorts by score and reverse=True gives descending order
168
  sorted_combined = sorted(combined, key=lambda x: x[1], reverse=True)
169
  inde = []
170
  scores = []
171
-
172
  for indy in sorted_combined:
173
  inde.append(str(indy[0] + 1))
174
  scores.append(indy[1])
175
- fig = plt.figure(figsize=(24, 12))
176
- ax1 = fig.add_subplot(211)
177
- ax1.set_title(str(method))
178
- ax1.plot(inde, scores)
179
-
180
- # Set the x-axis and y-axis labels
181
- ax1.set_xlabel('Feature Index')
182
- ax1.set_ylabel('Feature Score')
183
-
184
- ff = []
185
  for fire in inde:
186
  ff.append(int(fire) - 1)
187
-
188
  if clf == 'RF':
189
  clf = RandomForestClassifier(n_jobs=-1)
190
  elif clf == 'KNN':
@@ -192,7 +258,7 @@ def fs(data, method, num_fea_int, clf):
192
  elif clf == 'DT':
193
  clf = DecisionTreeClassifier()
194
  elif clf == 'SVM':
195
- clf = SVC()
196
  elif clf == 'Naive Bayes':
197
  clf = GaussianNB()
198
 
@@ -207,18 +273,44 @@ def fs(data, method, num_fea_int, clf):
207
  max_acc = max(acc)
208
  max_index = acc.index(max_acc) + 1
209
 
210
- ax2 = fig.add_subplot(212)
211
- ax2.set_title("IFS_" + str(method) + "_Accuracy")
212
- ax2.plot(max_index, max_acc, 'ro')
213
- ax2.plot(acc)
214
- ax2.annotate(f'({max_index}, {max_acc})', (max_index, max_acc), textcoords="offset points", xytext=(-5, -5),
 
215
  ha='center')
216
  # Set the x-axis and y-axis labels
217
- ax2.set_xlabel('Top n features')
218
- ax2.set_ylabel('Accuracy')
219
  plt.grid(True)
220
- plt.savefig('output.png')
221
- return 'output.png'
222
  # Add your code here. Write everything as functions first and wrap it into classes later; the main goal for now is a working prototype.
223
  # At the moment the final result is returned as images: one plot of the feature indices with their scores, and one plot of the incremental feature selection (IFS) accuracy.
224
  # A lot of the code above can still be optimized (the plotting, the classifier selection, and so on), but ignore that for now; finish the remaining elif branches below first, then we can discuss refactoring.
@@ -231,7 +323,6 @@ def fs(data, method, num_fea_int, clf):
231
 
232
  importance = np.abs(cl.coef_)
233
  feature_names = list(X)
234
- print(feature_names)
235
  a = len(feature_names)
236
 
237
  idx_features = (-importance).argsort()[:a]
@@ -241,21 +332,21 @@ def fs(data, method, num_fea_int, clf):
241
  print((name_features)[i], importance[idx_features][i])
242
  result = pd.DataFrame({'index': idx_features, 'Score': importance[idx_features]})
243
  result_rank = result.sort_values(by='Score', ascending=False, ignore_index=True)
 
244
  inde = result_rank['index'].tolist()
245
  score = result_rank['Score'].tolist()
246
 
247
  index = []
248
  for i in inde:
249
  index.append(str(i))
250
- fig = plt.figure(figsize=(24, 12))
251
- ax1 = fig.add_subplot(211)
252
- ax1.set_title(str(method))
253
- ax1.plot(index[:num_fea_int], score[:num_fea_int])
254
 
255
  # Set the x-axis and y-axis labels
256
- ax1.set_xlabel('Feature Index')
257
- ax1.set_ylabel('Feature Score')
258
-
259
  if clf == 'RF':
260
  clf = RandomForestClassifier(n_jobs=-1)
261
  elif clf == 'KNN':
@@ -266,13 +357,13 @@ def fs(data, method, num_fea_int, clf):
266
  clf = SVC()
267
  elif clf == 'Naive Bayes':
268
  clf = GaussianNB()
 
269
  inde = inde[:num_fea_int]
270
  index = index[:num_fea_int]
271
  acc = []
272
  # For each feature index in the index list
273
 
274
  X = data.iloc[:, :-1].values
275
- print(X)
276
  for i in range(len(index)):
277
  # Run cross-validation on the first i+1 features
278
  selected_features = X[:, [int(j) - 1 for j in inde[:i + 1]]]
@@ -282,18 +373,42 @@ def fs(data, method, num_fea_int, clf):
282
  max_acc = max(acc)
283
  max_index = acc.index(max_acc) + 1
284
 
285
- ax2 = fig.add_subplot(212)
286
- ax2.set_title("IFS_" + str(method) + "_Accuracy")
287
- ax2.plot(max_index, max_acc, 'ro')
288
- ax2.plot(acc)
289
- ax2.annotate(f'({max_index}, {max_acc})', (max_index, max_acc), textcoords="offset points", xytext=(-5, -5),
 
290
  ha='center')
291
  # Set the x-axis and y-axis labels
292
- ax2.set_xlabel('Top n features')
293
- ax2.set_ylabel('Accuracy')
294
  plt.grid(True)
295
- plt.savefig('output.png')
296
- return 'output.png'
 
297
 
298
  elif method == 'Ensemble':
299
  pass
@@ -349,16 +464,16 @@ iface = gr.Interface(
349
  gr.inputs.Radio(['MRMR_FCD', 'MRMR_FCQ', 'CFS', 'Lasso', 'Ensemble', 'CI']),
350
  gr.inputs.Number(),
351
  gr.inputs.Radio(['RF', 'SVM', 'KNN', 'DT', 'Naive Bayes']),
352
-
353
  ],
354
- outputs="image",
355
  article=article,
356
  examples=[
357
- ["example_data.csv", 'MRMR_FCQ', 20, 'RF'],
358
- ["example_data.csv", 'MRMR_FCD', 10, 'SVM'],
359
- ["example_data.csv", 'MRMR_FCD', 30, 'KNN'],
360
- ["example_data.csv", 'Lasso', 50, 'DT'],
361
- ["example_data.csv", 'Lasso', 40, 'Naive Bayes'],
362
  ],
363
  allow_flagging="never"
364
  )
 
2
  import matplotlib.pyplot as plt
3
  import numpy as np
4
  import pandas as pd
 
5
  from scipy.stats import pointbiserialr
6
  from math import sqrt
 
7
  import math
8
+ import csv
9
+ import seaborn as sns
10
  from scipy.stats import f_oneway
11
  from sklearn.ensemble import RandomForestClassifier
12
  from sklearn.model_selection import cross_val_score
 
15
  from sklearn.svm import SVC
16
  from sklearn.tree import DecisionTreeClassifier
17
  from sklearn.linear_model import LassoLarsCV
18
+ from sklearn.preprocessing import LabelEncoder
19
+ from sklearn.model_selection import train_test_split
20
+ from sklearn.preprocessing import StandardScaler
21
+ from sklearn.metrics import confusion_matrix
22
 
23
 
24
+ class MyModel:
25
+ def __init__(self, model):
26
+ self.clf = model
27
+ self.scaler = None
28
+ self.label_encoder = None
29
+
30
+ def train(self, X, Y):
31
+ # Encode the labels
32
+ self.label_encoder = LabelEncoder()
33
+ Y = self.label_encoder.fit_transform(Y)
34
+
35
+ # Standardize the features
36
+ self.scaler = StandardScaler()
37
+ X = self.scaler.fit_transform(X)
38
+
39
+ # Split into training and test sets
40
+ X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3)
41
+
42
+ # Train the model
43
+ self.clf.fit(X_train, Y_train)
44
+
45
+ def predict_samples(self, samples):
46
+ # Apply the same preprocessing steps to the samples
47
+ samples = self.scaler.transform(samples)
48
+
49
+ # Predict with the trained model
50
+ predictions = self.clf.predict(samples)
51
+
52
+ # Decode the predicted labels back to the original values
53
+ predictions = self.label_encoder.inverse_transform(predictions)
54
+
55
+ return predictions
56
+
57
 
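A note on the new MyModel helper shown above: it fits the StandardScaler and LabelEncoder on the full training matrix before calling train_test_split, and the X_test/Y_test split it creates is never evaluated (the confusion matrices later in the diff come from a separate test CSV instead). Also, the later calls my_model.train(X, y) rely on X and y defined outside the changed lines. A minimal sketch of a leakage-free internal-evaluation variant using an sklearn Pipeline; the helper name and CSV layout (features in all but the last column, labels in the last) are assumptions for illustration, not the app's API:

    import pandas as pd
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import StandardScaler, LabelEncoder
    from sklearn.model_selection import train_test_split
    from sklearn.svm import SVC
    from sklearn.metrics import confusion_matrix

    def train_and_evaluate(csv_path, estimator=None):
        # Hypothetical helper: features in every column but the last, labels in the last column.
        frame = pd.read_csv(csv_path)
        X = frame.iloc[:, :-1].values
        y = LabelEncoder().fit_transform(frame.iloc[:, -1].values)

        # Split first, then let the pipeline fit the scaler on the training fold only.
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)
        model = make_pipeline(StandardScaler(), estimator or SVC(C=1.0, kernel='rbf'))
        model.fit(X_train, y_train)

        # Evaluate on the held-out fold instead of data the scaler has already seen.
        return model, confusion_matrix(y_test, model.predict(X_test))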
58
  def add_max_score_to_list(temp_scores, current_score, selected_indices, selected_indices_list):
59
  max_score_index = np.argmax(np.array(temp_scores))
 
62
  selected_indices_list.append(max_score_index)
63
 
64
 
65
+ def fs(data, method, num_fea_int, clf, testsample):
66
  num_fea_int = int(num_fea_int)
67
  if method == 'MRMR_FCD':
68
  data = pd.read_csv(data.name)
 
102
  combined = list(zip(selected_indices_list, current_score))
103
  # Sort the combined list with sorted(); the key argument sorts by score and reverse=True gives descending order
104
  sorted_combined = sorted(combined, key=lambda x: x[1], reverse=True)
105
+ # Write the indices and feature scores to a CSV file
106
+ with open('index-score.csv', 'w', newline='') as file:
107
+ writer = csv.writer(file)
108
+ writer.writerow(["Index", "Score"])  # write the header row
109
+ writer.writerows(sorted_combined)
110
+
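One detail worth flagging: the rows written to index-score.csv keep the 0-based indices from sorted_combined, while the plot below labels features as index + 1. If the CSV is meant to match the plot, a purely illustrative adjustment (example values stand in for the real sorted_combined):

    import csv

    # 'sorted_combined' is the (index, score) list computed above; example values for illustration only.
    sorted_combined = [(0, 0.91), (4, 0.87), (2, 0.55)]

    with open('index-score.csv', 'w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(["Index", "Score"])
        # Hypothetical shift to 1-based indices so the CSV matches the plotted feature labels.
        writer.writerows((idx + 1, score) for idx, score in sorted_combined)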
111
  inde = []
112
  scores = []
 
113
  for indy in sorted_combined:
114
  inde.append(str(indy[0] + 1))
115
  scores.append(indy[1])
116
+ # Create the first figure: the feature index vs. score plot
117
+ plt.figure(1, figsize=(24, 10))
118
+ plt.title("mRMR-FCD()")
119
+ plt.plot(inde, scores)
120
+ plt.xlabel("Feature Index")
121
+ plt.ylabel("Feature Score")
122
+ plt.savefig('Index_Score.png')
123
+
124
+ ff = []  # convert the string indices back to integers
 
125
  for fire in inde:
126
  ff.append(int(fire) - 1)
127
+ # Choose the classifier
128
  if clf == 'RF':
129
  clf = RandomForestClassifier(n_jobs=-1)
130
  elif clf == 'KNN':
 
132
  elif clf == 'DT':
133
  clf = DecisionTreeClassifier()
134
  elif clf == 'SVM':
135
+ clf = SVC(C=1.0, kernel='rbf')
136
  elif clf == 'Naive Bayes':
137
  clf = GaussianNB()
138
 
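The classifier is picked through the same if/elif chain in every branch, which the author flags below as a cleanup target. A dictionary-based factory is one way to collapse it; a sketch, with the mapping and function names invented for illustration:

    from sklearn.ensemble import RandomForestClassifier
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.svm import SVC
    from sklearn.naive_bayes import GaussianNB

    # Hypothetical factory: build a fresh estimator from the radio-button label.
    CLASSIFIERS = {
        'RF': lambda: RandomForestClassifier(n_jobs=-1),
        'KNN': lambda: KNeighborsClassifier(),
        'DT': lambda: DecisionTreeClassifier(),
        'SVM': lambda: SVC(C=1.0, kernel='rbf'),
        'Naive Bayes': lambda: GaussianNB(),
    }

    def make_classifier(name):
        return CLASSIFIERS[name]()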
 
147
  max_acc = max(acc)
148
  max_index = acc.index(max_acc) + 1
149
 
150
+ # Create the second figure: the IFS accuracy plot
151
+ plt.figure(2, figsize=(24, 10))
152
+ plt.title("IFS_" + str(method) + "_Accuracy")
153
+ plt.plot(max_index, max_acc, 'ro')
154
+ plt.plot(acc)
155
+ plt.annotate(f'({max_index}, {max_acc})', (max_index, max_acc), textcoords="offset points", xytext=(-5, 20),
156
  ha='center')
157
  # Set the x-axis and y-axis labels
158
+ plt.xlabel("Top n features")
159
+ plt.ylabel('Accuracy')
160
+ plt.savefig('acc.png')
161
+
162
+ # Load the test samples and labels from test.csv
163
+ testsample = pd.read_csv(testsample.name)
164
+ test_samples = testsample.iloc[:, :-1].values
165
+ test_labels = testsample.iloc[:, -1].values
166
+
167
+ # Build the model
168
+ models = SVC(C=1.0, kernel='rbf')
169
+ my_model = MyModel(models)
170
+ my_model.train(X, y)
171
+
172
+ # Predict labels for the test samples
173
+ predictions = my_model.predict_samples(test_samples)
174
+ # Compute the confusion matrix
175
+ cm = confusion_matrix(test_labels, predictions)
176
+
177
+ # Plot the confusion matrix as a seaborn heatmap
178
+ plt.figure(figsize=(24, 10))
179
+ sns.heatmap(cm, annot=True, fmt='d')
180
+ plt.xlabel('Predicted labels')
181
+ plt.ylabel('True labels')
182
  plt.grid(True)
183
+ plt.savefig('confusion_matrix.png')
184
+
185
+ return 'Index_Score.png', 'acc.png', "confusion_matrix.png", "index-score.csv"
186
 
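For context, acc, max_acc and max_index come from the incremental feature selection (IFS) loop that sits just above this hunk, outside the diff context: features are added in ranked order and each prefix is scored with cross-validation (the same pattern is visible in the Lasso branch further down). A minimal sketch of that loop; the function name, cv=5, and the X/y/ranked-index arguments are assumptions:

    from sklearn.model_selection import cross_val_score

    def ifs_accuracy_curve(X, y, ranked_indices, estimator, cv=5):
        # Score the top-1, top-2, ..., top-n ranked features with cross-validation.
        acc = []
        for n in range(1, len(ranked_indices) + 1):
            subset = X[:, ranked_indices[:n]]
            acc.append(cross_val_score(estimator, subset, y, cv=cv).mean())
        max_acc = max(acc)
        max_index = acc.index(max_acc) + 1  # number of top features at the best accuracy
        return acc, max_acc, max_index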
187
  elif method == 'MRMR_FCQ':
188
  data = pd.read_csv(data.name)
 
228
 
229
  # Sort the combined list with sorted(); the key argument sorts by score and reverse=True gives descending order
230
  sorted_combined = sorted(combined, key=lambda x: x[1], reverse=True)
231
+ # Write the indices and feature scores to a CSV file
232
+ with open('index-score.csv', 'w', newline='') as file:
233
+ writer = csv.writer(file)
234
+ writer.writerow(["Index", "Score"])  # write the header row
235
+ writer.writerows(sorted_combined)
236
+
237
  inde = []
238
  scores = []
 
239
  for indy in sorted_combined:
240
  inde.append(str(indy[0] + 1))
241
  scores.append(indy[1])
242
+ # Create the first figure: the feature index vs. score plot
243
+ plt.figure(1, figsize=(24, 10))
244
+ plt.title("mRMR-FCQ")
245
+ plt.plot(inde, scores)
246
+ plt.xlabel("Feature Index")
247
+ plt.ylabel("Feature Score")
248
+ plt.savefig('Index_Score.png')
249
+
250
+ ff = []  # convert the string indices back to integers
 
251
  for fire in inde:
252
  ff.append(int(fire) - 1)
253
+ # Choose the classifier
254
  if clf == 'RF':
255
  clf = RandomForestClassifier(n_jobs=-1)
256
  elif clf == 'KNN':
 
258
  elif clf == 'DT':
259
  clf = DecisionTreeClassifier()
260
  elif clf == 'SVM':
261
+ clf = SVC(C=1.0, kernel='rbf')
262
  elif clf == 'Naive Bayes':
263
  clf = GaussianNB()
264
 
 
273
  max_acc = max(acc)
274
  max_index = acc.index(max_acc) + 1
275
 
276
+ # Create the second figure: the IFS accuracy plot
277
+ plt.figure(2, figsize=(24, 10))
278
+ plt.title("IFS_" + str(method) + "_Accuracy")
279
+ plt.plot(max_index, max_acc, 'ro')
280
+ plt.plot(acc)
281
+ plt.annotate(f'({max_index}, {max_acc})', (max_index, max_acc), textcoords="offset points", xytext=(-5, 20),
282
  ha='center')
283
  # Set the x-axis and y-axis labels
284
+ plt.xlabel("Top n features")
285
+ plt.ylabel('Accuracy')
286
+ plt.savefig('acc.png')
287
+
288
+ # Load the test samples and labels from test.csv
289
+ testsample = pd.read_csv(testsample.name)
290
+ test_samples = testsample.iloc[:, :-1].values
291
+ test_labels = testsample.iloc[:, -1].values
292
+
293
+ # Build the model
294
+ models = SVC(C=1.0, kernel='rbf')
295
+ my_model = MyModel(models)
296
+ my_model.train(X, y)
297
+
298
+ # Predict labels for the test samples
299
+ predictions = my_model.predict_samples(test_samples)
300
+ # Compute the confusion matrix
301
+ cm = confusion_matrix(test_labels, predictions)
302
+
303
+ # Plot the confusion matrix as a seaborn heatmap
304
+ plt.figure(figsize=(24, 10))
305
+ sns.heatmap(cm, annot=True, fmt='d')
306
+ plt.xlabel('Predicted labels')
307
+ plt.ylabel('True labels')
308
  plt.grid(True)
309
+ plt.savefig('confusion_matrix.png')
310
+
311
+ return 'Index_Score.png', 'acc.png', "confusion_matrix.png", "index-score.csv"
312
+
313
+
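The MRMR_FCD and MRMR_FCQ branches above differ only in how relevance and redundancy are combined in the feature score. In the usual mRMR formulation, FCD subtracts the mean absolute correlation with the already-selected features from the F-statistic, while FCQ divides by it. The app's own scoring code is outside this diff, so the following is a hedged sketch of the standard criteria built from the imported f_oneway and numpy correlations, not the app's implementation:

    import numpy as np
    from scipy.stats import f_oneway

    def f_statistic(feature, labels):
        # Relevance: one-way ANOVA F-statistic between a feature and the class labels.
        groups = [feature[labels == c] for c in np.unique(labels)]
        return f_oneway(*groups).statistic

    def mrmr_score(candidate, selected_features, labels, variant='FCD'):
        relevance = f_statistic(candidate, labels)
        if not selected_features:
            return relevance
        # Redundancy: mean absolute correlation with the features chosen so far.
        redundancy = np.mean([abs(np.corrcoef(candidate, s)[0, 1]) for s in selected_features])
        if variant == 'FCD':             # F-test Correlation Difference
            return relevance - redundancy
        return relevance / redundancy    # F-test Correlation Quotient (FCQ)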
314
  # Add your code here. Write everything as functions first and wrap it into classes later; the main goal for now is a working prototype.
315
  # At the moment the final result is returned as images: one plot of the feature indices with their scores, and one plot of the incremental feature selection (IFS) accuracy.
316
  # A lot of the code above can still be optimized (the plotting, the classifier selection, and so on), but ignore that for now; finish the remaining elif branches below first, then we can discuss refactoring.
 
323
 
324
  importance = np.abs(cl.coef_)
325
  feature_names = list(X)
 
326
  a = len(feature_names)
327
 
328
  idx_features = (-importance).argsort()[:a]
 
332
  print((name_features)[i], importance[idx_features][i])
333
  result = pd.DataFrame({'index': idx_features, 'Score': importance[idx_features]})
334
  result_rank = result.sort_values(by='Score', ascending=False, ignore_index=True)
335
+ result_rank.to_csv("index-score.csv")
336
  inde = result_rank['index'].tolist()
337
  score = result_rank['Score'].tolist()
338
 
339
  index = []
340
  for i in inde:
341
  index.append(str(i))
342
+ plt.figure(1, figsize=(24, 12))
343
+ plt.title(str(method))
344
+ plt.plot(index[:num_fea_int], score[:num_fea_int])
 
345
 
346
  # Set the x-axis and y-axis labels
347
+ plt.xlabel('Feature Index')
348
+ plt.ylabel('Feature Score')
349
+ plt.savefig('Index_Score.png')
350
  if clf == 'RF':
351
  clf = RandomForestClassifier(n_jobs=-1)
352
  elif clf == 'KNN':
 
357
  clf = SVC()
358
  elif clf == 'Naive Bayes':
359
  clf = GaussianNB()
360
+
361
  inde = inde[:num_fea_int]
362
  index = index[:num_fea_int]
363
  acc = []
364
  # For each feature index in the index list
365
 
366
  X = data.iloc[:, :-1].values
 
367
  for i in range(len(index)):
368
  # Run cross-validation on the first i+1 features
369
  selected_features = X[:, [int(j) - 1 for j in inde[:i + 1]]]
 
373
  max_acc = max(acc)
374
  max_index = acc.index(max_acc) + 1
375
 
376
+ # ax2 = fig.add_subplot(212)
377
+ # ax2.set_title("IFS_" + str(method) + "_Accuracy")
378
+ plt.figure(2, figsize=(24, 10))
379
+ plt.plot(max_index, max_acc, 'ro')
380
+ plt.plot(acc)
381
+ plt.annotate(f'({max_index}, {max_acc})', (max_index, max_acc), textcoords="offset points", xytext=(-5, -5),
382
  ha='center')
383
  # 设置x轴和y轴的标签
384
+ # ax2.set_xlabel()
385
+ # ax2.set_ylabel('Accuracy')
386
+ plt.xlabel('Top n features')
387
+ plt.ylabel('Accuracy')
388
+ plt.grid(True)
389
+ plt.savefig('acc.png')
390
+
391
+ testsample = pd.read_csv(testsample.name)
392
+ test_samples = testsample.iloc[:, :-1].values
393
+ test_labels = testsample.iloc[:, -1].values
394
+ models = SVC(C=1.0, kernel='rbf')
395
+ my_model = MyModel(models)
396
+ my_model.train(X, y)
397
+
398
+ # Predict labels for the test samples
399
+ predictions = my_model.predict_samples(test_samples)
400
+ # Compute the confusion matrix
401
+ cm = confusion_matrix(test_labels, predictions)
402
+
403
+ # Plot the confusion matrix as a seaborn heatmap
404
+ plt.figure(figsize=(24, 10))
405
+ sns.heatmap(cm, annot=True, fmt='d')
406
+ plt.xlabel('Predicted labels')
407
+ plt.ylabel('True labels')
408
  plt.grid(True)
409
+ plt.savefig('confusion_matrix.png')
410
+
411
+ return 'Index_Score.png', 'acc.png', 'confusion_matrix.png', 'index-score.csv'
412
 
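This Lasso branch ranks features by the absolute values of cl.coef_, but the fitting of cl itself falls outside the diff context. A sketch of how such a ranking is typically produced with the imported LassoLarsCV; the function and variable names are illustrative, and numeric labels in the last column are assumed:

    import numpy as np
    import pandas as pd
    from sklearn.linear_model import LassoLarsCV

    def lasso_rank(data: pd.DataFrame):
        # Features in every column but the last, numeric labels in the last column (as elsewhere in app.py).
        X, y = data.iloc[:, :-1], data.iloc[:, -1]
        cl = LassoLarsCV(cv=5).fit(X, y)

        importance = np.abs(cl.coef_)    # one weight per feature
        order = np.argsort(-importance)  # most important first
        return pd.DataFrame({'index': order, 'Score': importance[order]})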
413
  elif method == 'Ensemble':
414
  pass
 
464
  gr.inputs.Radio(['MRMR_FCD', 'MRMR_FCQ', 'CFS', 'Lasso', 'Ensemble', 'CI']),
465
  gr.inputs.Number(),
466
  gr.inputs.Radio(['RF', 'SVM', 'KNN', 'DT', 'Naive Bayes']),
467
+ "file"
468
  ],
469
+ outputs=["image", "image", "image", "file"],
470
  article=article,
471
  examples=[
472
+ ["example_data.csv", 'MRMR_FCQ', 20, 'RF', "test.csv"],
473
+ ["example_data.csv", 'MRMR_FCD', 10, 'SVM', "test.csv"],
474
+ ["example_data.csv", 'MRMR_FCD', 30, 'KNN', "test.csv"],
475
+ ["example_data.csv", 'CFS', 50, 'DT', "test.csv"],
476
+ ["example_data.csv", 'CFS', 40, 'Naive Bayes', "test.csv"],
477
  ],
478
  allow_flagging="never"
479
  )
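Pulling the interface changes together: fs now takes a fifth input (the test-set CSV) and returns three images plus the score CSV. A condensed sketch of the resulting gr.Interface call in the same legacy gr.inputs style as the file; fn=fs and the first "file" input are assumptions based on how fs reads data.name, and fs, article, and the examples are defined elsewhere in app.py:

    import gradio as gr

    # Sketch only: fs and article come from app.py; adjust the first input to match the full file.
    iface = gr.Interface(
        fn=fs,
        inputs=[
            "file",                                                      # training CSV
            gr.inputs.Radio(['MRMR_FCD', 'MRMR_FCQ', 'CFS', 'Lasso', 'Ensemble', 'CI']),
            gr.inputs.Number(),                                          # number of features to keep
            gr.inputs.Radio(['RF', 'SVM', 'KNN', 'DT', 'Naive Bayes']),
            "file",                                                      # test-set CSV
        ],
        outputs=["image", "image", "image", "file"],  # index/score plot, IFS accuracy, confusion matrix, score CSV
        article=article,
        allow_flagging="never",
    )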