Justin-12138 committed on
Commit
3da65a3
·
1 Parent(s): 024e270

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +217 -4
app.py CHANGED
@@ -2,6 +2,13 @@ import gradio as gr
2
  import matplotlib.pyplot as plt
3
  import numpy as np
4
  import pandas as pd
 
 
 
 
 
 
 
5
  from scipy.stats import f_oneway
6
  from sklearn.ensemble import RandomForestClassifier
7
  from sklearn.model_selection import cross_val_score
@@ -11,6 +18,8 @@ from sklearn.svm import SVC
11
  from sklearn.tree import DecisionTreeClassifier
12
 
13
 
 
 
14
  def add_max_score_to_list(temp_scores, current_score, selected_indices, selected_indices_list):
15
  max_score_index = np.argmax(np.array(temp_scores))
16
  current_score.append(temp_scores[max_score_index])
@@ -202,7 +211,7 @@ def fs(data, method, num_fea_int, clf):
202
  max_index = acc.index(max_acc) + 1
203
 
204
  ax2 = fig.add_subplot(212)
205
- ax2.set_title("IFS_"+str(method)+"_Accuracy")
206
  ax2.plot(max_index, max_acc, 'ro')
207
  ax2.plot(acc)
208
  ax2.annotate(f'({max_index}, {max_acc})', (max_index, max_acc), textcoords="offset points", xytext=(-5, -5),
@@ -214,17 +223,220 @@ def fs(data, method, num_fea_int, clf):
214
  plt.savefig('output.png')
215
  return 'output.png'
216
 
 
 
 
217
  elif method == 'CFS':
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
218
 
219
  pass
220
  elif method == 'Lasso':
221
  pass
222
  elif method == 'Ensemble':
223
  pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
224
 
225
 
226
  iface = gr.Interface(
227
  fn=fs,
 
 
 
228
  inputs=["file",
229
  gr.inputs.Radio(['MRMR_FCD', 'MRMR_FCQ', 'CFS', 'Lasso', 'Ensemble', 'CI']),
230
  gr.inputs.Number(),
@@ -232,14 +444,15 @@ iface = gr.Interface(
232
 
233
  ],
234
  outputs="image",
 
235
  examples=[
236
  ["example_data.csv", 'MRMR_FCQ', 20, 'RF'],
237
  ["example_data.csv", 'MRMR_FCD', 10, 'SVM'],
238
  ["example_data.csv", 'MRMR_FCD', 30, 'KNN'],
239
- ["example_data.csv", 'MRMR_FCQ', 50, 'DT'],
240
- ["example_data.csv", 'MRMR_FCQ', 40, 'Naive Bayes'],
241
-
242
  ],
 
243
  )
244
 
245
  iface.launch()
 
2
  import matplotlib.pyplot as plt
3
  import numpy as np
4
  import pandas as pd
5
+ from collections import Counter
6
+ from scipy.stats import pointbiserialr
7
+ from math import sqrt
8
+ import copy
9
+ import math
10
+ import warnings
11
+ # from pandas.core.common import SettingWithCopyWarning
12
  from scipy.stats import f_oneway
13
  from sklearn.ensemble import RandomForestClassifier
14
  from sklearn.model_selection import cross_val_score
 
18
  from sklearn.tree import DecisionTreeClassifier
19
 
20
 
21
+ # warnings.simplefilter(action="ignore", category=SettingWithCopyWarning)
22
+
23
  def add_max_score_to_list(temp_scores, current_score, selected_indices, selected_indices_list):
24
  max_score_index = np.argmax(np.array(temp_scores))
25
  current_score.append(temp_scores[max_score_index])
 
211
  max_index = acc.index(max_acc) + 1
212
 
213
  ax2 = fig.add_subplot(212)
214
+ ax2.set_title("IFS_" + str(method) + "_Accuracy")
215
  ax2.plot(max_index, max_acc, 'ro')
216
  ax2.plot(acc)
217
  ax2.annotate(f'({max_index}, {max_acc})', (max_index, max_acc), textcoords="offset points", xytext=(-5, -5),
 
223
  plt.savefig('output.png')
224
  return 'output.png'
225
 
226
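+ # Add your code here. We write everything as plain functions first and wrap it into classes later; the main goal is to get a working prototype.
227
+ # For now the final result is a single returned image: one plot of the feature indices with their scores, and one plot of the incremental-feature-selection accuracy.
228
+ # Much of my code above can still be optimised (plotting, classifier selection, etc.), but ignore that for now: finish the elif branches below first, then we will discuss optimisation.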
229
  elif method == 'CFS':
230
def loadDataSet(fileName):
    """Read the CSV file at ``fileName`` and return it as a pandas DataFrame."""
    return pd.read_csv(fileName)
233
+
234
def writesortedlist(filename, thelist):
    """Write each entry of ``thelist`` to ``filename`` as ``<entry[0]>\\t<entry[1]>``.

    ``entry[0]`` is concatenated as-is (so it must be a string); ``entry[1]``
    is converted with ``str``. The file is overwritten.
    """
    rows = (entry[0] + "\t" + str(entry[1]) + "\n" for entry in thelist)
    with open(filename, "w") as out:
        out.writelines(rows)
238
+
239
def writethelist(filename, thelist):
    """Write every string in ``thelist`` to ``filename``, one per line.

    The file is overwritten.
    """
    with open(filename, "w") as out:
        out.writelines(entry + "\n" for entry in thelist)
243
+
244
def getdatadf(datafile):
    """Load ``datafile`` and split off its ``"Label"`` column.

    Returns a ``(features, labels)`` pair: the DataFrame without the
    ``"Label"`` column, and the removed column as a plain list.
    """
    frame = loadDataSet(datafile)
    # pop() removes the column from the frame and hands it back in one step.
    labels = frame.pop("Label").tolist()
    return frame, labels
249
+
250
def CFSmethod(datafile):
    """Run correlation-based feature selection (CFS) on a CSV file.

    The file is loaded with ``getdatadf``, which separates the ``"Label"``
    column from the feature columns. The selected feature subset is written
    to ``bestfeature.txt`` (``calBFset`` additionally writes
    ``sorteddict.txt``).

    Returns:
        dict mapping feature name -> single-feature merit, in descending
        score order (the order produced by ``calBFset``).
    """
    datadf, labellist = getdatadf(datafile)
    selectdf = datadf.copy()
    allflist = datadf.columns.tolist()
    # The merit functions read the target from the "class" column. It must
    # hold the labels taken from the "Label" column; the previous code stored
    # the row index here, which made every sample its own class.
    selectdf["class"] = labellist

    bestfset, sortlist = calBFset(selectdf, allflist)
    writethelist("bestfeature.txt", bestfset)  # persist the best feature subset

    return dict(sortlist)
265
+
266
def calmulmerit(selectdf, sublist):
    """Return the CFS merit of the feature subset ``sublist``.

    merit = k * rcf / sqrt(k + k * (k - 1) * rff), where

    * ``k`` is ``len(sublist)``;
    * ``rcf`` is the mean, over the features, of the one-vs-rest absolute
      point-biserial correlation with the ``"class"`` column, averaged over
      the classes;
    * ``rff`` is the mean absolute pairwise correlation between the features
      (upper triangle of the correlation matrix, NaN entries ignored).

    Args:
        selectdf: DataFrame holding the feature columns and a ``"class"``
            column. It is not modified.
        sublist: list of feature column names.

    Returns:
        The merit as a float. For a single feature there are no feature
        pairs, so ``rff`` is taken as 0 and the merit equals ``rcf``.
    """
    label = "class"
    k = len(sublist)
    classes = set(selectdf[label])
    # One-vs-rest indicator vectors, built once instead of once per feature
    # and without writing a temporary column onto a slice of ``selectdf``.
    indicators = [np.where(selectdf[label] == cls, 1, 0) for cls in classes]

    rcf_total = 0.0
    for feature in sublist:
        per_class = 0.0
        for indicator in indicators:
            # Index 0 of the result is the correlation coefficient.
            per_class += abs(pointbiserialr(selectdf[feature], indicator)[0])
        rcf_total += per_class / float(len(classes))
    rcf = rcf_total / float(k)

    if k > 1:
        # Read the strict upper triangle instead of overwriting the lower one
        # in place, which fails when the underlying array is read-only.
        corr = selectdf[sublist].corr().abs().to_numpy()
        rff = np.nanmean(corr[np.triu_indices(k, 1)])
    else:
        rff = 0.0

    return (k * rcf) / sqrt(k + k * (k - 1) * rff)
291
+
292
def calBFset(selectdf, allflist):
    """Greedy forward selection of the best CFS feature subset.

    Features are ranked by their single-feature merit (descending) and the
    ranking is written to ``sorteddict.txt``. Starting from the top-ranked
    feature, each following feature is tentatively added and kept only when
    the subset merit from ``calmulmerit`` exceeds the best value so far.

    Returns:
        ``(selected_feature_names, ranked_pairs)`` where ``ranked_pairs`` is
        the list of ``(feature, score)`` tuples in descending score order.
    """
    scores = getallfscoredict(selectdf, allflist)
    ranked = sorted(scores.items(), key=lambda pair: pair[1], reverse=True)
    writesortedlist("sorteddict.txt", ranked)  # persist the descending ranking

    chosen = [ranked[0][0]]
    best = ranked[0][1]
    total = len(ranked)
    for position, (candidate, _) in enumerate(ranked[1:], start=1):
        print(f"{position}/{total}")
        chosen.append(candidate)
        merit = calmulmerit(selectdf, chosen)
        if merit > best:
            best = merit
        else:
            # No improvement: discard the tentatively added feature.
            chosen.pop()
    print(chosen)
    return chosen, ranked
310
+
311
def getallfscoredict(selectdf, allflist):
    """Score every feature in ``allflist`` on its own.

    Returns a dict mapping feature name -> ``calonemerit`` score. Features
    whose score is NaN are left out. A 1-based progress counter is printed
    for each feature.
    """
    scores = {}
    for counter, name in enumerate(allflist, start=1):
        print(counter)
        merit = calonemerit(selectdf, name)
        if not math.isnan(merit):
            scores[name] = merit
    return scores
322
+
323
def calonemerit(selectdf, subname):
    """Return the single-feature merit of column ``subname``.

    For every distinct value in the ``"class"`` column a one-vs-rest 0/1
    indicator is built, and the absolute point-biserial correlation between
    the feature and that indicator is computed. The merit is the mean of
    those absolute correlations over the classes.

    Args:
        selectdf: DataFrame holding the feature column and a ``"class"``
            column. It is not modified.
        subname: name of the feature column to score.

    Returns:
        The mean absolute correlation; NaN if any of the correlations is NaN.
    """
    label = "class"
    classes = set(selectdf[label])
    feature = selectdf[subname]
    total = 0.0
    for cls in classes:
        indicator = np.where(selectdf[label] == cls, 1, 0)
        # Index 0 of the result is the correlation coefficient.
        total += abs(pointbiserialr(feature, indicator)[0])
    return total / float(len(classes))
336
+
337
# Score the features with CFS; the dict maps feature name -> score and is
# ordered from the highest score to the lowest.
sortdict = CFSmethod(data.name)
# Upper panel: feature scores.
fig = plt.figure(figsize=(24, 12))
ax1 = fig.add_subplot(211)
ax1.set_title(str(method))
# 1-based positions of the ranked features. The "+ 1" belongs outside len():
# adding an int to a dict view raises TypeError.
indexlist = list(range(1, len(sortdict) + 1))
# Materialise the values view so the plot receives a plain sequence.
ax1.plot(indexlist, list(sortdict.values()))
# Axis labels.
ax1.set_xlabel('Feature Index')
ax1.set_ylabel('Feature Score')
348
+
349
+ #分类器
350
+ if clf == 'RF':
351
+ clf = RandomForestClassifier(n_jobs=-1)
352
+ elif clf == 'KNN':
353
+ clf = KNeighborsClassifier()
354
+ elif clf == 'DT':
355
+ clf = DecisionTreeClassifier()
356
+ elif clf == 'SVM':
357
+ clf = SVC()
358
+ elif clf == 'Naive Bayes':
359
+ clf = GaussianNB()
360
+ #画交叉验证图
361
+ acc = []
362
+ # 对于index列表中的每个特征索引
363
+ for i in range(len(indexlist)):
364
+ # 使用前i个特征进行交叉验证
365
+ selected_features = X[:,0:i]
366
+ scores = cross_val_score(clf, selected_features, y, cv=5)
367
+ # 计算平均准确率并添加到acc列表中
368
+ acc.append(scores.mean())
369
+ max_acc = max(acc)
370
+ max_index = acc.index(max_acc)#应该不用加1吧
371
+
372
+ ax2 = fig.add_subplot(212)
373
+ ax2.set_title("IFS_mRMR_FCD_Accuracy")
374
+ ax2.plot(max_index, max_acc, 'ro')
375
+ ax2.plot(acc)
376
+ ax2.annotate(f'({max_index}, {max_acc})', (max_index, max_acc), textcoords="offset points", xytext=(-5, -5),
377
+ ha='center')
378
+ # 设置x轴和y轴的标签
379
+ ax2.set_xlabel('Top n features')
380
+ ax2.set_ylabel('Accuracy')
381
+ plt.grid(True)
382
+ plt.savefig('output.png')
383
+ return 'output.png'
384
 
385
  pass
386
  elif method == 'Lasso':
387
  pass
388
  elif method == 'Ensemble':
389
  pass
390
+ elif method == 'CI':
391
+ pass
392
+
393
+
394
+ title = "FSALs: Robust Feature selection framework"
395
+ description = r"""<center><img src='https://raw.githubusercontent.com/Justin-12138/bio_if/d1fdf085f8e679dcceecc2c05014b1d4a237e033/assets/favicon.svg' alt='FSALs logo'></center>
396
+ <b>Official Gradio demo</b> for <a href='https://huggingface.co/spaces/Justin-12138/FSALA' target='_blank'><b>Application of Causal Inference in Alzheimer's Disease(CCFC2023)</b></a>.<br>
397
+ 🔥 Fsals is a Robust feature selection framework based on causal inference. <br>
398
+ 🤗 Try using fsals in different data sets.!<br>
399
+ """
400
+ article = r"""
401
+ If FSALs is helpful, please help to ⭐ the <a href='https://github.com/Justin-12138/bio_if' target='_blank'>Github Repo</a>. Thanks!
402
+ [![GitHub Stars](https://img.shields.io/github/stars/Justin-12138/bio_if?style=social)](https://github.com/Justin-12138/bio_if)
403
+
404
+ ---
405
+
406
+ 📝 **Citation**
407
+
408
+ If our work is useful for your research, please consider citing:
409
+ ```bibtex
410
+ @article{zlhl2023,
411
+ author = {Xiaolong Zhou, Zhao Liu, Yuchen Huang, Kun Lin},
412
+ title = {A Novel Ensemble Feature Selection Method for Biomarkers of Alzheimer's disease},
413
+ booktitle = {GUET Publisher},
414
+ year = {2023}
415
+ }
416
+ ```
417
+
418
+ 📋 **License**
419
+
420
+ This project is licensed under <a rel="license" href="https://github.com/Justin-12138/bio_if/blob/main/LICENSE">GPL License 2.0</a>.
421
+ Redistribution and use for non-commercial purposes should follow this license.
422
+
423
+ 📧 **Contact**
424
+
425
+ If you have any questions, please feel free to reach me out at <b>[email protected]</b>.
426
+
427
+ <div>
428
+ 🤗 Find Me:
429
+ <a href="https://github.com/Justin-12138"><img style="margin-top:0.5em; margin-bottom:2em" src="https://img.shields.io/github/followers/Justin-12138?style=social" alt="Github Follow"></a>
430
+ </div>
431
+ """
432
+
433
 
434
 
435
  iface = gr.Interface(
436
  fn=fs,
437
+ title=title,
438
+ description=description,
439
+
440
  inputs=["file",
441
  gr.inputs.Radio(['MRMR_FCD', 'MRMR_FCQ', 'CFS', 'Lasso', 'Ensemble', 'CI']),
442
  gr.inputs.Number(),
 
444
 
445
  ],
446
  outputs="image",
447
+ article=article,
448
  examples=[
449
  ["example_data.csv", 'MRMR_FCQ', 20, 'RF'],
450
  ["example_data.csv", 'MRMR_FCD', 10, 'SVM'],
451
  ["example_data.csv", 'MRMR_FCD', 30, 'KNN'],
452
+ ["example_data.csv", 'CFS', 50, 'DT'],
453
+ ["example_data.csv", 'CFS', 40, 'Naive Bayes'],
 
454
  ],
455
+ allow_flagging="never"
456
  )
457
 
458
  iface.launch()