Spaces:

Justin-12138
/

FSALA

Runtime error

App Files Files Community

Justin-12138 committed on Oct 8, 2023

Commit

3da65a3

1 Parent(s): 024e270

Upload app.py

Browse files

Files changed (1) hide show

app.py +217 -4

app.py CHANGED Viewed

@@ -2,6 +2,13 @@ import gradio as gr
 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
 from scipy.stats import f_oneway
 from sklearn.ensemble import RandomForestClassifier
 from sklearn.model_selection import cross_val_score
@@ -11,6 +18,8 @@ from sklearn.svm import SVC
 from sklearn.tree import DecisionTreeClassifier
 def add_max_score_to_list(temp_scores, current_score, selected_indices, selected_indices_list):
     max_score_index = np.argmax(np.array(temp_scores))
     current_score.append(temp_scores[max_score_index])
@@ -202,7 +211,7 @@ def fs(data, method, num_fea_int, clf):
         max_index = acc.index(max_acc) + 1
         ax2 = fig.add_subplot(212)
-        ax2.set_title("IFS_"+str(method)+"_Accuracy")
         ax2.plot(max_index, max_acc, 'ro')
         ax2.plot(acc)
         ax2.annotate(f'({max_index}, {max_acc})', (max_index, max_acc), textcoords="offset points", xytext=(-5, -5),
@@ -214,17 +223,220 @@ def fs(data, method, num_fea_int, clf):
         plt.savefig('output.png')
         return 'output.png'
     elif method == 'CFS':
         pass
     elif method == 'Lasso':
         pass
     elif method == 'Ensemble':
         pass
 iface = gr.Interface(
     fn=fs,
     inputs=["file",
             gr.inputs.Radio(['MRMR_FCD', 'MRMR_FCQ', 'CFS', 'Lasso', 'Ensemble', 'CI']),
             gr.inputs.Number(),
@@ -232,14 +444,15 @@ iface = gr.Interface(
             ],
     outputs="image",
     examples=[
         ["example_data.csv", 'MRMR_FCQ', 20, 'RF'],
         ["example_data.csv", 'MRMR_FCD', 10, 'SVM'],
         ["example_data.csv", 'MRMR_FCD', 30, 'KNN'],
-        ["example_data.csv", 'MRMR_FCQ', 50, 'DT'],
-        ["example_data.csv", 'MRMR_FCQ', 40, 'Naive Bayes'],
     ],
 )
 iface.launch()

 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
+from collections import Counter
+from scipy.stats import pointbiserialr
+from math import sqrt
+import copy
+import math
+import warnings
+# from pandas.core.common import SettingWithCopyWarning
 from scipy.stats import f_oneway
 from sklearn.ensemble import RandomForestClassifier
 from sklearn.model_selection import cross_val_score
 from sklearn.tree import DecisionTreeClassifier
+# warnings.simplefilter(action="ignore", category=SettingWithCopyWarning)
 def add_max_score_to_list(temp_scores, current_score, selected_indices, selected_indices_list):
     max_score_index = np.argmax(np.array(temp_scores))
     current_score.append(temp_scores[max_score_index])
         max_index = acc.index(max_acc) + 1
         ax2 = fig.add_subplot(212)
+        ax2.set_title("IFS_" + str(method) + "_Accuracy")
         ax2.plot(max_index, max_acc, 'ro')
         ax2.plot(acc)
         ax2.annotate(f'({max_index}, {max_acc})', (max_index, max_acc), textcoords="offset points", xytext=(-5, -5),
         plt.savefig('output.png')
         return 'output.png'
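+    # Add your code here. Let's write everything as plain functions first and wrap them into classes later; the main goal for now is to get a working prototype.
+    # For now the final result is a single returned image that contains a plot of the feature indices with their scores, plus a plot of the incremental feature selection accuracy.
+    # Much of my code above can still be optimized (plotting, classifier selection, and so on), but don't worry about that yet: finish the elif branches below first, then we will discuss optimizing the code.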
     elif method == 'CFS':
+        def loadDataSet(fileName):
+            df = pd.read_csv(fileName)
+            return df
+        def writesortedlist(filename, thelist):
+            with open(filename, "w") as fw:
+                for item in thelist:
+                    fw.write(item[0] + "\t" + str(item[1]) + "\n")
+        def writethelist(filename, thelist):
+            with open(filename, "w") as fw:
+                for item in thelist:
+                    fw.write(item + "\n")
+        def getdatadf(datafile):
+            datadf = loadDataSet(datafile)
+            labellist = datadf["Label"].tolist()
+            del datadf["Label"]
+            return datadf, labellist
+        def CFSmethod(datafile):
+            datadf, labellist = getdatadf(datafile)
+            print(datadf)
+            selectdf = datadf.copy()
+            allflist = datadf.columns.tolist()
+            namelist = list(datadf.index)
+            print(namelist)
+            namelist = [int(var) for var in namelist]
+            selectdf["class"] = namelist
+            bestfset,sortlist = calBFset(selectdf, allflist)
+            writethelist("bestfeature.txt", bestfset)#保存最佳特征子集
+            return dict(sortlist)
+        def calmulmerit(selectdf, sublist):
+            retvalue = 0
+            label = "class"
+            k = len(sublist)
+            namelist = list(selectdf["class"])
+            classset = set(namelist)
+            caldf = selectdf[sublist]
+            allvalue = 0.0
+            for feature in sublist:
+                caldf = selectdf[sublist]
+                middlevalue = 0.0
+                for ind in classset:
+                    caldf[label] = np.where(selectdf[label] == ind, 1, 0)
+                    coeff = pointbiserialr(caldf[feature], caldf[label])
+                    middlevalue = abs(coeff.correlation) + middlevalue
+                allvalue = middlevalue / float(len(classset)) + allvalue
+            allvalue = allvalue / float(k)
+            corr = selectdf[sublist].corr()
+            corr.values[np.tril_indices_from(corr.values)] = np.nan
+            corr = abs(corr)
+            rff = corr.unstack().mean()
+            retvalue = (k * allvalue) / sqrt(k + k * (k - 1) * rff)
+            print(retvalue)
+            return retvalue
+        def calBFset(selectdf, allflist):
+            allfdict = getallfscoredict(selectdf, allflist)
+            sortedflist = sorted(allfdict.items(), key=lambda item: item[1], reverse=True)
+            writesortedlist("sorteddict.txt", sortedflist)#保存特征得分的降序
+            feaS = []
+            feaS.append(sortedflist[0][0])
+            maxvalue = sortedflist[0][1]
+            for i in range(1, len(sortedflist)):
+                print(str(i) + "/" + str(len(sortedflist)))
+                itemf = sortedflist[i][0]
+                feaS.append(itemf)
+                newvalue = calmulmerit(selectdf, feaS)
+                if newvalue > maxvalue:
+                    maxvalue = newvalue
+                else:
+                    feaS.pop()
+            print(feaS)
+            return feaS,sortedflist
+        def getallfscoredict(selectdf, allflist):
+            retdict = {}
+            k = 1
+            for f in allflist:
+                print(k)
+                k = k + 1
+                score = calonemerit(selectdf, f)
+                if math.isnan(score):
+                    continue
+                retdict[f] = score
+            return retdict
+        def calonemerit(selectdf, subname):
+            retvalue = 0
+            label = "class"
+            namelist = list(selectdf["class"])
+            classset = set(namelist)
+            caldf = selectdf[subname].to_frame()
+            allvalue = 0.0
+            for ind in classset:
+                caldf[label] = np.where(selectdf[label] == ind, 1, 0)
+                coeff = pointbiserialr(caldf[subname], caldf[label])
+                allvalue = abs(coeff.correlation) + allvalue
+            allvalue = allvalue / float(len(classset))
+            return allvalue
+        #获取特征分数
+        sortdict=CFSmethod(data.name)
+        # 画图
+        fig = plt.figure(figsize=(24, 12))
+        ax1 = fig.add_subplot(211)
+        ax1.set_title(str(method))
+        indexlist=list(range(1,len(sortdict.keys()+1)))
+        ax1.plot(indexlist, sortdict.values())  # 特征分数图
+        # 设置x轴和y轴的标签
+        ax1.set_xlabel('Feature Index')
+        ax1.set_ylabel('Feature Score')
+        #分类器
+        if clf == 'RF':
+            clf = RandomForestClassifier(n_jobs=-1)
+        elif clf == 'KNN':
+            clf = KNeighborsClassifier()
+        elif clf == 'DT':
+            clf = DecisionTreeClassifier()
+        elif clf == 'SVM':
+            clf = SVC()
+        elif clf == 'Naive Bayes':
+            clf = GaussianNB()
+        #画交叉验证图
+        acc = []
+        # 对于index列表中的每个特征索引
+        for i in range(len(indexlist)):
+            # 使用前i个特征进行交叉验证
+            selected_features = X[:,0:i]
+            scores = cross_val_score(clf, selected_features, y, cv=5)
+            # 计算平均准确率并添加到acc列表中
+            acc.append(scores.mean())
+        max_acc = max(acc)
+        max_index = acc.index(max_acc)#应该不用加1吧
+        ax2 = fig.add_subplot(212)
+        ax2.set_title("IFS_mRMR_FCD_Accuracy")
+        ax2.plot(max_index, max_acc, 'ro')
+        ax2.plot(acc)
+        ax2.annotate(f'({max_index}, {max_acc})', (max_index, max_acc), textcoords="offset points", xytext=(-5, -5),
+                     ha='center')
+        # 设置x轴和y轴的标签
+        ax2.set_xlabel('Top n features')
+        ax2.set_ylabel('Accuracy')
+        plt.grid(True)
+        plt.savefig('output.png')
+        return 'output.png'
         pass
     elif method == 'Lasso':
         pass
     elif method == 'Ensemble':
         pass
+    elif method == 'CI':
+        pass
+# Heading text handed to gr.Interface through its ``title`` argument.
+title = "FSALs: Robust Feature selection framework"
+# HTML snippet handed to gr.Interface through its ``description`` argument.
+description = r"""<center><img src='https://raw.githubusercontent.com/Justin-12138/bio_if/d1fdf085f8e679dcceecc2c05014b1d4a237e033/assets/favicon.svg' alt='FSALs logo'></center>
+<b>Official Gradio demo</b> for <a href='https://huggingface.co/spaces/Justin-12138/FSALA' target='_blank'><b>Application of Causal Inference in Alzheimer's Disease(CCFC2023)</b></a>.<br>
+🔥 Fsals is a Robust feature selection framework based on causal inference. <br>
+🤗 Try using fsals in different data sets.!<br>
+"""
+# Markdown/HTML text (citation, license, contact) handed to gr.Interface
+# through its ``article`` argument.
+article = r"""
+If FSALs is helpful, please help to ⭐ the <a href='https://github.com/Justin-12138/bio_if' target='_blank'>Github Repo</a>. Thanks!
+[![GitHub Stars](https://img.shields.io/github/stars/Justin-12138/bio_if?style=social)](https://github.com/Justin-12138/bio_if)
+---
+📝 **Citation**
+If our work is useful for your research, please consider citing:
+```bibtex
+@article{zlhl2023,
+    author = {Xiaolong Zhou, Zhao Liu, Yuchen Huang, Kun Lin},
+    title = {A Novel Ensemble Feature Selection Method for Biomarkers of Alzheimer's disease},
+    booktitle = {GUET Publisher},
+    year = {2023}
+}
+```
+📋 **License**
+This project is licensed under <a rel="license" href="https://github.com/Justin-12138/bio_if/blob/main/LICENSE">GPL License 2.0</a>.
+Redistribution and use for non-commercial purposes should follow this license.
+📧 **Contact**
+If you have any questions, please feel free to reach me out at <b>[email protected]</b>.
+<div>
+    🤗 Find Me:
+    <a href="https://github.com/Justin-12138"><img style="margin-top:0.5em; margin-bottom:2em" src="https://img.shields.io/github/followers/Justin-12138?style=social" alt="Github Follow"></a>
+</div>
+"""
 iface = gr.Interface(
     fn=fs,
+    title=title,
+    description=description,
     inputs=["file",
             gr.inputs.Radio(['MRMR_FCD', 'MRMR_FCQ', 'CFS', 'Lasso', 'Ensemble', 'CI']),
             gr.inputs.Number(),
             ],
     outputs="image",
+    article=article,
     examples=[
         ["example_data.csv", 'MRMR_FCQ', 20, 'RF'],
         ["example_data.csv", 'MRMR_FCD', 10, 'SVM'],
         ["example_data.csv", 'MRMR_FCD', 30, 'KNN'],
+        ["example_data.csv", 'CFS', 50, 'DT'],
+        ["example_data.csv", 'CFS', 40, 'Naive Bayes'],
     ],
+    allow_flagging="never"
 )
 iface.launch()