Spaces:

Justin-12138
/

FSALA

Runtime error

App Files Files Community

Justin-12138 committed on Oct 13, 2023

Commit

15afd18

1 Parent(s): 1aa5ac2

Upload 2 files

Browse files

Files changed (2) hide show

app.py +20 -474
src.py +407 -0

app.py CHANGED Viewed

@@ -1,475 +1,21 @@
 import gradio as gr
-import matplotlib.pyplot as plt
-import numpy as np
-import pandas as pd
-import csv
-import seaborn as sns
-from scipy.stats import f_oneway
-from sklearn.ensemble import RandomForestClassifier
-from sklearn.model_selection import cross_val_score
-from sklearn.naive_bayes import GaussianNB
-from sklearn.neighbors import KNeighborsClassifier
-from sklearn.svm import SVC
-from sklearn.tree import DecisionTreeClassifier
-from sklearn.linear_model import LassoLarsCV
-from sklearn.preprocessing import LabelEncoder
-from sklearn.model_selection import train_test_split
-from sklearn.preprocessing import StandardScaler
-from sklearn.metrics import confusion_matrix
-class MyModel:
-    def __init__(self, model):
-        self.clf = model
-        self.scaler = None
-        self.label_encoder = None
-    def train(self, X, Y):
-        # 对标签进行编码
-        self.label_encoder = LabelEncoder()
-        Y = self.label_encoder.fit_transform(Y)
-        # 对特征进行标准化
-        self.scaler = StandardScaler()
-        X = self.scaler.fit_transform(X)
-        # 划分训练集和测试集
-        X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3)
-        # 训练模型
-        self.clf.fit(X_train, Y_train)
-    def predict_samples(self, samples):
-        # 对样本进行相同的预处理步骤
-        samples = self.scaler.transform(samples)
-        # 使用模型进行预测
-        predictions = self.clf.predict(samples)
-        # 将预测的标签解码回原始值
-        predictions = self.label_encoder.inverse_transform(predictions)
-        return predictions
-def add_max_score_to_list(temp_scores, current_score, selected_indices, selected_indices_list):
-    max_score_index = np.argmax(np.array(temp_scores))
-    current_score.append(temp_scores[max_score_index])
-    selected_indices.add(max_score_index)
-    selected_indices_list.append(max_score_index)
-def fs(data, method, num_fea_int, clf, testsample):
-    num_fea_int = int(num_fea_int)
-    if method == 'MRMR_FCD':
-        data = pd.read_csv(data.name)
-        X = data.iloc[:, :-1].values
-        y = data['Label'].values
-        num_features = len(X[0])
-        f_test_scores = [f_oneway(X[:, i], y)[0] for i in range(num_features)]
-        # 添加起始特征的分数到current_score
-        current_score = [max(f_test_scores)]
-        # 索引从最高分数的特征开始
-        start_feature_index = f_test_scores.index(max(f_test_scores))
-        selected_indices = set()
-        selected_indices_list = []
-        selected_indices.add(start_feature_index)
-        selected_indices_list.append(start_feature_index)
-        pearson_score_matrix = np.zeros((num_features, num_features))
-        for _ in range(num_fea_int - 1):
-            temp_scores = []
-            for i in range(num_features):
-                if i in selected_indices:
-                    temp_scores.append(-float('inf'))
-                else:
-                    f_test_score = f_test_scores[i]
-                    diff = 0
-                    for j in selected_indices:
-                        # pearson score
-                        if j > i:
-                            if pearson_score_matrix[i][j] == 0:
-                                pearson_score_matrix[i][j] = np.corrcoef(X[:, i], X[:, j])[0, 1]
-                            diff += pearson_score_matrix[i][j]
-                        else:
-                            if pearson_score_matrix[j][i] == 0:
-                                pearson_score_matrix[j][i] = np.corrcoef(X[:, i], X[:, j])[0, 1]
-                            diff += pearson_score_matrix[j][i]
-                    temp_scores.append(f_test_score - diff / len(selected_indices))
-            add_max_score_to_list(temp_scores, current_score, selected_indices, selected_indices_list)
-        combined = list(zip(selected_indices_list, current_score))
-        # 使用sorted()函数对合并后的列表进行排序，key参数指定按照分数排序，reverse=True表示降序排序
-        sorted_combined = sorted(combined, key=lambda x: x[1], reverse=True)
-        # 将索引和特征分数写入csv文件
-        with open('index-score.csv', 'w', newline='') as file:
-            writer = csv.writer(file)
-            writer.writerow(["Index", "Score"])  # 写入列名
-            writer.writerows(sorted_combined)
-        inde = []
-        scores = []
-        for indy in sorted_combined:
-            inde.append(str(indy[0] + 1))
-            scores.append(indy[1])
-        # 创建第一个图索引-分数图
-        plt.figure(1, figsize=(24, 10))
-        plt.title("mRMR-FCD()")
-        plt.plot(inde, scores)
-        plt.xlabel("Feature Index")
-        plt.ylabel("Feature Score")
-        plt.savefig('Index_Score.png')
-        ff = []  # 将字符串索引转化成整型
-        for fire in inde:
-            ff.append(int(fire) - 1)
-        # 选择分类器
-        if clf == 'RF':
-            clf = RandomForestClassifier(n_jobs=-1)
-        elif clf == 'KNN':
-            clf = KNeighborsClassifier()
-        elif clf == 'DT':
-            clf = DecisionTreeClassifier()
-        elif clf == 'SVM':
-            clf = SVC(C=1.0, kernel='rbf')
-        elif clf == 'Naive Bayes':
-            clf = GaussianNB()
-        acc = []
-        # 对于index列表中的每个特征索引
-        for i in range(len(ff)):
-            # 使用前i个特征进行交叉验证
-            selected_features = X[:, [int(j) - 1 for j in ff[:i + 1]]]
-            scores = cross_val_score(clf, selected_features, y, cv=5)
-            # 计算平均准确率并添加到acc列表中
-            acc.append(scores.mean())
-        max_acc = max(acc)
-        max_index = acc.index(max_acc) + 1
-        # 创建第二个图IFS准确率率图
-        plt.figure(2, figsize=(24, 10))
-        plt.title("IFS_" + str(method) + "_Accuracy")
-        plt.plot(max_index, max_acc, 'ro')
-        plt.plot(acc)
-        plt.annotate(f'({max_index}, {max_acc})', (max_index, max_acc), textcoords="offset points", xytext=(-5, 20),
-                     ha='center')
-        # 设置x轴和y轴的标签
-        plt.xlabel("Top n features")
-        plt.ylabel('Accuracy')
-        plt.savefig('acc.png')
-        # 从test.csv加载测试样本和标签
-        testsample = pd.read_csv(testsample.name)
-        test_samples = testsample.iloc[:, :-1].values
-        test_labels = testsample.iloc[:, -1].values
-        # 加载模型
-        models = SVC(C=1.0, kernel='rbf')
-        my_model = MyModel(models)
-        my_model.train(X, y)
-        # 预测测试样本的标签
-        predictions = my_model.predict_samples(test_samples)
-        # 计算混淆矩阵
-        cm = confusion_matrix(test_labels, predictions)
-        # 使用seaborn绘制混淆矩阵热力图
-        plt.figure(figsize=(24, 10))
-        sns.heatmap(cm, annot=True, fmt='d')
-        plt.xlabel('predict labels')
-        plt.ylabel('True labels')
-        plt.grid(True)
-        plt.savefig('confusion_matrix.png')
-        return 'Index_Score.png', 'acc.png', "confusion_matrix.png", "index-score.csv"
-    elif method == 'MRMR_FCQ':
-        data = pd.read_csv(data.name)
-        X = data.iloc[:, :-1].values
-        y = data['Label'].values
-        num_fea_inttures = len(X[0])
-        f_test_scores = [f_oneway(X[:, i], y)[0] for i in range(num_fea_inttures)]
-        # 添加起始特征的分数到current_score
-        current_score = [max(f_test_scores)]
-        # 索引从0开始
-        # start_feature_index = random.randint(0, num_features - 1)
-        # 索引从最高分数的特征开始
-        start_feature_index = f_test_scores.index(max(f_test_scores))
-        selected_indices = set()
-        selected_indices_list = []
-        selected_indices.add(start_feature_index)
-        selected_indices_list.append(start_feature_index)
-        pearson_score_matrix = np.zeros((num_fea_inttures, num_fea_inttures))
-        for _ in range(num_fea_int - 1):
-            temp_scores = []
-            for i in range(num_fea_inttures):
-                if i in selected_indices:
-                    temp_scores.append(-float('inf'))
-                else:
-                    f_test_score = f_test_scores[i]
-                    q = 0
-                    for j in selected_indices:
-                        # pearson score
-                        if j > i:
-                            if pearson_score_matrix[i][j] == 0:
-                                pearson_score_matrix[i][j] = np.corrcoef(X[:, i], X[:, j])[0, 1]
-                            q += pearson_score_matrix[i][j]
-                        else:
-                            if pearson_score_matrix[j][i] == 0:
-                                pearson_score_matrix[j][i] = np.corrcoef(X[:, i], X[:, j])[0, 1]
-                            q += pearson_score_matrix[j][i]
-                    temp_scores.append(f_test_score / (q / len(selected_indices)))
-            add_max_score_to_list(temp_scores, current_score, selected_indices, selected_indices_list)
-        combined = list(zip(selected_indices_list, current_score))
-        # 使用sorted()函数对合并后的列表进行排序，key参数指定按照分数排序，reverse=True表示降序排序
-        sorted_combined = sorted(combined, key=lambda x: x[1], reverse=True)
-        # 将索引和特征分数写入csv文件
-        with open('index-score.csv', 'w', newline='') as file:
-            writer = csv.writer(file)
-            writer.writerow(["Index", "Score"])  # 写入列名
-            writer.writerows(sorted_combined)
-        inde = []
-        scores = []
-        for indy in sorted_combined:
-            inde.append(str(indy[0] + 1))
-            scores.append(indy[1])
-        # 创建第一个图索引-分数图
-        plt.figure(1, figsize=(24, 10))
-        plt.title("mRMR-FCD()")
-        plt.plot(inde, scores)
-        plt.xlabel("Feature Index")
-        plt.ylabel("Feature Score")
-        plt.savefig('Index_Score.png')
-        ff = []  # 将字符串索引转化成整型
-        for fire in inde:
-            ff.append(int(fire) - 1)
-        # 选择分类器
-        if clf == 'RF':
-            clf = RandomForestClassifier(n_jobs=-1)
-        elif clf == 'KNN':
-            clf = KNeighborsClassifier()
-        elif clf == 'DT':
-            clf = DecisionTreeClassifier()
-        elif clf == 'SVM':
-            clf = SVC(C=1.0, kernel='rbf')
-        elif clf == 'Naive Bayes':
-            clf = GaussianNB()
-        acc = []
-        # 对于index列表中的每个特征索引
-        for i in range(len(ff)):
-            # 使用前i个特征进行交叉验证
-            selected_features = X[:, [int(j) - 1 for j in ff[:i + 1]]]
-            scores = cross_val_score(clf, selected_features, y, cv=5)
-            # 计算平均准确率并添加到acc列表中
-            acc.append(scores.mean())
-        max_acc = max(acc)
-        max_index = acc.index(max_acc) + 1
-        # 创建第二个图IFS准确率率图
-        plt.figure(2, figsize=(24, 10))
-        plt.title("IFS_" + str(method) + "_Accuracy")
-        plt.plot(max_index, max_acc, 'ro')
-        plt.plot(acc)
-        plt.annotate(f'({max_index}, {max_acc})', (max_index, max_acc), textcoords="offset points", xytext=(-5, 20),
-                     ha='center')
-        # 设置x轴和y轴的标签
-        plt.xlabel("Top n features")
-        plt.ylabel('Accuracy')
-        plt.savefig('acc.png')
-        # 从test.csv加载测试样本和标签
-        testsample = pd.read_csv(testsample.name)
-        test_samples = testsample.iloc[:, :-1].values
-        test_labels = testsample.iloc[:, -1].values
-        # 加载模型
-        models = SVC(C=1.0, kernel='rbf')
-        my_model = MyModel(models)
-        my_model.train(X, y)
-        # 预测测试样本的标签
-        predictions = my_model.predict_samples(test_samples)
-        # 计算混淆矩阵
-        cm = confusion_matrix(test_labels, predictions)
-        # 使用seaborn绘制混淆矩阵热力图
-        plt.figure(figsize=(24, 10))
-        sns.heatmap(cm, annot=True, fmt='d')
-        plt.xlabel('predict labels')
-        plt.ylabel('True labels')
-        plt.grid(True)
-        plt.savefig('confusion_matrix.png')
-        return 'Index_Score.png', 'acc.png', "confusion_matrix.png", "index-score.csv"
-    # 添加你们的代码在这里,我们先全部写成函数，然后再封装成类，主要是先把样子做出来
-    # 然后目前最终结果是返回一个图片，包含了含有特征的索引及其对应的分数的图，还有一张是增量式特征选择的准确率图
-    # 我上面的代码很多地方还可以优化，比如画图，选择分类器这些，但是你们都先不用管，把下面的几个elif写完先，然后我们再讨论优化代码的事情。
-    elif method == 'Lasso':
-        data = pd.read_csv(data.name)
-        X = data.iloc[:, :-1]
-        y = data.iloc[:, -1:].values.flatten()
-        cl = LassoLarsCV(cv=20, max_iter=80000).fit(X, y)
-        importance = np.abs(cl.coef_)
-        feature_names = list(X)
-        a = len(feature_names)
-        idx_features = (-importance).argsort()[:a]
-        name_features = np.array(feature_names)[idx_features]
-        result = pd.DataFrame({'index': idx_features, 'Score': importance[idx_features]})
-        result_rank = result.sort_values(by='Score', ascending=False, ignore_index=True)
-        result_rank.to_csv("index-score.csv")
-        inde = result_rank['index'].tolist()
-        score = result_rank['Score'].tolist()
-        index = []
-        for i in inde:
-            index.append(str(i))
-        plt.figure(1, figsize=(24, 12))
-        plt.title(str(method))
-        plt.plot(index[:num_fea_int], score[:num_fea_int])
-        # 设置x轴和y轴的标签
-        plt.xlabel('Feature Index')
-        plt.ylabel('Feature Score')
-        plt.savefig('Index_Score.png')
-        if clf == 'RF':
-            clf = RandomForestClassifier(n_jobs=-1)
-        elif clf == 'KNN':
-            clf = KNeighborsClassifier()
-        elif clf == 'DT':
-            clf = DecisionTreeClassifier()
-        elif clf == 'SVM':
-            clf = SVC()
-        elif clf == 'Naive Bayes':
-            clf = GaussianNB()
-        inde = inde[:num_fea_int]
-        index = index[:num_fea_int]
-        acc = []
-        # 对于index列表中的每个特征索引
-        X = data.iloc[:, :-1].values
-        for i in range(len(index)):
-            # 使用前i个特征进行交叉验证
-            selected_features = X[:, [int(j) - 1 for j in inde[:i + 1]]]
-            scores = cross_val_score(clf, selected_features, y, cv=5)
-            # 计算平均准确率并添加到acc列表中
-            acc.append(scores.mean())
-        max_acc = max(acc)
-        max_index = acc.index(max_acc) + 1
-        # ax2 = fig.add_subplot(212)
-        # ax2.set_title("IFS_" + str(method) + "_Accuracy")
-        plt.figure(2, figsize=(24, 10))
-        plt.plot(max_index, max_acc, 'ro')
-        plt.plot(acc)
-        plt.annotate(f'({max_index}, {max_acc})', (max_index, max_acc), textcoords="offset points", xytext=(-5, -5),
-                     ha='center')
-        # 设置x轴和y轴的标签
-        # ax2.set_xlabel()
-        # ax2.set_ylabel('Accuracy')
-        plt.xlabel('Top n features')
-        plt.ylabel('Accuracy')
-        plt.grid(True)
-        plt.savefig('acc.png')
-        testsample = pd.read_csv(testsample.name)
-        test_samples = testsample.iloc[:, :-1].values
-        test_labels = testsample.iloc[:, -1].values
-        models = SVC(C=1.0, kernel='rbf')
-        my_model = MyModel(models)
-        my_model.train(X, y)
-        # 预测测试样本的标签并计算准确率
-        predictions = my_model.predict_samples(test_samples)
-        # 计算混淆矩阵
-        cm = confusion_matrix(test_labels, predictions)
-        # 使用seaborn绘制混淆矩阵热力图
-        plt.figure(figsize=(24, 10))
-        sns.heatmap(cm, annot=True, fmt='d')
-        plt.xlabel('predict labels')
-        plt.ylabel('True labels')
-        plt.grid(True)
-        plt.savefig('confusion_matrix.png')
-        return 'Index_Score.png', 'acc.png', "confusion_matrix.png",'index-score.csv'
-    elif method == 'Ensemble':
-        pass
-    elif method == 'CI':
-        pass
-title = "FSALs: Robust Feature selection framework"
-description = r"""<center><img src='https://raw.githubusercontent.com/Justin-12138/bio_if/d1fdf085f8e679dcceecc2c05014b1d4a237e033/assets/favicon.svg' alt='FSALs logo'></center>
-<b>Official Gradio demo</b> for <a href='https://huggingface.co/spaces/Justin-12138/FSALA' target='_blank'><b>Application of Causal Inference in Alzheimer's Disease(CCFC2023)</b></a>.<br>
-🔥 Fsals is a Robust feature selection framework based on causal inference. <br>
-🤗 Try using fsals in different data sets.!<br>
-"""
-article = r"""
-If FSALs is helpful, please help to ⭐ the <a href='https://github.com/Justin-12138/bio_if' target='_blank'>Github Repo</a>. Thanks!
-[![GitHub Stars](https://img.shields.io/github/stars/Justin-12138/bio_if?style=social)](https://github.com/Justin-12138/bio_if)
----
-📝 **Citation**
-If our work is useful for your research, please consider citing:
-```bibtex
-@article{zlhl2023,
-    author = {Xiaolong Zhou, Zhao Liu, Yuchen Huang, Kun Lin},
-    title = {A Novel Ensemble Feature Selection Method for Biomarkers of Alzheimer's disease},
-    booktitle = {GUET Publisher},
-    year = {2023}
-}
-```
-📋 **License**
-This project is licensed under <a rel="license" href="https://github.com/Justin-12138/bio_if/blob/main/LICENSE">GPL License 2.0</a>.
-Redistribution and use for non-commercial purposes should follow this license.
-📧 **Contact**
-If you have any questions, please feel free to reach me out at <b>[email protected]</b>.
-<div>
-    🤗 Find Me:
-    <a href="https://github.com/Justin-12138"><img style="margin-top:0.5em; margin-bottom:2em" src="https://img.shields.io/github/followers/Justin-12138?style=social" alt="Github Follow"></a>
-</div>
-"""
-iface = gr.Interface(
-    fn=fs,
-    title=title,
-    description=description,
-    inputs=["file",
-            gr.inputs.Radio(['MRMR_FCD', 'MRMR_FCQ', 'CFS', 'Lasso', 'Ensemble', 'CI']),
-            gr.inputs.Number(),
-            gr.inputs.Radio(['RF', 'SVM', 'KNN', 'DT', 'Naive Bayes']),
-            "file"
-            ],
-    outputs=["image", "image", "image", "file"],
-    article=article,
-    examples=[
-        ["example_data.csv", 'MRMR_FCQ', 20, 'RF', "test.csv"],
-        ["example_data.csv", 'MRMR_FCD', 10, 'SVM', "test.csv"],
-        ["example_data.csv", 'MRMR_FCD', 30, 'KNN', "test.csv"],
-        ["example_data.csv", 'Lasso', 50, 'DT', "test.csv"],
-        ["example_data.csv", 'Lasso', 40, 'Naive Bayes', "test.csv"],
-    ],
-    allow_flagging="never"
-)
-iface.launch()

 import gradio as gr
+from src import des, fs
+if __name__ == '__main__':
+    # Each example row: training CSV, method, number of features, CV classifier, testing CSV.
+    example_rows = [
+        ["example_data.csv", 'MRMR_FCQ', 20, 'RF', "test.csv"],
+        ["example_data.csv", 'MRMR_FCD', 10, 'SVM', "test.csv"],
+        ["example_data.csv", 'MRMR_FCD', 30, 'KNN', "test.csv"],
+        ["example_data.csv", 'Lasso', 30, 'DT', "test.csv"],
+        ["example_data.csv", 'Lasso', 20, 'Naive Bayes', "test.csv"],
+    ]
+    # All texts and components come from src.des(); fs is the callback run on submit.
+    iface = gr.Interface(
+        fn=fs,
+        title=des("title"),
+        description=des("description"),
+        article=des("article"),
+        inputs=des("inputs"),
+        outputs=des("outputs"),
+        examples=example_rows,
+        allow_flagging="never"
+    )
+    iface.launch()

src.py ADDED Viewed

	@@ -0,0 +1,407 @@

+import csv
+import gradio as gr
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+import seaborn as sns
+from scipy.stats import f_oneway
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.linear_model import LassoLarsCV
+from sklearn.model_selection import cross_val_score
+from sklearn.model_selection import train_test_split
+from sklearn.naive_bayes import GaussianNB
+from sklearn.neighbors import KNeighborsClassifier
+from sklearn.preprocessing import LabelEncoder
+from sklearn.preprocessing import StandardScaler
+from sklearn.svm import SVC
+from sklearn.tree import DecisionTreeClassifier
+from sklearn.metrics import confusion_matrix
+class MyModel:
+    """Bundle a classifier with the label encoder and feature scaler fitted alongside it."""
+    def __init__(self, model):
+        # The estimator to fit; both preprocessors are created by train().
+        self.clf = model
+        self.scaler = None
+        self.label_encoder = None
+    def train(self, X, Y):
+        """Fit the label encoder, the scaler and the classifier.
+
+        Labels are integer-encoded and features standardised on all rows; the
+        classifier is then fitted on a random 70% split (the 30% remainder is discarded).
+        """
+        self.label_encoder = LabelEncoder()
+        encoded_labels = self.label_encoder.fit_transform(Y)
+        self.scaler = StandardScaler()
+        scaled_features = self.scaler.fit_transform(X)
+        X_train, _, Y_train, _ = train_test_split(scaled_features, encoded_labels, test_size=0.3)
+        self.clf.fit(X_train, Y_train)
+    def predict_samples(self, samples):
+        """Predict labels for samples and decode them back to the original label values."""
+        # Apply the same scaling that was fitted in train().
+        scaled = self.scaler.transform(samples)
+        encoded_predictions = self.clf.predict(scaled)
+        return self.label_encoder.inverse_transform(encoded_predictions)
+# choose classifier
+def setclf(clf_name):
+    """Return a new, unfitted classifier for the given short name.
+
+    Args:
+        clf_name: One of 'RF', 'KNN', 'DT', 'SVM' or 'Naive Bayes'.
+
+    Returns:
+        A freshly constructed estimator.
+
+    Raises:
+        ValueError: If clf_name is not one of the supported names. Previously
+            None was returned, which only failed later inside cross-validation.
+    """
+    # Factories are lambdas so that nothing is constructed until a name is chosen.
+    factories = {
+        'RF': lambda: RandomForestClassifier(n_jobs=-1),
+        'KNN': lambda: KNeighborsClassifier(n_jobs=-1),
+        'DT': lambda: DecisionTreeClassifier(),
+        'SVM': lambda: SVC(C=1.0, kernel='rbf'),
+        'Naive Bayes': lambda: GaussianNB(),
+    }
+    if clf_name not in factories:
+        raise ValueError(
+            f"Unknown classifier {clf_name!r}; expected one of {sorted(factories)}")
+    return factories[clf_name]()
+# cal score
+def add_max_score_to_list(temp_scores, current_score, selected_indices, selected_indices_list):
+    """Record the best-scoring candidate of this round.
+
+    The index of the largest value in temp_scores is added to both
+    selected_indices (set) and selected_indices_list (ordered), and its score
+    is appended to current_score. All three containers are mutated in place.
+    """
+    best = np.array(temp_scores).argmax()
+    selected_indices_list.append(best)
+    selected_indices.add(best)
+    current_score.append(temp_scores[best])
+# load data
+def load_data(data, out_name):
+    """Read a CSV whose last column is the label and split it into features and labels.
+
+    Args:
+        data: Either a path (str / os.PathLike) or an object exposing the path
+            as ``.name`` (the uploaded-file wrapper this app receives).
+        out_name: If truthy, return the features as a DataFrame (column names
+            kept); otherwise return them as a NumPy array.
+
+    Returns:
+        (X, y) where X holds every column but the last and y is a 1-D array of
+        the last column.
+    """
+    import os
+    # Accept plain paths as well as file wrappers so the loader does not depend
+    # on how the caller hands over the upload.
+    path = data if isinstance(data, (str, os.PathLike)) else data.name
+    frame = pd.read_csv(path)
+    X = frame.iloc[:, :-1]
+    y = frame.iloc[:, -1].values
+    if not out_name:
+        X = X.values
+    return X, y
+# Lower bound for the mean redundancy in the FCQ quotient, so that a feature that is
+# uncorrelated with the selected set does not cause a division by zero.
+_MIN_REDUNDANCY = 1e-3
+def _class_f_scores(X, y):
+    """Return the one-way ANOVA F statistic of every column of X across the classes in y.
+
+    Each feature is split into one sample group per distinct label, which is
+    what f_oneway expects; passing the feature and the label vector as the two
+    groups would instead compare the feature's mean with the labels' mean.
+    """
+    classes = np.unique(y)
+    return np.array(
+        [f_oneway(*(X[y == c, i] for c in classes))[0] for i in range(X.shape[1])],
+        dtype=float)
+def _mrmr_select(X, y, num_fea_int, combine):
+    """Greedily rank features by relevance (F statistic) against redundancy (mean |Pearson r|).
+
+    Args:
+        X: 2-D feature array (rows are samples).
+        y: 1-D label array.
+        num_fea_int: Number of features to select; capped at the number of columns.
+        combine: Callable (relevance, mean_redundancy) -> score, applied to arrays.
+
+    Returns:
+        List of (feature_index, score) in selection order; the first entry is
+        the feature with the highest F statistic and carries that statistic as score.
+    """
+    num_features = X.shape[1]
+    relevance = _class_f_scores(X, y)
+    # Constant features give a NaN F statistic; make sure they are never preferred.
+    relevance[np.isnan(relevance)] = -np.inf
+    newest = int(np.argmax(relevance))
+    selected = [newest]
+    scores = [float(relevance[newest])]
+    available = np.ones(num_features, dtype=bool)
+    available[newest] = False
+    # Running sum of |r| between each candidate and the already selected features.
+    # Only the correlations with the newest pick are added per round, so every
+    # pair is computed exactly once.
+    redundancy_sum = np.zeros(num_features)
+    for _ in range(min(num_fea_int, num_features) - 1):
+        with np.errstate(divide='ignore', invalid='ignore'):
+            for i in np.flatnonzero(available):
+                corr = np.corrcoef(X[:, i], X[:, newest])[0, 1]
+                if not np.isnan(corr):
+                    redundancy_sum[i] += abs(corr)
+        candidate_scores = np.where(
+            available, combine(relevance, redundancy_sum / len(selected)), -np.inf)
+        newest = int(np.argmax(candidate_scores))
+        if candidate_scores[newest] == -np.inf:
+            # Nothing usable is left (only constant features remain).
+            break
+        selected.append(newest)
+        scores.append(float(candidate_scores[newest]))
+        available[newest] = False
+    return list(zip(selected, scores))
+def _fcd_score(relevance, mean_redundancy):
+    """F-test Correlation Difference: relevance minus mean redundancy."""
+    return relevance - mean_redundancy
+def _fcq_score(relevance, mean_redundancy):
+    """F-test Correlation Quotient: relevance divided by (floored) mean redundancy."""
+    return relevance / np.maximum(mean_redundancy, _MIN_REDUNDANCY)
+def MRMR_FCD(data, testsample, num_fea_int):
+    """Select features with mRMR using the F-test / correlation difference criterion.
+
+    Args:
+        data: Training CSV upload (last column is the label).
+        testsample: Testing CSV upload (last column is the label).
+        num_fea_int: Number of features to select.
+
+    Returns:
+        (combined, X, y, test_samples, test_labels) where combined is a list of
+        (feature_index, score) pairs in selection order.
+    """
+    X, y = load_data(data, False)
+    # Load the test samples and labels.
+    test_samples, test_labels = load_data(testsample, False)
+    combined = _mrmr_select(X, y, num_fea_int, _fcd_score)
+    return combined, X, y, test_samples, test_labels
+def MRMR_FCQ(data, testsample, num_fea_int):
+    """Select features with mRMR using the F-test / correlation quotient criterion.
+
+    Args:
+        data: Training CSV upload (last column is the label).
+        testsample: Testing CSV upload (last column is the label).
+        num_fea_int: Number of features to select.
+
+    Returns:
+        (combined, X, y, test_samples, test_labels) where combined is a list of
+        (feature_index, score) pairs in selection order.
+    """
+    X, y = load_data(data, False)
+    # Load the test samples and labels.
+    test_samples, test_labels = load_data(testsample, False)
+    combined = _mrmr_select(X, y, num_fea_int, _fcq_score)
+    return combined, X, y, test_samples, test_labels
+def index_score_csv(sorted_combined, filename):
+    """Write the (index, score) pairs to filename as CSV under an "Index,Score" header row."""
+    rows = [["Index", "Score"]]  # column names
+    rows.extend(sorted_combined)
+    with open(filename, 'w', newline='') as handle:
+        csv.writer(handle).writerows(rows)
+def isplot(num, width, height, title_gr, x, y, xlabbel, ylabel, filename):
+    """Plot feature scores against feature indices and save the figure.
+
+    Args:
+        num: Matplotlib figure number to draw on.
+        width, height: Figure size in inches.
+        title_gr: Figure title.
+        x, y: Data for the line plot (index labels and their scores).
+        xlabbel, ylabel: Axis labels.
+        filename: Path the image is written to.
+    """
+    # pyplot hands back the existing figure when a number is reused, so clear it:
+    # otherwise every new request would be drawn on top of the previous curve.
+    fig = plt.figure(num=num, figsize=(width, height), clear=True)
+    plt.title(title_gr, fontsize=30)
+    plt.plot(x, y)
+    plt.xlabel(xlabel=xlabbel, fontsize=30)
+    plt.ylabel(ylabel=ylabel, fontsize=30)
+    plt.savefig(filename)
+    # Release the figure; this runs inside a long-lived server process.
+    plt.close(fig)
+def ifsplot(num, width, height, title_gr, max_index, max_acc, acc, xlabbel, ylabel, filename):
+    """Plot accuracy against the number of top features and mark the best point.
+
+    Args:
+        num: Matplotlib figure number to draw on.
+        width, height: Figure size in inches.
+        title_gr: Method name embedded in the title ("IFS_<name>_Accuracy").
+        max_index: 1-based number of features at which accuracy peaks.
+        max_acc: The peak accuracy.
+        acc: Accuracy for the top 1, 2, ... features, in that order.
+        xlabbel, ylabel: Axis labels.
+        filename: Path the image is written to.
+    """
+    # Clear any earlier drawing on a reused figure number.
+    fig = plt.figure(num=num, figsize=(width, height), clear=True)
+    plt.title("IFS_" + title_gr + "_Accuracy", fontsize=40)
+    plt.plot(max_index, max_acc, 'ro')
+    # acc[0] belongs to "top 1 feature": plot against 1-based counts so the curve
+    # lines up with the 1-based marker instead of being shifted one step left.
+    plt.plot(range(1, len(acc) + 1), acc)
+    plt.annotate(f'({max_index}, {max_acc})', (max_index, max_acc), textcoords="offset points", xytext=(-5, 20),
+                 ha='center', fontsize=40)
+    # Axis labels.
+    plt.xlabel(xlabel=xlabbel, fontsize=40)
+    plt.ylabel(ylabel=ylabel, fontsize=40)
+    plt.savefig(filename)
+    plt.close(fig)
+def cmplot(num, width, height, cm, xlabbel, ylabel, filename):
+    """Draw a confusion matrix as an annotated heatmap and save the figure.
+
+    Args:
+        num: Matplotlib figure number to draw on.
+        width, height: Figure size in inches.
+        cm: Confusion matrix of integer counts.
+        xlabbel, ylabel: Axis labels.
+        filename: Path the image is written to.
+    """
+    # Clear any earlier drawing on a reused figure number.
+    fig = plt.figure(num=num, figsize=(width, height), clear=True)
+    sns.heatmap(cm, annot=True, fmt='d')
+    plt.xlabel(xlabel=xlabbel, fontsize=40)
+    # Was plt.plot(ylabel=...), which draws nothing and left the y axis unlabelled.
+    plt.ylabel(ylabel=ylabel, fontsize=40)
+    plt.grid(True)
+    plt.savefig(filename)
+    plt.close(fig)
+def des(choicce):
+    """Return one piece of the Gradio interface configuration.
+
+    Args:
+        choicce: "title", "description", "article", "inputs" or "outputs".
+
+    Returns:
+        The title, description or article string, or a newly built list of
+        input / output components. Any other key yields None.
+    """
+    title = "FSALs: Robust Feature selection framework"
+    description = r"""<center><img src='https://raw.githubusercontent.com/Justin-12138/bio_if/d1fdf085f8e679dcceecc2c05014b1d4a237e033/assets/favicon.svg' alt='FSALs logo'></center>
+    <b>Official Gradio demo</b> for <a href='https://huggingface.co/spaces/Justin-12138/FSALA' target='_blank'><b>Application of Causal Inference in Alzheimer's Disease(CCFC2023)</b></a>.<br>
+    🔥 Fsals is a Robust feature selection framework based on causal inference. <br>
+    🤗 Try using fsals in different data sets.!<br>
+    """
+    article = r"""
+    If FSALs is helpful, please help to ⭐ the <a href='https://github.com/Justin-12138/bio_if' target='_blank'>Github Repo</a>. Thanks!
+    [![GitHub Stars](https://img.shields.io/github/stars/Justin-12138/bio_if?style=social)](https://github.com/Justin-12138/bio_if)
+    ---
+    📝 **Citation**
+    If our work is useful for your research, please consider citing:
+    ```bibtex
+    @article{zlhl2023,
+        author = {Xiaolong Zhou, Zhao Liu, Yuchen Huang, Kun Lin},
+        title = {A Novel Ensemble Feature Selection Method for Biomarkers of Alzheimer's disease},
+        booktitle = {GUET Publisher},
+        year = {2023}
+    }
+    ```
+    📋 **License**
+    This project is licensed under <a rel="license" href="https://github.com/Justin-12138/bio_if/blob/main/LICENSE">GPL License 2.0</a>.
+    Redistribution and use for non-commercial purposes should follow this license.
+    📧 **Contact**
+    If you have any questions, please feel free to reach me out at <b>[email protected]</b>.
+    <div>
+        🤗 Find Me:
+        <a href="https://github.com/Justin-12138"><img style="margin-top:0.5em; margin-bottom:2em" src="https://img.shields.io/github/followers/Justin-12138?style=social" alt="Github Follow"></a>
+    </div>
+    """
+    if choicce == "title":
+        return title
+    elif choicce == "description":
+        return description
+    elif choicce == "article":
+        return article
+    elif choicce == 'inputs':
+        # Use the top-level component classes, as the outputs below already do,
+        # rather than the deprecated gr.inputs namespace.
+        inputs = [gr.File(label="Training data"),
+                  gr.Radio(choices=['MRMR_FCD', 'MRMR_FCQ', 'CFS', 'Lasso', 'Ensemble', 'CI'], label="method"),
+                  gr.Number(label="Num_feature(int)"),
+                  gr.Radio(choices=['RF', 'SVM', 'KNN', 'DT', 'Naive Bayes'], label="classifier for CV"),
+                  gr.File(label="Testing data")
+                  ]
+        return inputs
+    elif choicce == 'outputs':
+        output = [gr.Image(label="Index_score"),
+                  gr.Image(label="IFS_Acc"),
+                  gr.Image(label="Confusion_matrix"),
+                  gr.File(label='Index_score.csv')]
+        return output
+def cv(X, y, index_0, clf, n_fold):
+    """Incremental feature selection: cross-validate on the top 1, 2, ... ranked features.
+
+    Args:
+        X: 2-D NumPy feature array.
+        y: Label array.
+        index_0: Zero-based column indices of X, best-ranked first.
+        clf: Estimator passed to cross_val_score.
+        n_fold: Number of cross-validation folds.
+
+    Returns:
+        (acc, max_acc, max_index): the mean accuracy for each prefix of index_0,
+        the best accuracy rounded to 4 decimals, and the 1-based number of
+        features at which it is first reached.
+    """
+    acc = []
+    for n_top in range(1, len(index_0) + 1):
+        # The indices are already zero-based, so use them as they are. The former
+        # "- 1" shifted every column by one and mapped index 0 to the last column.
+        columns = [int(j) for j in index_0[:n_top]]
+        scores = cross_val_score(clf, X[:, columns], y, cv=n_fold)
+        # Store the mean accuracy over the folds.
+        acc.append(scores.mean())
+    best = max(acc)
+    max_acc = round(best, 4)
+    max_index = acc.index(best) + 1
+    return acc, max_acc, max_index
+def getindex_1(sorted_combined):
+    """Split (index, score) pairs into parallel lists.
+
+    Returns:
+        (index_1, index_0, scores): 1-based indices as strings, the same indices
+        as 0-based ints, and the scores, all in input order.
+    """
+    pairs = list(sorted_combined)
+    index_1 = [str(pair[0] + 1) for pair in pairs]
+    scores = [pair[1] for pair in pairs]
+    index_0 = [int(label) - 1 for label in index_1]
+    return index_1, index_0, scores
+def load_model(X, y, test_samples, test_labels):
+    """Train an RBF-kernel SVC (wrapped in MyModel) on (X, y) and return its confusion matrix on the test set."""
+    wrapped = MyModel(SVC(C=1.0, kernel='rbf'))
+    wrapped.train(X, y)
+    # Predict the test samples, then tabulate true against predicted labels.
+    predicted = wrapped.predict_samples(test_samples)
+    return confusion_matrix(test_labels, predicted)
+def lasso(data, testsample, num_fea_int):
+    """Rank features by the absolute LassoLarsCV coefficient.
+
+    The full ranking is also written to "index-score.csv".
+
+    Returns:
+        (X, y, inde, score, test_samples, test_labels, num_fea_int) where X is
+        the training DataFrame, inde the feature positions ordered by descending
+        score, score the matching scores, and num_fea_int is passed through unchanged.
+    """
+    X, y = load_data(data, True)
+    test_samples, test_labels = load_data(testsample, False)
+    fitted = LassoLarsCV(cv=20, max_iter=80000).fit(X, y)
+    importance = np.abs(fitted.coef_)
+    n_columns = len(list(X))
+    order = (-importance).argsort()[:n_columns]
+    ranking = pd.DataFrame({'index': order, 'Score': importance[order]})
+    ranking = ranking.sort_values(by='Score', ascending=False, ignore_index=True)
+    ranking.to_csv("index-score.csv")
+    return (X, y, ranking['index'].tolist(), ranking['Score'].tolist(),
+            test_samples, test_labels, num_fea_int)
+def fs(data, method, num_fea_int, clf, testsample):
+    """Gradio callback: rank features, run incremental feature selection and evaluate on the test set.
+
+    Args:
+        data: Uploaded training CSV.
+        method: 'MRMR_FCD', 'MRMR_FCQ' or 'Lasso'.
+        num_fea_int: Number of top features to keep (converted with int()).
+        clf: Short classifier name understood by setclf, used for cross-validation.
+        testsample: Uploaded testing CSV.
+
+    Returns:
+        Paths of the index/score plot, the accuracy plot, the confusion-matrix
+        plot and the index/score CSV, in that order.
+
+    Raises:
+        NotImplementedError: For methods that can be selected but have no
+            implementation yet (previously these returned None).
+    """
+    num_fea_int = int(num_fea_int)
+    # Both mRMR variants share the same pipeline and differ only in the selector.
+    mrmr_selectors = {'MRMR_FCD': MRMR_FCD, 'MRMR_FCQ': MRMR_FCQ}
+    if method in mrmr_selectors:
+        combined, X, y, test_samples, test_labels = mrmr_selectors[method](
+            data=data, testsample=testsample, num_fea_int=num_fea_int)
+        # Sort the (index, score) pairs by score, highest first.
+        sorted_combined = sorted(combined, key=lambda x: x[1], reverse=True)
+        index_score_csv(sorted_combined=sorted_combined, filename='ab.csv')
+        index_1, index_0, scores = getindex_1(sorted_combined=sorted_combined)
+        # index-score.png
+        isplot(1, 24, 10,
+               title_gr=str(method), x=index_1, y=scores,
+               xlabbel="index", ylabel="scores", filename="index-score.png")
+        # Choose the classifier.
+        clf = setclf(clf)
+        # 5 folds for every method (FCD used 10 while FCQ and Lasso used 5).
+        acc, max_acc, max_index = cv(X=X, y=y, index_0=index_0, clf=clf, n_fold=5)
+        # acc.png
+        ifsplot(2, 24, 10,
+                title_gr=str(method), max_index=max_index, max_acc=max_acc,
+                acc=acc, xlabbel="top n features", ylabel="acc", filename="acc.png")
+        # Confusion matrix on the test set.
+        cm = load_model(X=X, y=y, test_samples=test_samples, test_labels=test_labels)
+        cmplot(3, 24, 10, cm=cm,
+               xlabbel="predicted labels", ylabel="true labels", filename='confusion_matrix.png')
+        return 'index-score.png', 'acc.png', "confusion_matrix.png", "ab.csv"
+    if method == 'Lasso':
+        X, y, inde, score, test_samples, test_labels, num_fea_int = lasso(data, testsample, num_fea_int)
+        index = [str(i) for i in inde]
+        # Clear the figure first: number 1 is reused on every request.
+        fig = plt.figure(1, figsize=(24, 12), clear=True)
+        plt.title(str(method))
+        plt.plot(index[:num_fea_int], score[:num_fea_int])
+        # Axis labels.
+        plt.xlabel('Feature Index', fontsize=40)
+        plt.ylabel('Feature Score', fontsize=40)
+        plt.savefig('Index_Score.png')
+        plt.close(fig)
+        clf = setclf(clf)
+        inde = inde[:num_fea_int]
+        X = X.values
+        acc, max_acc, max_index = cv(X=X, y=y, index_0=inde, clf=clf, n_fold=5)
+        ifsplot(2, 24, 10, title_gr=str(method), max_index=max_index,
+                max_acc=max_acc, acc=acc, xlabbel="top n features", ylabel="acc",
+                filename="acc.png")
+        cm = load_model(X=X, y=y, test_samples=test_samples, test_labels=test_labels)
+        cmplot(3, 24, 10,
+               cm=cm, xlabbel="predicted labels", ylabel="true labels", filename='confusion_matrix.png')
+        return 'Index_Score.png', 'acc.png', "confusion_matrix.png", 'index-score.csv'
+    # 'CFS', 'Ensemble' and 'CI' are offered in the UI but not written yet.
+    raise NotImplementedError(f"Feature selection method {method!r} is not implemented yet")