Justin-12138 committed
Commit 194cf1f · Parent(s): 77bd380

Upload app.py

Files changed (1):
  1. app.py +190 -75
app.py CHANGED
@@ -2,13 +2,11 @@ import gradio as gr
2
  import matplotlib.pyplot as plt
3
  import numpy as np
4
  import pandas as pd
5
- from collections import Counter
6
  from scipy.stats import pointbiserialr
7
  from math import sqrt
8
- import copy
9
  import math
10
- import warnings
11
- # from pandas.core.common import SettingWithCopyWarning
12
  from scipy.stats import f_oneway
13
  from sklearn.ensemble import RandomForestClassifier
14
  from sklearn.model_selection import cross_val_score
@@ -17,9 +15,45 @@ from sklearn.neighbors import KNeighborsClassifier
17
  from sklearn.svm import SVC
18
  from sklearn.tree import DecisionTreeClassifier
19
  from sklearn.linear_model import LassoLarsCV
20
 
21
 
22
- # warnings.simplefilter(action="ignore", category=SettingWithCopyWarning)
23
 
24
  def add_max_score_to_list(temp_scores, current_score, selected_indices, selected_indices_list):
25
  max_score_index = np.argmax(np.array(temp_scores))
@@ -28,7 +62,7 @@ def add_max_score_to_list(temp_scores, current_score, selected_indices, selected
28
  selected_indices_list.append(max_score_index)
29
 
30
 
31
- def fs(data, method, num_fea_int, clf):
32
  num_fea_int = int(num_fea_int)
33
  if method == 'MRMR_FCD':
34
  data = pd.read_csv(data.name)
@@ -68,25 +102,29 @@ def fs(data, method, num_fea_int, clf):
68
  combined = list(zip(selected_indices_list, current_score))
69
  # Sort the combined list with sorted(); the key argument sorts by score and reverse=True gives descending order
70
  sorted_combined = sorted(combined, key=lambda x: x[1], reverse=True)
71
  inde = []
72
  scores = []
73
-
74
  for indy in sorted_combined:
75
  inde.append(str(indy[0] + 1))
76
  scores.append(indy[1])
77
- fig = plt.figure(figsize=(24, 12))
78
- ax1 = fig.add_subplot(211)
79
- ax1.set_title("mRMR-FCD()")
80
- ax1.plot(inde, scores)
81
-
82
- # Set the x-axis and y-axis labels
83
- ax1.set_xlabel('Feature Index')
84
- ax1.set_ylabel('Feature Score')
85
-
86
- ff = []
87
  for fire in inde:
88
  ff.append(int(fire) - 1)
89
-
90
  if clf == 'RF':
91
  clf = RandomForestClassifier(n_jobs=-1)
92
  elif clf == 'KNN':
@@ -94,7 +132,7 @@ def fs(data, method, num_fea_int, clf):
94
  elif clf == 'DT':
95
  clf = DecisionTreeClassifier()
96
  elif clf == 'SVM':
97
- clf = SVC()
98
  elif clf == 'Naive Bayes':
99
  clf = GaussianNB()
100
 
@@ -109,18 +147,42 @@ def fs(data, method, num_fea_int, clf):
109
  max_acc = max(acc)
110
  max_index = acc.index(max_acc) + 1
111
 
112
- ax2 = fig.add_subplot(212)
113
- ax2.set_title("IFS_mRMR_FCD_Accuracy")
114
- ax2.plot(max_index, max_acc, 'ro')
115
- ax2.plot(acc)
116
- ax2.annotate(f'({max_index}, {max_acc})', (max_index, max_acc), textcoords="offset points", xytext=(-5, -5),
 
117
  ha='center')
118
  # Set the x-axis and y-axis labels
119
- ax2.set_xlabel('Top n features')
120
- ax2.set_ylabel('Accuracy')
121
  plt.grid(True)
122
- plt.savefig('output.png')
123
- return 'output.png'
 
124
 
125
  elif method == 'MRMR_FCQ':
126
  data = pd.read_csv(data.name)
@@ -166,25 +228,29 @@ def fs(data, method, num_fea_int, clf):
166
 
167
  # Sort the combined list with sorted(); the key argument sorts by score and reverse=True gives descending order
168
  sorted_combined = sorted(combined, key=lambda x: x[1], reverse=True)
169
  inde = []
170
  scores = []
171
-
172
  for indy in sorted_combined:
173
  inde.append(str(indy[0] + 1))
174
  scores.append(indy[1])
175
- fig = plt.figure(figsize=(24, 12))
176
- ax1 = fig.add_subplot(211)
177
- ax1.set_title(str(method))
178
- ax1.plot(inde, scores)
179
-
180
- # Set the x-axis and y-axis labels
181
- ax1.set_xlabel('Feature Index')
182
- ax1.set_ylabel('Feature Score')
183
-
184
- ff = []
185
  for fire in inde:
186
  ff.append(int(fire) - 1)
187
-
188
  if clf == 'RF':
189
  clf = RandomForestClassifier(n_jobs=-1)
190
  elif clf == 'KNN':
@@ -192,7 +258,7 @@ def fs(data, method, num_fea_int, clf):
192
  elif clf == 'DT':
193
  clf = DecisionTreeClassifier()
194
  elif clf == 'SVM':
195
- clf = SVC()
196
  elif clf == 'Naive Bayes':
197
  clf = GaussianNB()
198
 
@@ -207,18 +273,44 @@ def fs(data, method, num_fea_int, clf):
207
  max_acc = max(acc)
208
  max_index = acc.index(max_acc) + 1
209
 
210
- ax2 = fig.add_subplot(212)
211
- ax2.set_title("IFS_" + str(method) + "_Accuracy")
212
- ax2.plot(max_index, max_acc, 'ro')
213
- ax2.plot(acc)
214
- ax2.annotate(f'({max_index}, {max_acc})', (max_index, max_acc), textcoords="offset points", xytext=(-5, -5),
 
215
  ha='center')
216
  # Set the x-axis and y-axis labels
217
- ax2.set_xlabel('Top n features')
218
- ax2.set_ylabel('Accuracy')
219
  plt.grid(True)
220
- plt.savefig('output.png')
221
- return 'output.png'
222
  # Add your code here. Write everything as functions first and wrap it into classes later; the main goal for now is a working prototype.
223
  # At the moment the final result is returned as images: one plot of the feature indices with their scores, and one plot of the incremental feature selection (IFS) accuracy.
224
  # A lot of the code above can still be optimized (the plotting, the classifier selection, and so on), but ignore that for now; finish the remaining elif branches below first, then we can discuss refactoring.
@@ -231,7 +323,6 @@ def fs(data, method, num_fea_int, clf):
231
 
232
  importance = np.abs(cl.coef_)
233
  feature_names = list(X)
234
- print(feature_names)
235
  a = len(feature_names)
236
 
237
  idx_features = (-importance).argsort()[:a]
@@ -241,21 +332,21 @@ def fs(data, method, num_fea_int, clf):
241
  print((name_features)[i], importance[idx_features][i])
242
  result = pd.DataFrame({'index': idx_features, 'Score': importance[idx_features]})
243
  result_rank = result.sort_values(by='Score', ascending=False, ignore_index=True)
 
244
  inde = result_rank['index'].tolist()
245
  score = result_rank['Score'].tolist()
246
 
247
  index = []
248
  for i in inde:
249
  index.append(str(i))
250
- fig = plt.figure(figsize=(24, 12))
251
- ax1 = fig.add_subplot(211)
252
- ax1.set_title(str(method))
253
- ax1.plot(index[:num_fea_int], score[:num_fea_int])
254
 
255
  # Set the x-axis and y-axis labels
256
- ax1.set_xlabel('Feature Index')
257
- ax1.set_ylabel('Feature Score')
258
-
259
  if clf == 'RF':
260
  clf = RandomForestClassifier(n_jobs=-1)
261
  elif clf == 'KNN':
@@ -266,13 +357,13 @@ def fs(data, method, num_fea_int, clf):
266
  clf = SVC()
267
  elif clf == 'Naive Bayes':
268
  clf = GaussianNB()
 
269
  inde = inde[:num_fea_int]
270
  index = index[:num_fea_int]
271
  acc = []
272
  # For each feature index in the index list
273
 
274
  X = data.iloc[:, :-1].values
275
- print(X)
276
  for i in range(len(index)):
277
  # Run cross-validation on the first i+1 features
278
  selected_features = X[:, [int(j) - 1 for j in inde[:i + 1]]]
@@ -282,18 +373,42 @@ def fs(data, method, num_fea_int, clf):
282
  max_acc = max(acc)
283
  max_index = acc.index(max_acc) + 1
284
 
285
- ax2 = fig.add_subplot(212)
286
- ax2.set_title("IFS_" + str(method) + "_Accuracy")
287
- ax2.plot(max_index, max_acc, 'ro')
288
- ax2.plot(acc)
289
- ax2.annotate(f'({max_index}, {max_acc})', (max_index, max_acc), textcoords="offset points", xytext=(-5, -5),
 
290
  ha='center')
291
  # Set the x-axis and y-axis labels
292
- ax2.set_xlabel('Top n features')
293
- ax2.set_ylabel('Accuracy')
294
  plt.grid(True)
295
- plt.savefig('output.png')
296
- return 'output.png'
 
297
 
298
  elif method == 'Ensemble':
299
  pass
@@ -349,16 +464,16 @@ iface = gr.Interface(
349
  gr.inputs.Radio(['MRMR_FCD', 'MRMR_FCQ', 'CFS', 'Lasso', 'Ensemble', 'CI']),
350
  gr.inputs.Number(),
351
  gr.inputs.Radio(['RF', 'SVM', 'KNN', 'DT', 'Naive Bayes']),
352
-
353
  ],
354
- outputs="image",
355
  article=article,
356
  examples=[
357
- ["example_data.csv", 'MRMR_FCQ', 20, 'RF'],
358
- ["example_data.csv", 'MRMR_FCD', 10, 'SVM'],
359
- ["example_data.csv", 'MRMR_FCD', 30, 'KNN'],
360
- ["example_data.csv", 'Lasso', 50, 'DT'],
361
- ["example_data.csv", 'Lasso', 40, 'Naive Bayes'],
362
  ],
363
  allow_flagging="never"
364
  )
 
2
  import matplotlib.pyplot as plt
3
  import numpy as np
4
  import pandas as pd
 
5
  from scipy.stats import pointbiserialr
6
  from math import sqrt
 
7
  import math
8
+ import csv
9
+ import seaborn as sns
10
  from scipy.stats import f_oneway
11
  from sklearn.ensemble import RandomForestClassifier
12
  from sklearn.model_selection import cross_val_score
 
15
  from sklearn.svm import SVC
16
  from sklearn.tree import DecisionTreeClassifier
17
  from sklearn.linear_model import LassoLarsCV
18
+ from sklearn.preprocessing import LabelEncoder
19
+ from sklearn.model_selection import train_test_split
20
+ from sklearn.preprocessing import StandardScaler
21
+ from sklearn.metrics import confusion_matrix
22
 
23
 
24
+ class MyModel:
25
+ def __init__(self, model):
26
+ self.clf = model
27
+ self.scaler = None
28
+ self.label_encoder = None
29
+
30
+ def train(self, X, Y):
31
+ # Encode the labels
32
+ self.label_encoder = LabelEncoder()
33
+ Y = self.label_encoder.fit_transform(Y)
34
+
35
+ # Standardize the features
36
+ self.scaler = StandardScaler()
37
+ X = self.scaler.fit_transform(X)
38
+
39
+ # Split into training and test sets
40
+ X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3)
41
+
42
+ # Train the model
43
+ self.clf.fit(X_train, Y_train)
44
+
45
+ def predict_samples(self, samples):
46
+ # Apply the same preprocessing steps to the samples
47
+ samples = self.scaler.transform(samples)
48
+
49
+ # Predict with the trained model
50
+ predictions = self.clf.predict(samples)
51
+
52
+ # Decode the predicted labels back to the original values
53
+ predictions = self.label_encoder.inverse_transform(predictions)
54
+
55
+ return predictions
56
+
57
 
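A note on the new MyModel helper shown above: it fits the StandardScaler and LabelEncoder on the full training matrix before calling train_test_split, and the X_test/Y_test split it creates is never evaluated (the confusion matrices later in the diff come from a separate test CSV instead). Also, the later calls my_model.train(X, y) rely on X and y defined outside the changed lines. A minimal sketch of a leakage-free internal-evaluation variant using an sklearn Pipeline; the helper name and CSV layout (features in all but the last column, labels in the last) are assumptions for illustration, not the app's API:

    import pandas as pd
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import StandardScaler, LabelEncoder
    from sklearn.model_selection import train_test_split
    from sklearn.svm import SVC
    from sklearn.metrics import confusion_matrix

    def train_and_evaluate(csv_path, estimator=None):
        # Hypothetical helper: features in every column but the last, labels in the last column.
        frame = pd.read_csv(csv_path)
        X = frame.iloc[:, :-1].values
        y = LabelEncoder().fit_transform(frame.iloc[:, -1].values)

        # Split first, then let the pipeline fit the scaler on the training fold only.
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)
        model = make_pipeline(StandardScaler(), estimator or SVC(C=1.0, kernel='rbf'))
        model.fit(X_train, y_train)

        # Evaluate on the held-out fold instead of data the scaler has already seen.
        return model, confusion_matrix(y_test, model.predict(X_test))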
58
  def add_max_score_to_list(temp_scores, current_score, selected_indices, selected_indices_list):
59
  max_score_index = np.argmax(np.array(temp_scores))
 
62
  selected_indices_list.append(max_score_index)
63
 
64
 
65
+ def fs(data, method, num_fea_int, clf, testsample):
66
  num_fea_int = int(num_fea_int)
67
  if method == 'MRMR_FCD':
68
  data = pd.read_csv(data.name)
 
102
  combined = list(zip(selected_indices_list, current_score))
103
  # Sort the combined list with sorted(); the key argument sorts by score and reverse=True gives descending order
104
  sorted_combined = sorted(combined, key=lambda x: x[1], reverse=True)
105
+ # Write the indices and feature scores to a CSV file
106
+ with open('index-score.csv', 'w', newline='') as file:
107
+ writer = csv.writer(file)
108
+ writer.writerow(["Index", "Score"])  # write the header row
109
+ writer.writerows(sorted_combined)
110
+
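One detail worth flagging: the rows written to index-score.csv keep the 0-based indices from sorted_combined, while the plot below labels features as index + 1. If the CSV is meant to match the plot, a purely illustrative adjustment (example values stand in for the real sorted_combined):

    import csv

    # 'sorted_combined' is the (index, score) list computed above; example values for illustration only.
    sorted_combined = [(0, 0.91), (4, 0.87), (2, 0.55)]

    with open('index-score.csv', 'w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(["Index", "Score"])
        # Hypothetical shift to 1-based indices so the CSV matches the plotted feature labels.
        writer.writerows((idx + 1, score) for idx, score in sorted_combined)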
111
  inde = []
112
  scores = []
 
113
  for indy in sorted_combined:
114
  inde.append(str(indy[0] + 1))
115
  scores.append(indy[1])
116
+ # Create the first figure: the feature index vs. score plot
117
+ plt.figure(1, figsize=(24, 10))
118
+ plt.title("mRMR-FCD()")
119
+ plt.plot(inde, scores)
120
+ plt.xlabel("Feature Index")
121
+ plt.ylabel("Feature Score")
122
+ plt.savefig('Index_Score.png')
123
+
124
+ ff = []  # convert the string indices back to integers
 
125
  for fire in inde:
126
  ff.append(int(fire) - 1)
127
+ # Choose the classifier
128
  if clf == 'RF':
129
  clf = RandomForestClassifier(n_jobs=-1)
130
  elif clf == 'KNN':
 
132
  elif clf == 'DT':
133
  clf = DecisionTreeClassifier()
134
  elif clf == 'SVM':
135
+ clf = SVC(C=1.0, kernel='rbf')
136
  elif clf == 'Naive Bayes':
137
  clf = GaussianNB()
138
 
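The classifier is picked through the same if/elif chain in every branch, which the author flags below as a cleanup target. A dictionary-based factory is one way to collapse it; a sketch, with the mapping and function names invented for illustration:

    from sklearn.ensemble import RandomForestClassifier
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.svm import SVC
    from sklearn.naive_bayes import GaussianNB

    # Hypothetical factory: build a fresh estimator from the radio-button label.
    CLASSIFIERS = {
        'RF': lambda: RandomForestClassifier(n_jobs=-1),
        'KNN': lambda: KNeighborsClassifier(),
        'DT': lambda: DecisionTreeClassifier(),
        'SVM': lambda: SVC(C=1.0, kernel='rbf'),
        'Naive Bayes': lambda: GaussianNB(),
    }

    def make_classifier(name):
        return CLASSIFIERS[name]()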
 
147
  max_acc = max(acc)
148
  max_index = acc.index(max_acc) + 1
149
 
150
+ # Create the second figure: the IFS accuracy plot
151
+ plt.figure(2, figsize=(24, 10))
152
+ plt.title("IFS_" + str(method) + "_Accuracy")
153
+ plt.plot(max_index, max_acc, 'ro')
154
+ plt.plot(acc)
155
+ plt.annotate(f'({max_index}, {max_acc})', (max_index, max_acc), textcoords="offset points", xytext=(-5, 20),
156
  ha='center')
157
  # Set the x-axis and y-axis labels
158
+ plt.xlabel("Top n features")
159
+ plt.ylabel('Accuracy')
160
+ plt.savefig('acc.png')
161
+
162
+ # Load the test samples and labels from test.csv
163
+ testsample = pd.read_csv(testsample.name)
164
+ test_samples = testsample.iloc[:, :-1].values
165
+ test_labels = testsample.iloc[:, -1].values
166
+
167
+ # Build the model
168
+ models = SVC(C=1.0, kernel='rbf')
169
+ my_model = MyModel(models)
170
+ my_model.train(X, y)
171
+
172
+ # Predict labels for the test samples
173
+ predictions = my_model.predict_samples(test_samples)
174
+ # Compute the confusion matrix
175
+ cm = confusion_matrix(test_labels, predictions)
176
+
177
+ # Plot the confusion matrix as a seaborn heatmap
178
+ plt.figure(figsize=(24, 10))
179
+ sns.heatmap(cm, annot=True, fmt='d')
180
+ plt.xlabel('Predicted labels')
181
+ plt.ylabel('True labels')
182
  plt.grid(True)
183
+ plt.savefig('confusion_matrix.png')
184
+
185
+ return 'Index_Score.png', 'acc.png', "confusion_matrix.png", "index-score.csv"
186
 
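For context, acc, max_acc and max_index come from the incremental feature selection (IFS) loop that sits just above this hunk, outside the diff context: features are added in ranked order and each prefix is scored with cross-validation (the same pattern is visible in the Lasso branch further down). A minimal sketch of that loop; the function name, cv=5, and the X/y/ranked-index arguments are assumptions:

    from sklearn.model_selection import cross_val_score

    def ifs_accuracy_curve(X, y, ranked_indices, estimator, cv=5):
        # Score the top-1, top-2, ..., top-n ranked features with cross-validation.
        acc = []
        for n in range(1, len(ranked_indices) + 1):
            subset = X[:, ranked_indices[:n]]
            acc.append(cross_val_score(estimator, subset, y, cv=cv).mean())
        max_acc = max(acc)
        max_index = acc.index(max_acc) + 1  # number of top features at the best accuracy
        return acc, max_acc, max_index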
187
  elif method == 'MRMR_FCQ':
188
  data = pd.read_csv(data.name)
 
228
 
229
  # Sort the combined list with sorted(); the key argument sorts by score and reverse=True gives descending order
230
  sorted_combined = sorted(combined, key=lambda x: x[1], reverse=True)
231
+ # Write the indices and feature scores to a CSV file
232
+ with open('index-score.csv', 'w', newline='') as file:
233
+ writer = csv.writer(file)
234
+ writer.writerow(["Index", "Score"])  # write the header row
235
+ writer.writerows(sorted_combined)
236
+
237
  inde = []
238
  scores = []
 
239
  for indy in sorted_combined:
240
  inde.append(str(indy[0] + 1))
241
  scores.append(indy[1])
242
+ # Create the first figure: the feature index vs. score plot
243
+ plt.figure(1, figsize=(24, 10))
244
+ plt.title("mRMR-FCQ")
245
+ plt.plot(inde, scores)
246
+ plt.xlabel("Feature Index")
247
+ plt.ylabel("Feature Score")
248
+ plt.savefig('Index_Score.png')
249
+
250
+ ff = []  # convert the string indices back to integers
 
251
  for fire in inde:
252
  ff.append(int(fire) - 1)
253
+ # Choose the classifier
254
  if clf == 'RF':
255
  clf = RandomForestClassifier(n_jobs=-1)
256
  elif clf == 'KNN':
 
258
  elif clf == 'DT':
259
  clf = DecisionTreeClassifier()
260
  elif clf == 'SVM':
261
+ clf = SVC(C=1.0, kernel='rbf')
262
  elif clf == 'Naive Bayes':
263
  clf = GaussianNB()
264
 
 
273
  max_acc = max(acc)
274
  max_index = acc.index(max_acc) + 1
275
 
276
+ # Create the second figure: the IFS accuracy plot
277
+ plt.figure(2, figsize=(24, 10))
278
+ plt.title("IFS_" + str(method) + "_Accuracy")
279
+ plt.plot(max_index, max_acc, 'ro')
280
+ plt.plot(acc)
281
+ plt.annotate(f'({max_index}, {max_acc})', (max_index, max_acc), textcoords="offset points", xytext=(-5, 20),
282
  ha='center')
283
  # Set the x-axis and y-axis labels
284
+ plt.xlabel("Top n features")
285
+ plt.ylabel('Accuracy')
286
+ plt.savefig('acc.png')
287
+
288
+ # Load the test samples and labels from test.csv
289
+ testsample = pd.read_csv(testsample.name)
290
+ test_samples = testsample.iloc[:, :-1].values
291
+ test_labels = testsample.iloc[:, -1].values
292
+
293
+ # Build the model
294
+ models = SVC(C=1.0, kernel='rbf')
295
+ my_model = MyModel(models)
296
+ my_model.train(X, y)
297
+
298
+ # Predict labels for the test samples
299
+ predictions = my_model.predict_samples(test_samples)
300
+ # Compute the confusion matrix
301
+ cm = confusion_matrix(test_labels, predictions)
302
+
303
+ # Plot the confusion matrix as a seaborn heatmap
304
+ plt.figure(figsize=(24, 10))
305
+ sns.heatmap(cm, annot=True, fmt='d')
306
+ plt.xlabel('Predicted labels')
307
+ plt.ylabel('True labels')
308
  plt.grid(True)
309
+ plt.savefig('confusion_matrix.png')
310
+
311
+ return 'Index_Score.png', 'acc.png', "confusion_matrix.png", "index-score.csv"
312
+
313
+
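The MRMR_FCD and MRMR_FCQ branches above differ only in how relevance and redundancy are combined in the feature score. In the usual mRMR formulation, FCD subtracts the mean absolute correlation with the already-selected features from the F-statistic, while FCQ divides by it. The app's own scoring code is outside this diff, so the following is a hedged sketch of the standard criteria built from the imported f_oneway and numpy correlations, not the app's implementation:

    import numpy as np
    from scipy.stats import f_oneway

    def f_statistic(feature, labels):
        # Relevance: one-way ANOVA F-statistic between a feature and the class labels.
        groups = [feature[labels == c] for c in np.unique(labels)]
        return f_oneway(*groups).statistic

    def mrmr_score(candidate, selected_features, labels, variant='FCD'):
        relevance = f_statistic(candidate, labels)
        if not selected_features:
            return relevance
        # Redundancy: mean absolute correlation with the features chosen so far.
        redundancy = np.mean([abs(np.corrcoef(candidate, s)[0, 1]) for s in selected_features])
        if variant == 'FCD':             # F-test Correlation Difference
            return relevance - redundancy
        return relevance / redundancy    # F-test Correlation Quotient (FCQ)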
314
  # Add your code here. Write everything as functions first and wrap it into classes later; the main goal for now is a working prototype.
315
  # At the moment the final result is returned as images: one plot of the feature indices with their scores, and one plot of the incremental feature selection (IFS) accuracy.
316
  # A lot of the code above can still be optimized (the plotting, the classifier selection, and so on), but ignore that for now; finish the remaining elif branches below first, then we can discuss refactoring.
 
323
 
324
  importance = np.abs(cl.coef_)
325
  feature_names = list(X)
 
326
  a = len(feature_names)
327
 
328
  idx_features = (-importance).argsort()[:a]
 
332
  print((name_features)[i], importance[idx_features][i])
333
  result = pd.DataFrame({'index': idx_features, 'Score': importance[idx_features]})
334
  result_rank = result.sort_values(by='Score', ascending=False, ignore_index=True)
335
+ result_rank.to_csv("index-score.csv")
336
  inde = result_rank['index'].tolist()
337
  score = result_rank['Score'].tolist()
338
 
339
  index = []
340
  for i in inde:
341
  index.append(str(i))
342
+ plt.figure(1, figsize=(24, 12))
343
+ plt.title(str(method))
344
+ plt.plot(index[:num_fea_int], score[:num_fea_int])
 
345
 
346
  # Set the x-axis and y-axis labels
347
+ plt.xlabel('Feature Index')
348
+ plt.ylabel('Feature Score')
349
+ plt.savefig('Index_Score.png')
350
  if clf == 'RF':
351
  clf = RandomForestClassifier(n_jobs=-1)
352
  elif clf == 'KNN':
 
357
  clf = SVC()
358
  elif clf == 'Naive Bayes':
359
  clf = GaussianNB()
360
+
361
  inde = inde[:num_fea_int]
362
  index = index[:num_fea_int]
363
  acc = []
364
  # For each feature index in the index list
365
 
366
  X = data.iloc[:, :-1].values
 
367
  for i in range(len(index)):
368
  # Run cross-validation on the first i+1 features
369
  selected_features = X[:, [int(j) - 1 for j in inde[:i + 1]]]
 
373
  max_acc = max(acc)
374
  max_index = acc.index(max_acc) + 1
375
 
376
+ # ax2 = fig.add_subplot(212)
377
+ # ax2.set_title("IFS_" + str(method) + "_Accuracy")
378
+ plt.figure(2, figsize=(24, 10))
379
+ plt.plot(max_index, max_acc, 'ro')
380
+ plt.plot(acc)
381
+ plt.annotate(f'({max_index}, {max_acc})', (max_index, max_acc), textcoords="offset points", xytext=(-5, -5),
382
  ha='center')
383
  # 设置x轴和y轴的标签
384
+ # ax2.set_xlabel()
385
+ # ax2.set_ylabel('Accuracy')
386
+ plt.xlabel('Top n features')
387
+ plt.ylabel('Accuracy')
388
+ plt.grid(True)
389
+ plt.savefig('acc.png')
390
+
391
+ testsample = pd.read_csv(testsample.name)
392
+ test_samples = testsample.iloc[:, :-1].values
393
+ test_labels = testsample.iloc[:, -1].values
394
+ models = SVC(C=1.0, kernel='rbf')
395
+ my_model = MyModel(models)
396
+ my_model.train(X, y)
397
+
398
+ # Predict labels for the test samples
399
+ predictions = my_model.predict_samples(test_samples)
400
+ # Compute the confusion matrix
401
+ cm = confusion_matrix(test_labels, predictions)
402
+
403
+ # Plot the confusion matrix as a seaborn heatmap
404
+ plt.figure(figsize=(24, 10))
405
+ sns.heatmap(cm, annot=True, fmt='d')
406
+ plt.xlabel('Predicted labels')
407
+ plt.ylabel('True labels')
408
  plt.grid(True)
409
+ plt.savefig('confusion_matrix.png')
410
+
411
+ return 'Index_Score.png', 'acc.png', 'confusion_matrix.png', 'index-score.csv'
412
 
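This Lasso branch ranks features by the absolute values of cl.coef_, but the fitting of cl itself falls outside the diff context. A sketch of how such a ranking is typically produced with the imported LassoLarsCV; the function and variable names are illustrative, and numeric labels in the last column are assumed:

    import numpy as np
    import pandas as pd
    from sklearn.linear_model import LassoLarsCV

    def lasso_rank(data: pd.DataFrame):
        # Features in every column but the last, numeric labels in the last column (as elsewhere in app.py).
        X, y = data.iloc[:, :-1], data.iloc[:, -1]
        cl = LassoLarsCV(cv=5).fit(X, y)

        importance = np.abs(cl.coef_)    # one weight per feature
        order = np.argsort(-importance)  # most important first
        return pd.DataFrame({'index': order, 'Score': importance[order]})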
413
  elif method == 'Ensemble':
414
  pass
 
464
  gr.inputs.Radio(['MRMR_FCD', 'MRMR_FCQ', 'CFS', 'Lasso', 'Ensemble', 'CI']),
465
  gr.inputs.Number(),
466
  gr.inputs.Radio(['RF', 'SVM', 'KNN', 'DT', 'Naive Bayes']),
467
+ "file"
468
  ],
469
+ outputs=["image", "image", "image", "file"],
470
  article=article,
471
  examples=[
472
+ ["example_data.csv", 'MRMR_FCQ', 20, 'RF', "test.csv"],
473
+ ["example_data.csv", 'MRMR_FCD', 10, 'SVM', "test.csv"],
474
+ ["example_data.csv", 'MRMR_FCD', 30, 'KNN', "test.csv"],
475
+ ["example_data.csv", 'CFS', 50, 'DT', "test.csv"],
476
+ ["example_data.csv", 'CFS', 40, 'Naive Bayes', "test.csv"],
477
  ],
478
  allow_flagging="never"
479
  )
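Pulling the interface changes together: fs now takes a fifth input (the test-set CSV) and returns three images plus the score CSV. A condensed sketch of the resulting gr.Interface call in the same legacy gr.inputs style as the file; fn=fs and the first "file" input are assumptions based on how fs reads data.name, and fs, article, and the examples are defined elsewhere in app.py:

    import gradio as gr

    # Sketch only: fs and article come from app.py; adjust the first input to match the full file.
    iface = gr.Interface(
        fn=fs,
        inputs=[
            "file",                                                      # training CSV
            gr.inputs.Radio(['MRMR_FCD', 'MRMR_FCQ', 'CFS', 'Lasso', 'Ensemble', 'CI']),
            gr.inputs.Number(),                                          # number of features to keep
            gr.inputs.Radio(['RF', 'SVM', 'KNN', 'DT', 'Naive Bayes']),
            "file",                                                      # test-set CSV
        ],
        outputs=["image", "image", "image", "file"],  # index/score plot, IFS accuracy, confusion matrix, score CSV
        article=article,
        allow_flagging="never",
    )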