import os

import numpy as np
import pandas as pd


class Network(object):
    """A fully-connected feedforward neural network trained with per-sample SGD.

    Weights and biases are drawn from a standard normal distribution;
    the quadratic (MSE) cost and logistic activations are used throughout.
    """

    def __init__(self, sizes):
        """Build a network whose layer widths are given by ``sizes``.

        Args:
            sizes: list of layer widths, e.g. ``[n_features, 30, n_classes]``.
        """
        self.num_layers = len(sizes)
        self.sizes = sizes
        # One bias column vector per non-input layer.
        self.biases = [np.random.randn(y, 1) for y in sizes[1:]]
        # weights[i] maps layer i (width x) to layer i+1 (width y).
        self.weights = [np.random.randn(y, x)
                        for x, y in zip(sizes[:-1], sizes[1:])]

    def feedforward(self, a):
        """Return the network output for the column-vector input ``a``."""
        for b, w in zip(self.biases, self.weights):
            a = sigmoid(np.dot(w, a) + b)
        return a

    def train(self, training_data, epochs, eta):
        """Train with plain (batch-size-1) stochastic gradient descent.

        Args:
            training_data: list of ``(x, y)`` column-vector pairs;
                shuffled in place at the start of every epoch.
            epochs: number of full passes over the data.
            eta: learning rate.
        """
        for _ in range(epochs):
            np.random.shuffle(training_data)
            for x, y in training_data:
                self.update_network(x, y, eta)

    def update_network(self, x, y, eta):
        """Apply one gradient-descent step for a single example ``(x, y)``."""
        nabla_b, nabla_w = self.backprop(x, y)
        self.weights = [w - eta * nw
                        for w, nw in zip(self.weights, nabla_w)]
        self.biases = [b - eta * nb
                       for b, nb in zip(self.biases, nabla_b)]

    def evaluate(self, test_data):
        """Return the number of ``test_data`` examples classified correctly.

        Targets are one-hot vectors; the predicted class is the index of the
        largest output activation.
        """
        test_results = [(np.argmax(self.feedforward(x)), np.argmax(y))
                        for (x, y) in test_data]
        return sum(int(pred == truth) for (pred, truth) in test_results)

    def backprop(self, x, y):
        """Return ``(nabla_b, nabla_w)`` for a single example ``(x, y)``.

        The gradients of the quadratic cost are returned layer by layer,
        matching the shapes of ``self.biases`` and ``self.weights``.
        """
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        # Forward pass: record every pre-activation (zs) and activation.
        activation = x
        activations = [x]
        zs = []
        for b, w in zip(self.biases, self.weights):
            z = np.dot(w, activation) + b
            zs.append(z)
            activation = sigmoid(z)
            activations.append(activation)
        # Backward pass: output-layer error first ...
        delta = self.cost_derivative(activations[-1], y) * sigmoid_prime(zs[-1])
        nabla_b[-1] = delta
        nabla_w[-1] = np.dot(delta, activations[-2].transpose())
        # ... then propagate it backwards; l counts layers from the end
        # (l == 2 is the second-to-last layer).
        for l in range(2, self.num_layers):
            z = zs[-l]
            sp = sigmoid_prime(z)
            delta = np.dot(self.weights[-l + 1].transpose(), delta) * sp
            nabla_b[-l] = delta
            nabla_w[-l] = np.dot(delta, activations[-l - 1].transpose())
        return (nabla_b, nabla_w)

    def cost_derivative(self, output_activations, y):
        """Derivative of the quadratic cost w.r.t. the output activations."""
        return (output_activations - y)


def sigmoid(z):
    """Elementwise logistic function 1 / (1 + e^-z)."""
    return 1.0 / (1.0 + np.exp(-z))


def sigmoid_prime(z):
    """Derivative of the logistic function: s(z) * (1 - s(z))."""
    return sigmoid(z) * (1 - sigmoid(z))


def k_fold_cross_validation(dataset, k):
    """Shuffle ``dataset`` in place and yield k (training, validation) splits.

    Each validation fold has ``len(dataset) // k`` samples; any remainder
    samples always stay in the training portion.
    """
    np.random.shuffle(dataset)
    fold_size = len(dataset) // k
    for i in range(k):
        validation_data = dataset[i * fold_size:(i + 1) * fold_size]
        training_data = dataset[:i * fold_size] + dataset[(i + 1) * fold_size:]
        yield training_data, validation_data


def load_and_preprocess_data(file_path):
    """Load an Excel sheet and return a list of ``(x, y)`` column-vector pairs.

    Every column except the last is a feature and is z-score normalized;
    the last column is the class label and is one-hot encoded.

    Bug fix: the original built the label mapping only for object/integer
    label dtypes but used ``label_vectors`` unconditionally, raising
    ``NameError`` for float-typed labels. The mapping is now built for
    every dtype. Zero-variance feature columns are also guarded against
    division by zero (previously they became NaN).
    """
    data = pd.read_excel(file_path, header=None)
    features = data.iloc[:, :-1]
    # Z-score normalization; constant columns would divide by zero.
    std = features.std().replace(0, 1)
    features = (features - features.mean()) / std
    labels = data.iloc[:, -1]
    # Map raw label values to dense indices 0..n_classes-1, whatever the dtype.
    unique_labels = labels.unique()
    label_mapping = {label: idx for idx, label in enumerate(unique_labels)}
    labels = labels.map(label_mapping)
    # One-hot encode the dense indices.
    label_vectors = np.zeros((labels.size, len(unique_labels)))
    for i, label in enumerate(labels):
        label_vectors[i, label] = 1
    dataset = list(zip(
        [np.reshape(x, (len(x), 1)) for x in features.to_numpy()],
        [np.reshape(y, (len(y), 1)) for y in label_vectors]))
    return dataset


def main():
    """Run 10 rounds of 10-fold cross-validation on every .xls dataset."""
    folder_path = 'C:\\Users\\tt235\\Desktop\\Code\\code\\代码复现\\算法学习测试数据集'
    files = [f for f in os.listdir(folder_path) if f.endswith('.xls')]
    for file in files:
        file_path = os.path.join(folder_path, file)
        dataset = load_and_preprocess_data(file_path)
        for iteration in range(10):  # repeat the whole cross-validation 10 times
            accuracies = []  # per-fold accuracies for this round
            for i, (train_data, validation_data) in enumerate(
                    k_fold_cross_validation(dataset, 10)):
                input_size = len(train_data[0][0])
                output_size = len(train_data[0][1])
                net = Network([input_size, 30, output_size])
                net.train(train_data, 30, 3.0)  # tune epochs / learning rate as needed
                validation_accuracy = net.evaluate(validation_data)
                accuracies.append(validation_accuracy / len(validation_data))
            average_accuracy = (sum(accuracies) / len(accuracies)) * 100
            print(f"\nIteration {iteration + 1} Training on file: {file}")
            print(f"Average Validation Accuracy: {average_accuracy:.2f}%")


# Guarded so importing this module does not immediately try to read a
# machine-specific Windows folder; running the file as a script is unchanged.
if __name__ == "__main__":
    main()