nullHawk committed
Commit 33206e3 · 1 Parent(s): 85d5c46

add: model

Files changed (1)
model.py +322 -0
model.py ADDED
@@ -0,0 +1,322 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
import matplotlib.pyplot as plt
import seaborn as sns


class LoanPredictionANN(nn.Module):
    """
    Neural Network for Loan Prediction

    Architecture:
    - Input: 9 features
    - Hidden Layer 1: 64 neurons (ReLU)
    - Hidden Layer 2: 32 neurons (ReLU)
    - Hidden Layer 3: 16 neurons (ReLU)
    - Output: 1 neuron (Sigmoid)
    - Dropout: Progressive rates [0.3, 0.2, 0.1]
    """

    def __init__(self, input_size=9, hidden_sizes=[64, 32, 16], dropout_rates=[0.3, 0.2, 0.1]):
        super(LoanPredictionANN, self).__init__()

        self.input_size = input_size
        self.hidden_sizes = hidden_sizes
        self.dropout_rates = dropout_rates

        # Input layer to first hidden layer
        self.fc1 = nn.Linear(input_size, hidden_sizes[0])
        self.dropout1 = nn.Dropout(dropout_rates[0])

        # Hidden layers
        self.fc2 = nn.Linear(hidden_sizes[0], hidden_sizes[1])
        self.dropout2 = nn.Dropout(dropout_rates[1])

        self.fc3 = nn.Linear(hidden_sizes[1], hidden_sizes[2])
        self.dropout3 = nn.Dropout(dropout_rates[2])

        # Output layer
        self.fc4 = nn.Linear(hidden_sizes[2], 1)

        # Initialize weights
        self._initialize_weights()

    def _initialize_weights(self):
        """Initialize weights using Xavier/Glorot initialization"""
        for module in self.modules():
            if isinstance(module, nn.Linear):
                nn.init.xavier_uniform_(module.weight)
                nn.init.zeros_(module.bias)

    def forward(self, x):
        """Forward pass through the network"""
        # First hidden layer
        x = F.relu(self.fc1(x))
        x = self.dropout1(x)

        # Second hidden layer
        x = F.relu(self.fc2(x))
        x = self.dropout2(x)

        # Third hidden layer
        x = F.relu(self.fc3(x))
        x = self.dropout3(x)

        # Output layer
        x = torch.sigmoid(self.fc4(x))

        return x

    def predict_proba(self, x):
        """Get prediction probabilities"""
        self.eval()
        with torch.no_grad():
            if isinstance(x, np.ndarray):
                x = torch.FloatTensor(x)
            return self.forward(x).numpy()

    def predict(self, x, threshold=0.5):
        """Get binary predictions"""
        probabilities = self.predict_proba(x)
        return (probabilities >= threshold).astype(int)

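# Illustrative usage sketch: how the full model is expected to be exercised end
# to end. The 9-feature input, batch size, and random data below are assumptions
# for demonstration, based on the docstring above.
def _demo_full_model_usage():
    model = LoanPredictionANN()
    dummy_batch = np.random.rand(4, 9).astype(np.float32)    # 4 samples, 9 scaled features
    probabilities = model.predict_proba(dummy_batch)          # shape (4, 1), values in (0, 1)
    predictions = model.predict(dummy_batch, threshold=0.5)   # binary 0/1 labels
    return probabilities, predictions
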
class LoanPredictionLightANN(nn.Module):
    """
    Lighter version of the neural network for faster training

    Architecture:
    - Input: 9 features
    - Hidden Layer 1: 32 neurons (ReLU)
    - Hidden Layer 2: 16 neurons (ReLU)
    - Output: 1 neuron (Sigmoid)
    - Dropout: [0.2, 0.1]
    """

    def __init__(self, input_size=9):
        super(LoanPredictionLightANN, self).__init__()

        self.fc1 = nn.Linear(input_size, 32)
        self.dropout1 = nn.Dropout(0.2)

        self.fc2 = nn.Linear(32, 16)
        self.dropout2 = nn.Dropout(0.1)

        self.fc3 = nn.Linear(16, 1)

        self._initialize_weights()

    def _initialize_weights(self):
        for module in self.modules():
            if isinstance(module, nn.Linear):
                nn.init.xavier_uniform_(module.weight)
                nn.init.zeros_(module.bias)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = self.dropout1(x)

        x = F.relu(self.fc2(x))
        x = self.dropout2(x)

        x = torch.sigmoid(self.fc3(x))

        return x


class LoanPredictionDeepANN(nn.Module):
    """
    Deeper version for maximum performance

    Architecture:
    - Input: 9 features
    - Hidden Layer 1: 128 neurons (ReLU)
    - Hidden Layer 2: 64 neurons (ReLU)
    - Hidden Layer 3: 32 neurons (ReLU)
    - Hidden Layer 4: 16 neurons (ReLU)
    - Output: 1 neuron (Sigmoid)
    - Dropout: [0.3, 0.3, 0.2, 0.1]
    """

    def __init__(self, input_size=9):
        super(LoanPredictionDeepANN, self).__init__()

        self.fc1 = nn.Linear(input_size, 128)
        self.dropout1 = nn.Dropout(0.3)

        self.fc2 = nn.Linear(128, 64)
        self.dropout2 = nn.Dropout(0.3)

        self.fc3 = nn.Linear(64, 32)
        self.dropout3 = nn.Dropout(0.2)

        self.fc4 = nn.Linear(32, 16)
        self.dropout4 = nn.Dropout(0.1)

        self.fc5 = nn.Linear(16, 1)

        self._initialize_weights()

    def _initialize_weights(self):
        for module in self.modules():
            if isinstance(module, nn.Linear):
                nn.init.xavier_uniform_(module.weight)
                nn.init.zeros_(module.bias)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = self.dropout1(x)

        x = F.relu(self.fc2(x))
        x = self.dropout2(x)

        x = F.relu(self.fc3(x))
        x = self.dropout3(x)

        x = F.relu(self.fc4(x))
        x = self.dropout4(x)

        x = torch.sigmoid(self.fc5(x))

        return x

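# Illustrative sketch: comparing the three variants above by parameter count.
# The default input_size of 9 features is assumed.
def _demo_compare_variants():
    for model_cls in (LoanPredictionLightANN, LoanPredictionANN, LoanPredictionDeepANN):
        model = model_cls()
        n_params = sum(p.numel() for p in model.parameters())
        print(f"{model_cls.__name__}: {n_params:,} parameters")
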
def load_processed_data(data_path='data/processed'):
    """Load the processed training and test data"""
    train_data = pd.read_csv(f'{data_path}/train_data_scaled.csv')
    test_data = pd.read_csv(f'{data_path}/test_data_scaled.csv')

    # Separate features and target
    feature_columns = [col for col in train_data.columns if col != 'loan_repaid']

    X_train = train_data[feature_columns].values
    y_train = train_data['loan_repaid'].values

    X_test = test_data[feature_columns].values
    y_test = test_data['loan_repaid'].values

    return X_train, y_train, X_test, y_test, feature_columns

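# Illustrative sketch: wrapping the arrays returned by load_processed_data in a
# DataLoader for mini-batch training. The batch size here is an assumption.
def _demo_make_dataloader(X, y, batch_size=64):
    from torch.utils.data import TensorDataset, DataLoader

    dataset = TensorDataset(torch.FloatTensor(X), torch.FloatTensor(y).unsqueeze(1))
    return DataLoader(dataset, batch_size=batch_size, shuffle=True)
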
def calculate_class_weights(y):
    """Calculate class weights for handling imbalanced data"""
    from sklearn.utils.class_weight import compute_class_weight

    classes = np.unique(y)
    weights = compute_class_weight('balanced', classes=classes, y=y)
    return torch.FloatTensor(weights)

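# Illustrative sketch: one way the class weights above could be applied during
# training, by turning them into per-sample weights for binary cross-entropy.
# The actual loss wiring lives in train.py and may differ; this is an assumption.
def _demo_weighted_bce_loss(outputs, targets, class_weights):
    # outputs: sigmoid probabilities of shape (batch, 1)
    # targets: 0/1 float labels of shape (batch, 1)
    # class_weights: tensor [weight_class_0, weight_class_1] from calculate_class_weights
    sample_weights = torch.where(targets == 1, class_weights[1], class_weights[0])
    return F.binary_cross_entropy(outputs, targets, weight=sample_weights)
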
def evaluate_model(model, X_test, y_test, threshold=0.5):
    """Comprehensive model evaluation"""
    model.eval()

    # Get predictions
    with torch.no_grad():
        X_test_tensor = torch.FloatTensor(X_test)
        y_pred_proba = model(X_test_tensor).numpy().flatten()
        y_pred = (y_pred_proba >= threshold).astype(int)

    # Calculate metrics
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    auc_roc = roc_auc_score(y_test, y_pred_proba)

    metrics = {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1_score': f1,
        'auc_roc': auc_roc
    }

    return metrics, y_pred, y_pred_proba


def plot_training_history(train_losses, val_losses, train_accuracies, val_accuracies):
    """Plot training history"""
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))

    # Loss plot
    ax1.plot(train_losses, label='Training Loss', color='blue')
    ax1.plot(val_losses, label='Validation Loss', color='red')
    ax1.set_title('Model Loss')
    ax1.set_xlabel('Epoch')
    ax1.set_ylabel('Loss')
    ax1.legend()
    ax1.grid(True)

    # Accuracy plot
    ax2.plot(train_accuracies, label='Training Accuracy', color='blue')
    ax2.plot(val_accuracies, label='Validation Accuracy', color='red')
    ax2.set_title('Model Accuracy')
    ax2.set_xlabel('Epoch')
    ax2.set_ylabel('Accuracy')
    ax2.legend()
    ax2.grid(True)

    plt.tight_layout()
    plt.show()


def plot_confusion_matrix(y_true, y_pred, class_names=['Charged Off', 'Fully Paid']):
    """Plot confusion matrix"""
    from sklearn.metrics import confusion_matrix

    cm = confusion_matrix(y_true, y_pred)

    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=class_names, yticklabels=class_names)
    plt.title('Confusion Matrix')
    plt.xlabel('Predicted')
    plt.ylabel('Actual')
    plt.show()

    return cm


def model_summary(model):
    """Print model architecture summary"""
    print("=" * 60)
    print("MODEL ARCHITECTURE SUMMARY")
    print("=" * 60)

    total_params = sum(p.numel() for p in model.parameters())
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

    print(f"Model: {model.__class__.__name__}")
    print(f"Total parameters: {total_params:,}")
    print(f"Trainable parameters: {trainable_params:,}")
    print("\nLayer Details:")
    print("-" * 40)

    for name, module in model.named_modules():
        if isinstance(module, (nn.Linear, nn.Dropout)):
            print(f"{name}: {module}")

    print("=" * 60)


if __name__ == "__main__":
    # Example usage
    print("Loading processed data...")
    X_train, y_train, X_test, y_test, feature_names = load_processed_data()

    print(f"Training data shape: {X_train.shape}")
    print(f"Test data shape: {X_test.shape}")
    print(f"Feature names: {feature_names}")

    # Create model
    model = LoanPredictionANN()
    model_summary(model)

    print("\nModel created successfully!")
    print("Use train.py to train the model.")