Spaces:
Sleeping
Sleeping
import torch | |
import torch.nn as nn | |
import torch.nn.functional as F | |
import pandas as pd | |
import numpy as np | |
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score | |
import matplotlib.pyplot as plt | |
import seaborn as sns | |
class LoanPredictionDeepANN(nn.Module):
    """Five-layer fully connected network for binary loan prediction.

    Layer layout (hidden widths and dropout probabilities are fixed):

        input  (``input_size`` features, 9 by default)
        fc1    128 units -> ReLU -> dropout 0.3
        fc2     64 units -> ReLU -> dropout 0.3
        fc3     32 units -> ReLU -> dropout 0.2
        fc4     16 units -> ReLU -> dropout 0.1
        fc5      1 unit  -> sigmoid

    ``forward`` applies the sigmoid itself, so the network returns values
    between 0 and 1 rather than raw logits.
    """

    def __init__(self, input_size=9):
        super().__init__()
        h1, h2, h3, h4 = 128, 64, 32, 16

        # Attribute names and registration order are kept stable: they
        # determine the state_dict keys and the order in which the random
        # number generator is consumed during initialisation.
        self.fc1 = nn.Linear(input_size, h1)
        self.dropout1 = nn.Dropout(p=0.3)
        self.fc2 = nn.Linear(h1, h2)
        self.dropout2 = nn.Dropout(p=0.3)
        self.fc3 = nn.Linear(h2, h3)
        self.dropout3 = nn.Dropout(p=0.2)
        self.fc4 = nn.Linear(h3, h4)
        self.dropout4 = nn.Dropout(p=0.1)
        self.fc5 = nn.Linear(h4, 1)

        self._initialize_weights()

    def _initialize_weights(self):
        """Re-initialise every linear layer: Xavier-uniform weights, zero biases."""
        linear_layers = (m for m in self.modules() if isinstance(m, nn.Linear))
        for layer in linear_layers:
            nn.init.xavier_uniform_(layer.weight)
            nn.init.zeros_(layer.bias)

    def forward(self, x):
        """Run the network on ``x`` and return the sigmoid of the final layer."""
        hidden = self.dropout1(F.relu(self.fc1(x)))
        hidden = self.dropout2(F.relu(self.fc2(hidden)))
        hidden = self.dropout3(F.relu(self.fc3(hidden)))
        hidden = self.dropout4(F.relu(self.fc4(hidden)))
        return torch.sigmoid(self.fc5(hidden))
def load_processed_data(data_path='data/processed'):
    """Read the scaled train/test CSV files and split features from target.

    Args:
        data_path: Directory containing ``train_data_scaled.csv`` and
            ``test_data_scaled.csv``.

    Returns:
        ``(X_train, y_train, X_test, y_test, feature_columns)``.  The arrays
        come from ``DataFrame.values``; ``feature_columns`` lists every
        training column except ``'loan_repaid'``.  The same column list is
        used to select the test features.
    """
    target = 'loan_repaid'
    train_frame = pd.read_csv(f'{data_path}/train_data_scaled.csv')
    test_frame = pd.read_csv(f'{data_path}/test_data_scaled.csv')

    # The feature set is defined by the training file only.
    feature_columns = [name for name in train_frame.columns if name != target]

    def split(frame):
        """Return (feature matrix, target vector) for one frame."""
        return frame[feature_columns].values, frame[target].values

    X_train, y_train = split(train_frame)
    X_test, y_test = split(test_frame)
    return X_train, y_train, X_test, y_test, feature_columns
def calculate_class_weights(y):
    """Compute 'balanced' class weights for the labels in ``y``.

    The weights are produced by scikit-learn's ``compute_class_weight`` for
    the classes ``np.unique(y)`` and returned as a ``torch.FloatTensor``.
    """
    # Imported locally so the module can be loaded without this helper's
    # scikit-learn dependency being touched.
    from sklearn.utils.class_weight import compute_class_weight

    balanced = compute_class_weight(
        class_weight='balanced',
        classes=np.unique(y),
        y=y,
    )
    return torch.FloatTensor(balanced)
def evaluate_model(model, X_test, y_test, threshold=0.5, from_logits=None):
    """Evaluate a binary classifier on a held-out set.

    Args:
        model: Network to evaluate; it is switched to eval mode.
        X_test: Feature matrix (anything ``torch.as_tensor`` accepts).
        y_test: Ground-truth binary labels.
        threshold: Probability at or above which a sample is predicted
            as the positive class.
        from_logits: Whether the model output still needs a sigmoid.
            ``True`` applies a sigmoid, ``False`` uses the output as-is.
            ``None`` (default) auto-detects: the sigmoid is applied only
            when some output lies outside [0, 1].  Pass an explicit value
            if the model emits logits that could all fall inside [0, 1].

    Returns:
        ``(metrics, y_pred, y_pred_proba)`` where ``metrics`` is a dict with
        the keys ``accuracy``, ``precision``, ``recall``, ``f1_score`` and
        ``auc_roc``, ``y_pred`` holds the thresholded 0/1 predictions and
        ``y_pred_proba`` the flattened probabilities.
    """
    model.eval()

    # Feed the input on the model's own device so evaluation also works
    # when the model has been moved off the CPU.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    with torch.no_grad():
        X_test_tensor = torch.as_tensor(X_test, dtype=torch.float32, device=device)
        outputs = model(X_test_tensor)

        # A model whose forward() already ends in a sigmoid must not be
        # squashed a second time: sigmoid(p) for p in [0, 1] is always
        # >= 0.5, which would turn every prediction into the positive class.
        if from_logits is None:
            from_logits = bool(((outputs < 0) | (outputs > 1)).any())
        if from_logits:
            outputs = torch.sigmoid(outputs)

        y_pred_proba = outputs.cpu().numpy().flatten()

    y_pred = (y_pred_proba >= threshold).astype(int)

    # Threshold-based metrics use the hard predictions; AUC uses the scores.
    metrics = {
        'accuracy': accuracy_score(y_test, y_pred),
        'precision': precision_score(y_test, y_pred),
        'recall': recall_score(y_test, y_pred),
        'f1_score': f1_score(y_test, y_pred),
        'auc_roc': roc_auc_score(y_test, y_pred_proba)
    }
    return metrics, y_pred, y_pred_proba
def plot_training_history(train_losses, val_losses, train_accuracies, val_accuracies):
    """Show loss and accuracy curves side by side.

    The left panel plots the training/validation losses, the right panel the
    training/validation accuracies.  Each sequence is plotted against its
    index, labelled as the epoch.  The figure is displayed with
    ``plt.show()``; nothing is returned.
    """
    _, axes = plt.subplots(1, 2, figsize=(15, 5))

    # One row per panel:
    # (train series, train label, val series, val label, title, y-axis label)
    panels = (
        (train_losses, 'Training Loss',
         val_losses, 'Validation Loss',
         'Model Loss', 'Loss'),
        (train_accuracies, 'Training Accuracy',
         val_accuracies, 'Validation Accuracy',
         'Model Accuracy', 'Accuracy'),
    )

    for axis, panel in zip(axes, panels):
        train_series, train_label, val_series, val_label, title, y_label = panel
        axis.plot(train_series, label=train_label, color='blue')
        axis.plot(val_series, label=val_label, color='red')
        axis.set_title(title)
        axis.set_xlabel('Epoch')
        axis.set_ylabel(y_label)
        axis.legend()
        axis.grid(True)

    plt.tight_layout()
    plt.show()
def plot_confusion_matrix(y_true, y_pred, class_names=None):
    """Plot the confusion matrix as an annotated heatmap and return it.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels.
        class_names: Tick labels for both axes.  Defaults to
            ``['Charged Off', 'Fully Paid']`` when omitted.

    Returns:
        The matrix computed by ``sklearn.metrics.confusion_matrix``.
    """
    from sklearn.metrics import confusion_matrix

    # Build the default per call instead of sharing one mutable list
    # object across every invocation.
    if class_names is None:
        class_names = ['Charged Off', 'Fully Paid']

    cm = confusion_matrix(y_true, y_pred)

    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=class_names, yticklabels=class_names)
    plt.title('Confusion Matrix')
    plt.xlabel('Predicted')
    plt.ylabel('Actual')
    plt.show()
    return cm
def model_summary(model):
    """Print a short report about ``model`` to stdout.

    The report contains the class name, the total and trainable parameter
    counts, and one line per ``nn.Linear`` / ``nn.Dropout`` submodule in
    ``named_modules()`` order.  Nothing is returned.
    """
    heavy_rule = "=" * 60
    light_rule = "-" * 40

    print(heavy_rule)
    print("MODEL ARCHITECTURE SUMMARY")
    print(heavy_rule)

    # Single pass over the parameters, tallying both counts at once.
    total_params = 0
    trainable_params = 0
    for param in model.parameters():
        size = param.numel()
        total_params += size
        if param.requires_grad:
            trainable_params += size

    print(f"Model: {model.__class__.__name__}")
    print(f"Total parameters: {total_params:,}")
    print(f"Trainable parameters: {trainable_params:,}")

    print("\nLayer Details:")
    print(light_rule)
    for layer_name, layer in model.named_modules():
        if isinstance(layer, (nn.Linear, nn.Dropout)):
            print(f"{layer_name}: {layer}")
    print(heavy_rule)
if __name__ == "__main__":
    # Example usage: load the processed data, build an untrained model
    # and print its architecture.  Training itself lives in train.py.
    print("Loading processed data...")
    X_train, y_train, X_test, y_test, feature_names = load_processed_data()
    print(f"Training data shape: {X_train.shape}")
    print(f"Test data shape: {X_test.shape}")
    print(f"Feature names: {feature_names}")

    # Size the input layer from the data that was just loaded rather than
    # relying on the constructor's default of 9 features, so the summary
    # describes a model that can actually consume this dataset.
    model = LoanPredictionDeepANN(input_size=len(feature_names))
    model_summary(model)
    print("\nModel created successfully!")
    print("Use train.py to train the model.")