import traceback
import warnings

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler, LabelEncoder
from transformers import BertTokenizer, BertModel

warnings.filterwarnings('ignore')
class FraudDetectionTester:
    """Load a saved fraud-detection checkpoint and score transactions with it.

    Construction loads the ``bert-base-uncased`` tokenizer and restores, from
    the checkpoint at ``model_path``, the network weights together with the
    fitted scaler, label encoder and isolation forest stored alongside them.

    Console messages are plain ASCII so they print on any terminal encoding.
    """

    # Raw fields every transaction has to provide before feature engineering.
    REQUIRED_FIELDS = (
        'amount', 'merchant_category', 'description', 'hour', 'day_of_week',
        'days_since_last_transaction', 'transaction_count_1h',
        'transaction_count_24h', 'avg_amount_1h', 'location_risk_score',
        'account_age_days',
    )

    # Column order handed to the scaler and to the network. Its length is the
    # width of the model's numerical input, so the two can no longer drift.
    # NOTE(review): presumably the order used when the scaler was fitted --
    # confirm against the training script.
    NUMERICAL_FEATURES = (
        'amount_log', 'hour', 'day_of_week', 'days_since_last_transaction',
        'transaction_count_1h', 'transaction_count_24h', 'avg_amount_1h',
        'location_risk_score', 'account_age_days', 'merchant_category_encoded',
        'is_weekend', 'is_night', 'high_frequency', 'amount_deviation',
    )

    def __init__(self, model_path='fraud_detection_model.pth'):
        """Initialize the fraud detection tester and load the checkpoint.

        Args:
            model_path: Path of the saved checkpoint file.

        Raises:
            FileNotFoundError: If ``model_path`` does not exist.
            Exception: Any other loading error is re-raised by ``load_model``.
        """
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
        self.model_path = model_path
        self.model = None
        self.scaler = None
        self.label_encoder = None
        self.isolation_forest = None

        self.load_model()

    def create_bert_fraud_model(self, numerical_features_dim):
        """Recreate the BERT fraud detection model architecture.

        Submodule names and the order of layers inside each ``nn.Sequential``
        determine the ``state_dict`` keys, so they must stay exactly as they
        are for the saved weights to load.

        Args:
            numerical_features_dim: Width of the numerical feature vector.

        Returns:
            A freshly initialised ``nn.Module`` (weights not yet restored).
        """

        class BERTFraudDetector(nn.Module):
            """BERT text branch + numerical branch feeding one classifier."""

            def __init__(self, bert_model_name, numerical_features_dim, dropout_rate=0.3):
                super().__init__()

                self.bert = BertModel.from_pretrained(bert_model_name)

                # Freeze every BERT weight, then re-enable the last two
                # encoder layers.
                for param in self.bert.parameters():
                    param.requires_grad = False
                for param in self.bert.encoder.layer[-2:].parameters():
                    param.requires_grad = True

                # Both branches are projected to 256 dimensions.
                self.text_projection = nn.Linear(self.bert.config.hidden_size, 256)
                self.numerical_projection = nn.Linear(numerical_features_dim, 256)

                # Scalar anomaly head on the projected numerical features.
                # It has no final activation, so its output is unbounded.
                self.anomaly_detector = nn.Sequential(
                    nn.Linear(256, 128),
                    nn.ReLU(),
                    nn.Dropout(dropout_rate),
                    nn.Linear(128, 64),
                    nn.ReLU(),
                    nn.Linear(64, 1)
                )

                # Input is text (256) + numerical (256) + anomaly score (1).
                self.classifier = nn.Sequential(
                    nn.Linear(512 + 1, 256),
                    nn.ReLU(),
                    nn.Dropout(dropout_rate),
                    nn.Linear(256, 128),
                    nn.ReLU(),
                    nn.Dropout(dropout_rate),
                    nn.Linear(128, 64),
                    nn.ReLU(),
                    nn.Linear(64, 1),
                    nn.Sigmoid()
                )

            def forward(self, input_ids, attention_mask, numerical_features):
                """Return ``(fraud_probability, anomaly_score)`` per row."""
                bert_output = self.bert(input_ids=input_ids, attention_mask=attention_mask)
                text_features = self.text_projection(bert_output.pooler_output)

                numerical_features = self.numerical_projection(numerical_features)
                anomaly_score = self.anomaly_detector(numerical_features)

                combined_features = torch.cat(
                    [text_features, numerical_features, anomaly_score], dim=1
                )
                fraud_probability = self.classifier(combined_features)

                # squeeze(-1) drops only the trailing size-1 feature axis; a
                # bare squeeze() would also drop the batch axis for a batch
                # of one.
                return fraud_probability.squeeze(-1), anomaly_score.squeeze(-1)

        return BERTFraudDetector('bert-base-uncased', numerical_features_dim)

    def load_model(self):
        """Load the pre-trained fraud detection model.

        Populates ``self.scaler``, ``self.label_encoder``,
        ``self.isolation_forest`` and ``self.model`` from the checkpoint and
        puts the model in eval mode on ``self.device``.

        Raises:
            FileNotFoundError: If the checkpoint file is missing.
            Exception: Any other failure is reported and re-raised unchanged.
        """
        try:
            print(f"Loading model from {self.model_path}...")

            # The allow-list only matters if the load below is ever switched
            # to weights_only=True, and the hook is absent from older PyTorch
            # releases, so its absence must not abort loading.
            register_safe_globals = getattr(torch.serialization, 'add_safe_globals', None)
            if register_safe_globals is not None:
                register_safe_globals([StandardScaler, LabelEncoder, IsolationForest])

            # SECURITY: weights_only=False runs the full pickle machinery and
            # can execute arbitrary code. Only load checkpoints you created or
            # otherwise trust.
            checkpoint = torch.load(self.model_path, map_location=self.device, weights_only=False)

            self.scaler = checkpoint['scaler']
            self.label_encoder = checkpoint['label_encoder']
            self.isolation_forest = checkpoint['isolation_forest']

            self.model = self.create_bert_fraud_model(len(self.NUMERICAL_FEATURES))
            self.model.load_state_dict(checkpoint['model_state_dict'])
            self.model.to(self.device)
            self.model.eval()

            print("Model loaded successfully!")

        except FileNotFoundError:
            print(f"Error: Model file '{self.model_path}' not found!")
            print("Make sure you have trained and saved the model first.")
            raise
        except Exception as e:
            print(f"Error loading model: {str(e)}")
            print("If you're still getting errors, try updating PyTorch or ensure the model file is from a trusted source.")
            raise

    def tokenize_descriptions(self, descriptions, max_length=128):
        """Tokenize transaction descriptions for BERT.

        Args:
            descriptions: A single string, a list, any iterable, or an object
                exposing ``tolist()`` (for example a pandas Series).
            max_length: Truncation length passed to the tokenizer.

        Returns:
            Tuple ``(input_ids, attention_mask)`` of PyTorch tensors.
        """
        if hasattr(descriptions, 'tolist'):
            descriptions = descriptions.tolist()
        elif isinstance(descriptions, str):
            descriptions = [descriptions]
        elif not isinstance(descriptions, list):
            descriptions = list(descriptions)

        # The tokenizer needs real strings; values may be numbers or NaN.
        descriptions = [str(desc) for desc in descriptions]

        encoded = self.tokenizer(
            descriptions,
            truncation=True,
            padding=True,
            max_length=max_length,
            return_tensors='pt'
        )

        return encoded['input_ids'], encoded['attention_mask']

    @staticmethod
    def _add_engineered_features(df):
        """Add the derived numeric columns to ``df`` in place and return it."""
        df['amount_log'] = np.log1p(df['amount'])
        df['is_weekend'] = (df['day_of_week'] >= 5).astype(int)
        df['is_night'] = ((df['hour'] >= 22) | (df['hour'] <= 6)).astype(int)
        df['high_frequency'] = (df['transaction_count_1h'] > 3).astype(int)
        # The +1 in the denominator avoids dividing by zero when the hourly
        # average is 0.
        df['amount_deviation'] = (
            (df['amount'] - df['avg_amount_1h']).abs() / (df['avg_amount_1h'] + 1)
        )
        return df

    def _encode_merchant_category(self, categories):
        """Encode merchant categories, mapping unseen ones to code 0.

        Unseen values are handled row by row, so known categories in the same
        batch keep their real code and the warning names the values that were
        actually unknown.
        """
        try:
            return self.label_encoder.transform(categories)
        except ValueError:
            known = {label: code for code, label in enumerate(self.label_encoder.classes_)}
            unknown = sorted({str(value) for value in categories if value not in known})
            if not unknown:
                # The failure was not caused by an unseen label; do not mask it.
                raise
            listing = ", ".join(f"'{value}'" for value in unknown)
            print(f"Warning: Unknown merchant category {listing}. Using default value.")
            # NOTE: code 0 is also the code of the first known category.
            return [known.get(value, 0) for value in categories]

    def preprocess_single_transaction(self, transaction):
        """Preprocess a single transaction for prediction.

        Args:
            transaction: A dict of raw fields, or anything ``pd.DataFrame``
                accepts.

        Returns:
            Tuple ``(df, X_numerical)``: the frame with engineered columns and
            ``processed_description`` added, and the scaled numerical matrix
            in ``NUMERICAL_FEATURES`` order.

        Raises:
            KeyError: If any field in ``REQUIRED_FIELDS`` is missing.
        """
        if isinstance(transaction, dict):
            df = pd.DataFrame([transaction])
        else:
            df = pd.DataFrame(transaction)

        # Report every missing field at once instead of failing on the first.
        missing = [field for field in self.REQUIRED_FIELDS if field not in df.columns]
        if missing:
            raise KeyError(f"Transaction is missing required field(s): {', '.join(missing)}")

        df = self._add_engineered_features(df)
        df['merchant_category_encoded'] = self._encode_merchant_category(df['merchant_category'])

        # list(...) is required: indexing a frame with a tuple would be read
        # as one (multi-level) column key.
        X_numerical = self.scaler.transform(df[list(self.NUMERICAL_FEATURES)])

        # Lower-case and strip punctuation before tokenisation.
        df['processed_description'] = (
            df['description'].astype(str).str.lower().str.replace(r'[^\w\s]', '', regex=True)
        )

        return df, X_numerical

    @staticmethod
    def _combine_scores(bert_score, isolation_score, anomaly_score):
        """Blend the three signals into one score clamped to [0, 1].

        The anomaly head is an unbounded raw output, so the weighted sum can
        leave [0, 1]. Clamping keeps the reported probability and the risk
        meter meaningful without changing the fraud decision or the risk
        level, because every threshold they use lies strictly inside (0, 1).

        Raises:
            ValueError: If the blended score is NaN or infinite. A broken
                score must surface as an error, not as "0% fraud".
        """
        # IsolationForest.decision_function is negative for outliers, so
        # 1 - (score + 0.5) grows as the sample looks more anomalous.
        raw = (0.6 * bert_score +
               0.3 * (1 - (isolation_score + 0.5)) +
               0.1 * anomaly_score)
        if not np.isfinite(raw):
            raise ValueError(f"Model produced a non-finite fraud score: {raw}")
        return float(min(1.0, max(0.0, raw)))

    @staticmethod
    def _transaction_id(transaction, fallback):
        """Return the transaction's id, or ``fallback`` if it has none."""
        if isinstance(transaction, dict):
            return transaction.get('transaction_id', fallback)
        return fallback

    def _score_transaction(self, transaction, fallback_id):
        """Run one transaction through all models and build its result dict."""
        df, X_numerical = self.preprocess_single_transaction(transaction)

        processed_descriptions = df['processed_description'].tolist()
        input_ids, attention_masks = self.tokenize_descriptions(processed_descriptions)

        with torch.no_grad():
            batch_num = torch.as_tensor(X_numerical, dtype=torch.float32).to(self.device)
            batch_ids = input_ids.to(self.device)
            batch_masks = attention_masks.to(self.device)
            fraud_prob, anomaly_score = self.model(batch_ids, batch_masks, batch_num)

        isolation_score = float(self.isolation_forest.decision_function(X_numerical)[0])

        # reshape(-1)[0] takes the first row whether or not a batch axis is
        # present.
        fraud_prob_val = fraud_prob.reshape(-1)[0].item()
        anomaly_score_val = anomaly_score.reshape(-1)[0].item()

        combined_score = self._combine_scores(fraud_prob_val, isolation_score, anomaly_score_val)

        return {
            'transaction_id': self._transaction_id(transaction, fallback_id),
            'amount': transaction['amount'],
            'description': transaction['description'],
            'fraud_probability': combined_score,
            'is_fraud_predicted': bool(combined_score > 0.5),
            'risk_level': self.get_risk_level(combined_score),
            'anomaly_score': float(anomaly_score_val),
            'bert_score': float(fraud_prob_val),
            'isolation_score': isolation_score
        }

    def predict_fraud(self, transactions):
        """Predict fraud for one or more transactions.

        Args:
            transactions: A single transaction dict or an iterable of them.

        Returns:
            One dict per transaction, in input order. A successful entry has
            the keys ``transaction_id``, ``amount``, ``description``,
            ``fraud_probability``, ``is_fraud_predicted``, ``risk_level``,
            ``anomaly_score``, ``bert_score`` and ``isolation_score``. A
            failed entry has only ``transaction_id`` and ``error``.
        """
        print("Analyzing transactions for fraud...")

        if isinstance(transactions, dict):
            transactions = [transactions]

        results = []
        for position, transaction in enumerate(transactions, start=1):
            fallback_id = f'test_{position}'
            try:
                results.append(self._score_transaction(transaction, fallback_id))
            except Exception as e:
                # Per-item boundary: one bad transaction must not abort the
                # batch, so the failure is printed and recorded instead.
                print(f"Error processing transaction {position}: {str(e)}")
                traceback.print_exc()
                results.append({
                    'transaction_id': self._transaction_id(transaction, fallback_id),
                    'error': str(e)
                })

        return results

    def get_risk_level(self, score):
        """Determine risk level based on fraud probability.

        Bands are exclusive at the lower edge: above 0.8 is CRITICAL, above
        0.6 HIGH, above 0.4 MEDIUM, above 0.2 LOW, otherwise MINIMAL.
        """
        if score > 0.8:
            return 'CRITICAL'
        elif score > 0.6:
            return 'HIGH'
        elif score > 0.4:
            return 'MEDIUM'
        elif score > 0.2:
            return 'LOW'
        else:
            return 'MINIMAL'

    def display_results(self, results):
        """Display prediction results in a nice format.

        Args:
            results: The list returned by ``predict_fraud``. Entries carrying
                an ``error`` key are printed as one error line.
        """
        print("\n" + "="*80)
        print("FRAUD DETECTION RESULTS")
        print("="*80)

        for position, result in enumerate(results, start=1):
            if 'error' in result:
                print(f"\nTransaction {position}: ERROR - {result['error']}")
                continue

            probability = result['fraud_probability']

            print(f"\nTransaction {position}:")
            print(f" ID: {result['transaction_id']}")
            print(f" Amount: ${result['amount']:.2f}")
            print(f" Description: {result['description']}")
            print(f" Fraud Probability: {probability:.4f} ({probability*100:.2f}%)")

            if result['is_fraud_predicted']:
                print(" Prediction: FRAUD DETECTED")
            else:
                print(" Prediction: LEGITIMATE")

            print(f" Risk Level: {result['risk_level']}")
            print(f" Anomaly Score: {result['anomaly_score']:.4f}")
            print(f" BERT Score: {result['bert_score']:.4f}")
            print(f" Isolation Score: {result['isolation_score']:.4f}")

            # Keep the bar inside its 20 cells even if a caller supplies a
            # probability outside [0, 1].
            filled = max(0, min(20, int(probability * 20)))
            risk_bar = "#" * filled
            print(f" Risk Meter: [{risk_bar:<20}] {probability*100:.1f}%")

        print("\n" + "="*80)
def create_sample_transactions():
    """Return four hand-written demo transactions as a list of dicts.

    Each row is zipped against the shared field names, so every dict has the
    same keys in the same order.
    """
    field_names = (
        'transaction_id', 'amount', 'merchant_category', 'description',
        'hour', 'day_of_week', 'days_since_last_transaction',
        'transaction_count_1h', 'transaction_count_24h', 'avg_amount_1h',
        'location_risk_score', 'account_age_days',
    )
    rows = (
        ('TEST_001', 45.67, 'grocery', 'WALMART SUPERCENTER CA 1234',
         14, 2, 1.0, 1, 3, 50.0, 0.1, 730),
        ('TEST_002', 2999.99, 'online', 'SUSPICIOUS ELECTRONICS STORE XX 9999',
         3, 6, 60.0, 12, 25, 150.0, 0.95, 15),
        ('TEST_003', 89.50, 'restaurant', 'STARBUCKS COFFEE NY 5678',
         8, 1, 0.5, 1, 4, 85.0, 0.2, 1095),
        ('TEST_004', 500.00, 'atm', 'ATM WITHDRAWAL FOREIGN COUNTRY 0000',
         23, 0, 0.1, 5, 8, 200.0, 0.8, 365),
    )
    return [dict(zip(field_names, row)) for row in rows]
def create_custom_transaction():
    """Interactively build one transaction dict from console input.

    Prompts for every raw field in a fixed order. Text answers are stripped
    and fall back to a default when empty. Numeric answers are parsed and
    checked against the range named in the prompt, and must be finite and
    non-negative, because the amount later goes through ``log1p``.

    Returns:
        The transaction dict, or ``None`` if any answer could not be parsed
        or was out of range (the reason is printed).
    """
    print("\nCREATE CUSTOM TRANSACTION")
    print("-" * 40)

    def ask_number(prompt, cast, label, low=0, high=None):
        """Read one number and raise ValueError if it is out of range."""
        value = cast(input(prompt))
        # The isfinite check rejects 'nan' and 'inf', which float() accepts.
        if not np.isfinite(value):
            raise ValueError(f"{label} must be a finite number, got {value}")
        if value < low or (high is not None and value > high):
            bounds = f"at least {low}" if high is None else f"between {low} and {high}"
            raise ValueError(f"{label} must be {bounds}, got {value}")
        return value

    transaction = {}

    try:
        transaction['transaction_id'] = input("Transaction ID (optional): ").strip() or 'CUSTOM_001'
        transaction['amount'] = ask_number("Amount ($): ", float, "Amount")

        print("Merchant categories: grocery, gas_station, restaurant, online, retail, atm")
        # Stripped so that stray whitespace does not make a listed category
        # look unknown to the label encoder.
        transaction['merchant_category'] = input("Merchant category: ").strip() or 'online'

        transaction['description'] = input("Transaction description: ").strip() or 'Unknown merchant'
        transaction['hour'] = ask_number("Hour (0-23): ", int, "Hour", 0, 23)
        transaction['day_of_week'] = ask_number(
            "Day of week (0=Monday, 6=Sunday): ", int, "Day of week", 0, 6)
        transaction['days_since_last_transaction'] = ask_number(
            "Days since last transaction: ", float, "Days since last transaction")
        transaction['transaction_count_1h'] = ask_number(
            "Transactions in last hour: ", int, "Transactions in last hour")
        transaction['transaction_count_24h'] = ask_number(
            "Transactions in last 24 hours: ", int, "Transactions in last 24 hours")
        transaction['avg_amount_1h'] = ask_number(
            "Average amount in last hour ($): ", float, "Average amount in last hour")
        transaction['location_risk_score'] = ask_number(
            "Location risk score (0-1): ", float, "Location risk score", 0, 1)
        transaction['account_age_days'] = ask_number(
            "Account age in days: ", float, "Account age")

        return transaction

    except ValueError as e:
        print(f"Invalid input: {e}")
        return None
def main():
    """Run the interactive console menu for exercising the fraud model.

    Loads ``fraud_detection_model.pth`` and then loops over four options:
    score the built-in samples, score a fully custom transaction, score a
    quick transaction with defaults for most fields, or exit. Returns early
    if the tester cannot be constructed. End-of-input or Ctrl-C at the menu
    prompt exits the loop.
    """
    print("FRAUD DETECTION MODEL TESTER")
    print("="*50)

    try:
        tester = FraudDetectionTester('fraud_detection_model.pth')
    except Exception as exc:
        # Not a bare except: Ctrl-C and SystemExit must still propagate, and
        # the real cause is shown instead of assuming the file is missing.
        print(f"Could not start the tester: {exc}")
        print("Make sure you have the trained model file 'fraud_detection_model.pth' in the same directory!")
        return

    while True:
        print("\nTESTING OPTIONS:")
        print("1. Test with sample transactions")
        print("2. Create custom transaction")
        print("3. Test single transaction")
        print("4. Exit")

        try:
            choice = input("\nEnter your choice (1-4): ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed or interrupted input means "exit", not a traceback.
            print("\nGoodbye!")
            break

        if choice == '1':
            sample_transactions = create_sample_transactions()
            results = tester.predict_fraud(sample_transactions)
            tester.display_results(results)

        elif choice == '2':
            custom_transaction = create_custom_transaction()
            if custom_transaction:
                results = tester.predict_fraud([custom_transaction])
                tester.display_results(results)

        elif choice == '3':
            print("\nQUICK TRANSACTION TEST")
            print("-" * 30)

            # Only input parsing sits inside the try, so a ValueError raised
            # later by prediction or display is not reported as bad input.
            try:
                amount = float(input("Amount ($): "))
                description = input("Description: ") or 'Unknown transaction'
                hour = int(input("Hour (0-23): "))
                count_1h = int(input("Transactions in last hour: "))
                risk_score = float(input("Risk score (0-1): "))
                if not 0 <= hour <= 23:
                    raise ValueError(f"Hour must be between 0 and 23, got {hour}")
                if not 0 <= risk_score <= 1:
                    raise ValueError(f"Risk score must be between 0 and 1, got {risk_score}")
            except ValueError as e:
                print(f"Invalid input: {e}")
                continue

            # Fields that are not prompted for get fixed defaults.
            quick_transaction = {
                'transaction_id': 'QUICK_TEST',
                'amount': amount,
                'merchant_category': 'online',
                'description': description,
                'hour': hour,
                'day_of_week': 2,
                'days_since_last_transaction': 1.0,
                'transaction_count_1h': count_1h,
                'transaction_count_24h': 5,
                'avg_amount_1h': 100.0,
                'location_risk_score': risk_score,
                'account_age_days': 365
            }

            results = tester.predict_fraud([quick_transaction])
            tester.display_results(results)

        elif choice == '4':
            print("Goodbye!")
            break

        else:
            print("Invalid choice! Please enter 1-4.")
# Start the interactive menu only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    main()