shukdevdatta123 committed on
Commit 0e828f7 · verified · 1 Parent(s): 653a298

Create app.py

Files changed (1)
app.py +470 -0
app.py ADDED
@@ -0,0 +1,470 @@
+ import pandas as pd
+ import numpy as np
+ import torch
+ import torch.nn as nn
+ from transformers import BertTokenizer, BertModel
+ from sklearn.preprocessing import StandardScaler, LabelEncoder
+ from sklearn.ensemble import IsolationForest
+ import warnings
+ warnings.filterwarnings('ignore')
+
+ class FraudDetectionTester:
+     def __init__(self, model_path='fraud_detection_model.pth'):
+         """Initialize the fraud detection tester"""
+         self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+         self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+         self.model_path = model_path
+         self.model = None
+         self.scaler = None
+         self.label_encoder = None
+         self.isolation_forest = None
+
+         # Load the model
+         self.load_model()
+
+     def create_bert_fraud_model(self, numerical_features_dim):
+         """Recreate the BERT fraud detection model architecture"""
+
+         class BERTFraudDetector(nn.Module):
+             def __init__(self, bert_model_name, numerical_features_dim, dropout_rate=0.3):
+                 super(BERTFraudDetector, self).__init__()
+
+                 # BERT for text processing
+                 self.bert = BertModel.from_pretrained(bert_model_name)
+
+                 # Freeze BERT parameters for faster training (optional)
+                 for param in self.bert.parameters():
+                     param.requires_grad = False
+
+                 # Unfreeze last few layers for fine-tuning
+                 for param in self.bert.encoder.layer[-2:].parameters():
+                     param.requires_grad = True
+
+                 # Feature processing layers
+                 self.text_projection = nn.Linear(self.bert.config.hidden_size, 256)
+                 self.numerical_projection = nn.Linear(numerical_features_dim, 256)
+
+                 # Anomaly detection features
+                 self.anomaly_detector = nn.Sequential(
+                     nn.Linear(256, 128),
+                     nn.ReLU(),
+                     nn.Dropout(dropout_rate),
+                     nn.Linear(128, 64),
+                     nn.ReLU(),
+                     nn.Linear(64, 1)
+                 )
+
+                 # Combined classifier
+                 self.classifier = nn.Sequential(
+                     nn.Linear(512 + 1, 256),  # 256 + 256 + 1 (anomaly score)
+                     nn.ReLU(),
+                     nn.Dropout(dropout_rate),
+                     nn.Linear(256, 128),
+                     nn.ReLU(),
+                     nn.Dropout(dropout_rate),
+                     nn.Linear(128, 64),
+                     nn.ReLU(),
+                     nn.Linear(64, 1),
+                     nn.Sigmoid()
+                 )
+
+             def forward(self, input_ids, attention_mask, numerical_features):
+                 # Process text with BERT
+                 bert_output = self.bert(input_ids=input_ids, attention_mask=attention_mask)
+                 text_features = self.text_projection(bert_output.pooler_output)
+
+                 # Process numerical features
+                 numerical_features = self.numerical_projection(numerical_features)
+
+                 # Anomaly detection
+                 anomaly_score = self.anomaly_detector(numerical_features)
+
+                 # Combine all features
+                 combined_features = torch.cat([text_features, numerical_features, anomaly_score], dim=1)
+
+                 # Final classification
+                 fraud_probability = self.classifier(combined_features)
+
+                 return fraud_probability.squeeze(), anomaly_score.squeeze()
+
+         return BERTFraudDetector('bert-base-uncased', numerical_features_dim)
+
+     def load_model(self):
+         """Load the pre-trained fraud detection model"""
+         try:
+             print(f"🔄 Loading model from {self.model_path}...")
+
+             # Add safe globals for sklearn objects
+             torch.serialization.add_safe_globals([
+                 StandardScaler,
+                 LabelEncoder,
+                 IsolationForest
+             ])
+
+             # Load with weights_only=False for backward compatibility
+             # This is safe if you trust the source of the model file
+             checkpoint = torch.load(self.model_path, map_location=self.device, weights_only=False)
+
+             # Load preprocessing objects
+             self.scaler = checkpoint['scaler']
+             self.label_encoder = checkpoint['label_encoder']
+             self.isolation_forest = checkpoint['isolation_forest']
+
+             # Create and load model
+             numerical_features_dim = 14  # Same as training
+             self.model = self.create_bert_fraud_model(numerical_features_dim)
+             self.model.load_state_dict(checkpoint['model_state_dict'])
+             self.model.to(self.device)
+             self.model.eval()
+
+             print("✅ Model loaded successfully!")
+
+         except FileNotFoundError:
+             print(f"❌ Error: Model file '{self.model_path}' not found!")
+             print("Make sure you have trained and saved the model first.")
+             raise
+         except Exception as e:
+             print(f"❌ Error loading model: {str(e)}")
+             print("If you're still getting errors, try updating PyTorch or ensure the model file is from a trusted source.")
+             raise
+
+     def tokenize_descriptions(self, descriptions, max_length=128):
+         """Tokenize transaction descriptions for BERT"""
+         # Convert pandas Series to list if needed
+         if hasattr(descriptions, 'tolist'):
+             descriptions = descriptions.tolist()
+         elif isinstance(descriptions, str):
+             descriptions = [descriptions]
+         elif not isinstance(descriptions, list):
+             descriptions = list(descriptions)
+
+         # Ensure all descriptions are strings
+         descriptions = [str(desc) for desc in descriptions]
+
+         encoded = self.tokenizer(
+             descriptions,
+             truncation=True,
+             padding=True,
+             max_length=max_length,
+             return_tensors='pt'
+         )
+
+         return encoded['input_ids'], encoded['attention_mask']
+
+     def preprocess_single_transaction(self, transaction):
+         """Preprocess a single transaction for prediction"""
+         # Create DataFrame from transaction
+         if isinstance(transaction, dict):
+             df = pd.DataFrame([transaction])
+         else:
+             df = pd.DataFrame(transaction)
+
+         # Feature engineering (same as training)
+         df['amount_log'] = np.log1p(df['amount'])
+         df['is_weekend'] = (df['day_of_week'] >= 5).astype(int)
+         df['is_night'] = ((df['hour'] >= 22) | (df['hour'] <= 6)).astype(int)
+         df['high_frequency'] = (df['transaction_count_1h'] > 3).astype(int)
+         df['amount_deviation'] = abs(df['amount'] - df['avg_amount_1h']) / (df['avg_amount_1h'] + 1)
+
+         # Handle unknown categories for merchant_category
+         try:
+             df['merchant_category_encoded'] = self.label_encoder.transform(df['merchant_category'])
+         except ValueError:
+             print(f"⚠️ Warning: Unknown merchant category '{df['merchant_category'].iloc[0]}'. Using default value.")
+             # Use the first category as default or assign a default encoded value
+             df['merchant_category_encoded'] = 0
+
+         # Prepare numerical features
+         numerical_features = ['amount_log', 'hour', 'day_of_week', 'days_since_last_transaction',
+                               'transaction_count_1h', 'transaction_count_24h', 'avg_amount_1h',
+                               'location_risk_score', 'account_age_days', 'merchant_category_encoded',
+                               'is_weekend', 'is_night', 'high_frequency', 'amount_deviation']
+
+         X_numerical = self.scaler.transform(df[numerical_features])
+
+         # Process text - ensure it's a string
+         df['processed_description'] = df['description'].astype(str).str.lower().str.replace(r'[^\w\s]', '', regex=True)
+
+         return df, X_numerical
+
+     def predict_fraud(self, transactions):
+         """Predict fraud for one or more transactions"""
+         print("🔍 Analyzing transactions for fraud...")
+
+         # Handle single transaction
+         if isinstance(transactions, dict):
+             transactions = [transactions]
+
+         results = []
+
+         for i, transaction in enumerate(transactions):
+             try:
+                 # Preprocess transaction
+                 df, X_numerical = self.preprocess_single_transaction(transaction)
+
+                 # Tokenize description - extract the actual string values
+                 processed_descriptions = df['processed_description'].tolist()
+                 input_ids, attention_masks = self.tokenize_descriptions(processed_descriptions)
+
+                 # Make prediction
+                 with torch.no_grad():
+                     batch_num = torch.tensor(X_numerical).float().to(self.device)
+                     batch_ids = input_ids.to(self.device)
+                     batch_masks = attention_masks.to(self.device)
+
+                     fraud_prob, anomaly_score = self.model(batch_ids, batch_masks, batch_num)
+
+                 # Get isolation forest prediction
+                 isolation_pred = self.isolation_forest.decision_function(X_numerical)
+
+                 # Handle single prediction vs batch
+                 if isinstance(fraud_prob, torch.Tensor):
+                     if fraud_prob.dim() == 0:  # Single prediction
+                         fraud_prob_val = fraud_prob.item()
+                         anomaly_score_val = anomaly_score.item()
+                     else:  # Batch prediction
+                         fraud_prob_val = fraud_prob[0].item()
+                         anomaly_score_val = anomaly_score[0].item()
+                 else:
+                     fraud_prob_val = float(fraud_prob)
+                     anomaly_score_val = float(anomaly_score)
+
+                 # Combine predictions (ensemble approach)
+                 combined_score = (0.6 * fraud_prob_val +
+                                   0.3 * (1 - (isolation_pred[0] + 0.5)) +
+                                   0.1 * anomaly_score_val)
+
+                 # Create result
+                 result = {
+                     'transaction_id': transaction.get('transaction_id', f'test_{i+1}'),
+                     'amount': transaction['amount'],
+                     'description': transaction['description'],
+                     'fraud_probability': float(combined_score),
+                     'is_fraud_predicted': bool(combined_score > 0.5),
+                     'risk_level': self.get_risk_level(combined_score),
+                     'anomaly_score': float(anomaly_score_val),
+                     'bert_score': float(fraud_prob_val),
+                     'isolation_score': float(isolation_pred[0])
+                 }
+
+                 results.append(result)
+
+             except Exception as e:
+                 print(f"❌ Error processing transaction {i+1}: {str(e)}")
+                 import traceback
+                 traceback.print_exc()  # Print full error traceback for debugging
+                 results.append({
+                     'transaction_id': transaction.get('transaction_id', f'test_{i+1}'),
+                     'error': str(e)
+                 })
+
+         return results
+
+     def get_risk_level(self, score):
+         """Determine risk level based on fraud probability"""
+         if score > 0.8:
+             return 'CRITICAL'
+         elif score > 0.6:
+             return 'HIGH'
+         elif score > 0.4:
+             return 'MEDIUM'
+         elif score > 0.2:
+             return 'LOW'
+         else:
+             return 'MINIMAL'
+
+     def display_results(self, results):
+         """Display prediction results in a nice format"""
+         print("\n" + "="*80)
+         print("🚨 FRAUD DETECTION RESULTS")
+         print("="*80)
+
+         for i, result in enumerate(results):
+             if 'error' in result:
+                 print(f"\n❌ Transaction {i+1}: ERROR - {result['error']}")
+                 continue
+
+             print(f"\n📋 Transaction {i+1}:")
+             print(f"   ID: {result['transaction_id']}")
+             print(f"   Amount: ${result['amount']:.2f}")
+             print(f"   Description: {result['description']}")
+             print(f"   🎯 Fraud Probability: {result['fraud_probability']:.4f} ({result['fraud_probability']*100:.2f}%)")
+
+             # Color-coded prediction
+             if result['is_fraud_predicted']:
+                 print(f"   🚨 Prediction: FRAUD DETECTED")
+             else:
+                 print(f"   ✅ Prediction: LEGITIMATE")
+
+             print(f"   📊 Risk Level: {result['risk_level']}")
+             print(f"   🔍 Anomaly Score: {result['anomaly_score']:.4f}")
+             print(f"   🤖 BERT Score: {result['bert_score']:.4f}")
+             print(f"   🏝️ Isolation Score: {result['isolation_score']:.4f}")
+
+             # Risk indicator
+             risk_bar = "█" * int(result['fraud_probability'] * 20)
+             print(f"   📈 Risk Meter: [{risk_bar:<20}] {result['fraud_probability']*100:.1f}%")
+
+         print("\n" + "="*80)
+
+ def create_sample_transactions():
+     """Create sample transactions for testing"""
+     return [
+         {
+             'transaction_id': 'TEST_001',
+             'amount': 45.67,
+             'merchant_category': 'grocery',
+             'description': 'WALMART SUPERCENTER CA 1234',
+             'hour': 14,
+             'day_of_week': 2,
+             'days_since_last_transaction': 1.0,
+             'transaction_count_1h': 1,
+             'transaction_count_24h': 3,
+             'avg_amount_1h': 50.0,
+             'location_risk_score': 0.1,
+             'account_age_days': 730
+         },
+         {
+             'transaction_id': 'TEST_002',
+             'amount': 2999.99,
+             'merchant_category': 'online',
+             'description': 'SUSPICIOUS ELECTRONICS STORE XX 9999',
+             'hour': 3,
+             'day_of_week': 6,
+             'days_since_last_transaction': 60.0,
+             'transaction_count_1h': 12,
+             'transaction_count_24h': 25,
+             'avg_amount_1h': 150.0,
+             'location_risk_score': 0.95,
+             'account_age_days': 15
+         },
+         {
+             'transaction_id': 'TEST_003',
+             'amount': 89.50,
+             'merchant_category': 'restaurant',
+             'description': 'STARBUCKS COFFEE NY 5678',
+             'hour': 8,
+             'day_of_week': 1,
+             'days_since_last_transaction': 0.5,
+             'transaction_count_1h': 1,
+             'transaction_count_24h': 4,
+             'avg_amount_1h': 85.0,
+             'location_risk_score': 0.2,
+             'account_age_days': 1095
+         },
+         {
+             'transaction_id': 'TEST_004',
+             'amount': 500.00,
+             'merchant_category': 'atm',
+             'description': 'ATM WITHDRAWAL FOREIGN COUNTRY 0000',
+             'hour': 23,
+             'day_of_week': 0,
+             'days_since_last_transaction': 0.1,
+             'transaction_count_1h': 5,
+             'transaction_count_24h': 8,
+             'avg_amount_1h': 200.0,
+             'location_risk_score': 0.8,
+             'account_age_days': 365
+         }
+     ]
+
+ def create_custom_transaction():
+     """Interactive function to create custom transaction"""
+     print("\n🛠️ CREATE CUSTOM TRANSACTION")
+     print("-" * 40)
+
+     transaction = {}
+
+     try:
+         transaction['transaction_id'] = input("Transaction ID (optional): ") or 'CUSTOM_001'
+         transaction['amount'] = float(input("Amount ($): "))
+
+         print("Merchant categories: grocery, gas_station, restaurant, online, retail, atm")
+         transaction['merchant_category'] = input("Merchant category: ") or 'online'
+
+         transaction['description'] = input("Transaction description: ") or 'Unknown merchant'
+         transaction['hour'] = int(input("Hour (0-23): "))
+         transaction['day_of_week'] = int(input("Day of week (0=Monday, 6=Sunday): "))
+         transaction['days_since_last_transaction'] = float(input("Days since last transaction: "))
+         transaction['transaction_count_1h'] = int(input("Transactions in last hour: "))
+         transaction['transaction_count_24h'] = int(input("Transactions in last 24 hours: "))
+         transaction['avg_amount_1h'] = float(input("Average amount in last hour ($): "))
+         transaction['location_risk_score'] = float(input("Location risk score (0-1): "))
+         transaction['account_age_days'] = float(input("Account age in days: "))
+
+         return transaction
+
+     except ValueError as e:
+         print(f"❌ Invalid input: {e}")
+         return None
+
+ def main():
+     """Main testing function"""
+     print("🚀 FRAUD DETECTION MODEL TESTER")
+     print("="*50)
+
+     # Initialize tester
+     try:
+         tester = FraudDetectionTester('fraud_detection_model.pth')
+     except Exception:
+         print("Make sure you have the trained model file 'fraud_detection_model.pth' in the same directory!")
+         return
+
+     while True:
+         print("\n📋 TESTING OPTIONS:")
+         print("1. Test with sample transactions")
+         print("2. Create custom transaction")
+         print("3. Test single transaction")
+         print("4. Exit")
+
+         choice = input("\nEnter your choice (1-4): ").strip()
+
+         if choice == '1':
+             # Test with sample transactions
+             sample_transactions = create_sample_transactions()
+             results = tester.predict_fraud(sample_transactions)
+             tester.display_results(results)
+
+         elif choice == '2':
+             # Create custom transaction
+             custom_transaction = create_custom_transaction()
+             if custom_transaction:
+                 results = tester.predict_fraud([custom_transaction])
+                 tester.display_results(results)
+
+         elif choice == '3':
+             # Quick single transaction test
+             print("\n⚡ QUICK TRANSACTION TEST")
+             print("-" * 30)
+
+             try:
+                 quick_transaction = {
+                     'transaction_id': 'QUICK_TEST',
+                     'amount': float(input("Amount ($): ")),
+                     'merchant_category': 'online',
+                     'description': input("Description: ") or 'Unknown transaction',
+                     'hour': int(input("Hour (0-23): ")),
+                     'day_of_week': 2,
+                     'days_since_last_transaction': 1.0,
+                     'transaction_count_1h': int(input("Transactions in last hour: ")),
+                     'transaction_count_24h': 5,
+                     'avg_amount_1h': 100.0,
+                     'location_risk_score': float(input("Risk score (0-1): ")),
+                     'account_age_days': 365
+                 }
+
+                 results = tester.predict_fraud([quick_transaction])
+                 tester.display_results(results)
+
+             except ValueError as e:
+                 print(f"❌ Invalid input: {e}")
+
+         elif choice == '4':
+             print("👋 Goodbye!")
+             break
+
+         else:
+             print("❌ Invalid choice! Please enter 1-4.")
+
+ if __name__ == "__main__":
+     main()
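
For quick verification without the interactive menu, the classes and helpers added in this commit can also be driven programmatically. A minimal sketch, assuming the file above is saved as app.py and a trained fraud_detection_model.pth checkpoint (containing the 'model_state_dict', 'scaler', 'label_encoder', and 'isolation_forest' keys that load_model expects) sits in the working directory:

    # Hypothetical non-interactive usage of the code added in this commit
    from app import FraudDetectionTester, create_sample_transactions

    tester = FraudDetectionTester('fraud_detection_model.pth')   # loads checkpoint, BERT, and preprocessors
    results = tester.predict_fraud(create_sample_transactions()) # returns a list of result dicts
    tester.display_results(results)                               # pretty-prints fraud probability and risk level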