Update app.py
Browse files
app.py
CHANGED
@@ -5,6 +5,7 @@ import torch.nn as nn
|
|
5 |
from transformers import BertTokenizer, BertModel
|
6 |
from sklearn.preprocessing import StandardScaler, LabelEncoder
|
7 |
from sklearn.ensemble import IsolationForest
|
|
|
8 |
import warnings
|
9 |
warnings.filterwarnings('ignore')
|
10 |
|
@@ -101,8 +102,6 @@ class FraudDetectionTester:
|
|
101 |
IsolationForest
|
102 |
])
|
103 |
|
104 |
-
# Load with weights_only=False for backward compatibility
|
105 |
-
# This is safe if you trust the source of the model file
|
106 |
checkpoint = torch.load(self.model_path, map_location=self.device, weights_only=False)
|
107 |
|
108 |
# Load preprocessing objects
|
@@ -125,12 +124,10 @@ class FraudDetectionTester:
|
|
125 |
raise
|
126 |
except Exception as e:
|
127 |
print(f"β Error loading model: {str(e)}")
|
128 |
-
print("If you're still getting errors, try updating PyTorch or ensure the model file is from a trusted source.")
|
129 |
raise
|
130 |
|
131 |
def tokenize_descriptions(self, descriptions, max_length=128):
|
132 |
"""Tokenize transaction descriptions for BERT"""
|
133 |
-
# Convert pandas Series to list if needed
|
134 |
if hasattr(descriptions, 'tolist'):
|
135 |
descriptions = descriptions.tolist()
|
136 |
elif isinstance(descriptions, str):
|
@@ -138,7 +135,6 @@ class FraudDetectionTester:
|
|
138 |
elif not isinstance(descriptions, list):
|
139 |
descriptions = list(descriptions)
|
140 |
|
141 |
-
# Ensure all descriptions are strings
|
142 |
descriptions = [str(desc) for desc in descriptions]
|
143 |
|
144 |
encoded = self.tokenizer(
|
@@ -153,25 +149,22 @@ class FraudDetectionTester:
|
|
153 |
|
154 |
def preprocess_single_transaction(self, transaction):
|
155 |
"""Preprocess a single transaction for prediction"""
|
156 |
-
# Create DataFrame from transaction
|
157 |
if isinstance(transaction, dict):
|
158 |
df = pd.DataFrame([transaction])
|
159 |
else:
|
160 |
df = pd.DataFrame(transaction)
|
161 |
|
162 |
-
# Feature engineering
|
163 |
df['amount_log'] = np.log1p(df['amount'])
|
164 |
df['is_weekend'] = (df['day_of_week'] >= 5).astype(int)
|
165 |
df['is_night'] = ((df['hour'] >= 22) | (df['hour'] <= 6)).astype(int)
|
166 |
df['high_frequency'] = (df['transaction_count_1h'] > 3).astype(int)
|
167 |
df['amount_deviation'] = abs(df['amount'] - df['avg_amount_1h']) / (df['avg_amount_1h'] + 1)
|
168 |
|
169 |
-
# Handle unknown categories
|
170 |
try:
|
171 |
df['merchant_category_encoded'] = self.label_encoder.transform(df['merchant_category'])
|
172 |
-
except ValueError
|
173 |
-
print(f"β οΈ Warning: Unknown merchant category '{df['merchant_category'].iloc[0]}'. Using default value.")
|
174 |
-
# Use the first category as default or assign a default encoded value
|
175 |
df['merchant_category_encoded'] = 0
|
176 |
|
177 |
# Prepare numerical features
|
@@ -182,83 +175,60 @@ class FraudDetectionTester:
|
|
182 |
|
183 |
X_numerical = self.scaler.transform(df[numerical_features])
|
184 |
|
185 |
-
# Process text
|
186 |
df['processed_description'] = df['description'].astype(str).str.lower().str.replace(r'[^\w\s]', '', regex=True)
|
187 |
|
188 |
return df, X_numerical
|
189 |
|
190 |
-
def predict_fraud(self,
|
191 |
-
"""Predict fraud for
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
-
|
204 |
-
|
205 |
-
# Tokenize description - extract the actual string values
|
206 |
-
processed_descriptions = df['processed_description'].tolist()
|
207 |
-
input_ids, attention_masks = self.tokenize_descriptions(processed_descriptions)
|
208 |
|
209 |
-
|
210 |
-
with torch.no_grad():
|
211 |
-
batch_num = torch.tensor(X_numerical).float().to(self.device)
|
212 |
-
batch_ids = input_ids.to(self.device)
|
213 |
-
batch_masks = attention_masks.to(self.device)
|
214 |
-
|
215 |
-
fraud_prob, anomaly_score = self.model(batch_ids, batch_masks, batch_num)
|
216 |
-
|
217 |
-
# Get isolation forest prediction
|
218 |
-
isolation_pred = self.isolation_forest.decision_function(X_numerical)
|
219 |
-
|
220 |
-
# Handle single prediction vs batch
|
221 |
-
if isinstance(fraud_prob, torch.Tensor):
|
222 |
-
if fraud_prob.dim() == 0: # Single prediction
|
223 |
-
fraud_prob_val = fraud_prob.item()
|
224 |
-
anomaly_score_val = anomaly_score.item()
|
225 |
-
else: # Batch prediction
|
226 |
-
fraud_prob_val = fraud_prob[0].item()
|
227 |
-
anomaly_score_val = anomaly_score[0].item()
|
228 |
-
else:
|
229 |
-
fraud_prob_val = float(fraud_prob)
|
230 |
-
anomaly_score_val = float(anomaly_score)
|
231 |
-
|
232 |
-
# Combine predictions (ensemble approach)
|
233 |
-
combined_score = (0.6 * fraud_prob_val +
|
234 |
-
0.3 * (1 - (isolation_pred[0] + 0.5)) +
|
235 |
-
0.1 * anomaly_score_val)
|
236 |
|
237 |
-
#
|
238 |
-
|
239 |
-
'transaction_id': transaction.get('transaction_id', f'test_{i+1}'),
|
240 |
-
'amount': transaction['amount'],
|
241 |
-
'description': transaction['description'],
|
242 |
-
'fraud_probability': float(combined_score),
|
243 |
-
'is_fraud_predicted': bool(combined_score > 0.5),
|
244 |
-
'risk_level': self.get_risk_level(combined_score),
|
245 |
-
'anomaly_score': float(anomaly_score_val),
|
246 |
-
'bert_score': float(fraud_prob_val),
|
247 |
-
'isolation_score': float(isolation_pred[0])
|
248 |
-
}
|
249 |
|
250 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
251 |
|
252 |
-
|
253 |
-
|
254 |
-
|
255 |
-
|
256 |
-
|
257 |
-
|
258 |
-
|
259 |
-
|
260 |
-
|
261 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
262 |
|
263 |
def get_risk_level(self, score):
|
264 |
"""Determine risk level based on fraud probability"""
|
@@ -272,46 +242,89 @@ class FraudDetectionTester:
|
|
272 |
return 'LOW'
|
273 |
else:
|
274 |
return 'MINIMAL'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
275 |
|
276 |
-
|
277 |
-
"
|
278 |
-
|
279 |
-
|
280 |
-
|
281 |
-
|
282 |
-
|
283 |
-
|
284 |
-
|
285 |
-
|
286 |
-
|
287 |
-
|
288 |
-
|
289 |
-
|
290 |
-
|
291 |
-
|
292 |
-
|
293 |
-
|
294 |
-
|
295 |
-
|
296 |
-
|
297 |
-
|
298 |
-
|
299 |
-
|
300 |
-
|
301 |
-
|
302 |
-
|
303 |
-
|
304 |
-
|
305 |
-
|
306 |
-
|
307 |
-
|
308 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
309 |
|
310 |
-
def
|
311 |
-
"""
|
312 |
-
|
313 |
-
{
|
314 |
-
'transaction_id': '
|
315 |
'amount': 45.67,
|
316 |
'merchant_category': 'grocery',
|
317 |
'description': 'WALMART SUPERCENTER CA 1234',
|
@@ -324,8 +337,8 @@ def create_sample_transactions():
|
|
324 |
'location_risk_score': 0.1,
|
325 |
'account_age_days': 730
|
326 |
},
|
327 |
-
{
|
328 |
-
'transaction_id': '
|
329 |
'amount': 2999.99,
|
330 |
'merchant_category': 'online',
|
331 |
'description': 'SUSPICIOUS ELECTRONICS STORE XX 9999',
|
@@ -338,9 +351,9 @@ def create_sample_transactions():
|
|
338 |
'location_risk_score': 0.95,
|
339 |
'account_age_days': 15
|
340 |
},
|
341 |
-
{
|
342 |
-
'transaction_id': '
|
343 |
-
'amount':
|
344 |
'merchant_category': 'restaurant',
|
345 |
'description': 'STARBUCKS COFFEE NY 5678',
|
346 |
'hour': 8,
|
@@ -348,12 +361,12 @@ def create_sample_transactions():
|
|
348 |
'days_since_last_transaction': 0.5,
|
349 |
'transaction_count_1h': 1,
|
350 |
'transaction_count_24h': 4,
|
351 |
-
'avg_amount_1h':
|
352 |
'location_risk_score': 0.2,
|
353 |
'account_age_days': 1095
|
354 |
},
|
355 |
-
{
|
356 |
-
'transaction_id': '
|
357 |
'amount': 500.00,
|
358 |
'merchant_category': 'atm',
|
359 |
'description': 'ATM WITHDRAWAL FOREIGN COUNTRY 0000',
|
@@ -366,105 +379,136 @@ def create_sample_transactions():
|
|
366 |
'location_risk_score': 0.8,
|
367 |
'account_age_days': 365
|
368 |
}
|
369 |
-
|
370 |
-
|
371 |
-
def create_custom_transaction():
|
372 |
-
"""Interactive function to create custom transaction"""
|
373 |
-
print("\nπ οΈ CREATE CUSTOM TRANSACTION")
|
374 |
-
print("-" * 40)
|
375 |
-
|
376 |
-
transaction = {}
|
377 |
|
378 |
-
|
379 |
-
|
380 |
-
|
381 |
-
|
382 |
-
|
383 |
-
|
384 |
-
|
385 |
-
|
386 |
-
|
387 |
-
|
388 |
-
|
389 |
-
|
390 |
-
|
391 |
-
|
392 |
-
|
393 |
-
|
394 |
-
|
395 |
-
return transaction
|
396 |
-
|
397 |
-
except ValueError as e:
|
398 |
-
print(f"β Invalid input: {e}")
|
399 |
-
return None
|
400 |
|
401 |
-
|
402 |
-
|
403 |
-
|
404 |
-
|
|
|
405 |
|
406 |
-
|
407 |
-
|
408 |
-
|
409 |
-
except:
|
410 |
-
print("Make sure you have the trained model file 'fraud_detection_model.pth' in the same directory!")
|
411 |
-
return
|
412 |
|
413 |
-
|
414 |
-
|
415 |
-
|
416 |
-
print("2. Create custom transaction")
|
417 |
-
print("3. Test single transaction")
|
418 |
-
print("4. Exit")
|
419 |
-
|
420 |
-
choice = input("\nEnter your choice (1-4): ").strip()
|
421 |
-
|
422 |
-
if choice == '1':
|
423 |
-
# Test with sample transactions
|
424 |
-
sample_transactions = create_sample_transactions()
|
425 |
-
results = tester.predict_fraud(sample_transactions)
|
426 |
-
tester.display_results(results)
|
427 |
|
428 |
-
|
429 |
-
|
430 |
-
|
431 |
-
|
432 |
-
|
433 |
-
|
|
|
|
|
434 |
|
435 |
-
|
436 |
-
|
437 |
-
|
438 |
-
|
439 |
|
440 |
-
|
441 |
-
|
442 |
-
|
443 |
-
|
444 |
-
|
445 |
-
|
446 |
-
|
447 |
-
'day_of_week': 2,
|
448 |
-
'days_since_last_transaction': 1.0,
|
449 |
-
'transaction_count_1h': int(input("Transactions in last hour: ")),
|
450 |
-
'transaction_count_24h': 5,
|
451 |
-
'avg_amount_1h': 100.0,
|
452 |
-
'location_risk_score': float(input("Risk score (0-1): ")),
|
453 |
-
'account_age_days': 365
|
454 |
-
}
|
455 |
-
|
456 |
-
results = tester.predict_fraud([quick_transaction])
|
457 |
-
tester.display_results(results)
|
458 |
-
|
459 |
-
except ValueError as e:
|
460 |
-
print(f"β Invalid input: {e}")
|
461 |
|
462 |
-
|
463 |
-
|
464 |
-
|
465 |
|
466 |
-
|
467 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
468 |
|
|
|
469 |
if __name__ == "__main__":
|
470 |
-
|
|
|
|
|
|
|
|
|
|
|
|
5 |
from transformers import BertTokenizer, BertModel
|
6 |
from sklearn.preprocessing import StandardScaler, LabelEncoder
|
7 |
from sklearn.ensemble import IsolationForest
|
8 |
+
import gradio as gr
|
9 |
import warnings
|
10 |
warnings.filterwarnings('ignore')
|
11 |
|
|
|
102 |
IsolationForest
|
103 |
])
|
104 |
|
|
|
|
|
105 |
checkpoint = torch.load(self.model_path, map_location=self.device, weights_only=False)
|
106 |
|
107 |
# Load preprocessing objects
|
|
|
124 |
raise
|
125 |
except Exception as e:
|
126 |
print(f"β Error loading model: {str(e)}")
|
|
|
127 |
raise
|
128 |
|
129 |
def tokenize_descriptions(self, descriptions, max_length=128):
|
130 |
"""Tokenize transaction descriptions for BERT"""
|
|
|
131 |
if hasattr(descriptions, 'tolist'):
|
132 |
descriptions = descriptions.tolist()
|
133 |
elif isinstance(descriptions, str):
|
|
|
135 |
elif not isinstance(descriptions, list):
|
136 |
descriptions = list(descriptions)
|
137 |
|
|
|
138 |
descriptions = [str(desc) for desc in descriptions]
|
139 |
|
140 |
encoded = self.tokenizer(
|
|
|
149 |
|
150 |
def preprocess_single_transaction(self, transaction):
|
151 |
"""Preprocess a single transaction for prediction"""
|
|
|
152 |
if isinstance(transaction, dict):
|
153 |
df = pd.DataFrame([transaction])
|
154 |
else:
|
155 |
df = pd.DataFrame(transaction)
|
156 |
|
157 |
+
# Feature engineering
|
158 |
df['amount_log'] = np.log1p(df['amount'])
|
159 |
df['is_weekend'] = (df['day_of_week'] >= 5).astype(int)
|
160 |
df['is_night'] = ((df['hour'] >= 22) | (df['hour'] <= 6)).astype(int)
|
161 |
df['high_frequency'] = (df['transaction_count_1h'] > 3).astype(int)
|
162 |
df['amount_deviation'] = abs(df['amount'] - df['avg_amount_1h']) / (df['avg_amount_1h'] + 1)
|
163 |
|
164 |
+
# Handle unknown categories
|
165 |
try:
|
166 |
df['merchant_category_encoded'] = self.label_encoder.transform(df['merchant_category'])
|
167 |
+
except ValueError:
|
|
|
|
|
168 |
df['merchant_category_encoded'] = 0
|
169 |
|
170 |
# Prepare numerical features
|
|
|
175 |
|
176 |
X_numerical = self.scaler.transform(df[numerical_features])
|
177 |
|
178 |
+
# Process text
|
179 |
df['processed_description'] = df['description'].astype(str).str.lower().str.replace(r'[^\w\s]', '', regex=True)
|
180 |
|
181 |
return df, X_numerical
|
182 |
|
183 |
+
def predict_fraud(self, transaction):
    """Predict fraud for a single transaction.

    The transaction is preprocessed, its description is tokenized, and the
    neural model and the isolation forest are both evaluated. Their outputs
    are blended into one ensemble score (60% model fraud output, 30%
    isolation-forest term, 10% model anomaly output).

    Args:
        transaction: Transaction record in any form accepted by
            ``preprocess_single_transaction``. Only the first row of the
            preprocessed data is scored.

    Returns:
        dict: On success, the keys ``fraud_probability`` (ensemble score
        clamped to [0, 1]), ``is_fraud_predicted`` (raw ensemble score
        above 0.5), ``risk_level``, ``anomaly_score``, ``bert_score`` and
        ``isolation_score``. On any failure the method does not raise; it
        returns ``{'error': <message>}`` so callers can display the problem.
    """

    def _first_scalar(value):
        # Model outputs may be 0-dim or batched tensors (or plain numbers);
        # each output is reduced on its own so differing ranks still work.
        if isinstance(value, torch.Tensor):
            return value.reshape(-1)[0].item()
        return float(value)

    try:
        # Preprocess transaction
        df, X_numerical = self.preprocess_single_transaction(transaction)

        # Tokenize description
        input_ids, attention_masks = self.tokenize_descriptions(
            df['processed_description'].tolist()
        )

        # Make prediction (inference only, so no gradients are tracked)
        with torch.no_grad():
            batch_num = torch.tensor(X_numerical).float().to(self.device)
            batch_ids = input_ids.to(self.device)
            batch_masks = attention_masks.to(self.device)
            fraud_prob, anomaly_score = self.model(batch_ids, batch_masks, batch_num)

        # Get isolation forest prediction
        isolation_pred = self.isolation_forest.decision_function(X_numerical)
        isolation_val = float(isolation_pred[0])

        fraud_prob_val = _first_scalar(fraud_prob)
        anomaly_score_val = _first_scalar(anomaly_score)

        # Combine predictions
        combined_score = (0.6 * fraud_prob_val +
                          0.3 * (1 - (isolation_val + 0.5)) +
                          0.1 * anomaly_score_val)

        # The weighted sum is not guaranteed to stay inside [0, 1]; bound it
        # before reporting it as a probability. np.clip leaves NaN untouched.
        fraud_probability = float(np.clip(combined_score, 0.0, 1.0))

        return {
            'fraud_probability': fraud_probability,
            'is_fraud_predicted': bool(combined_score > 0.5),
            'risk_level': self.get_risk_level(fraud_probability),
            'anomaly_score': float(anomaly_score_val),
            'bert_score': float(fraud_prob_val),
            'isolation_score': isolation_val
        }

    except Exception as e:
        # Deliberate catch-all: failures are reported as data, not raised.
        return {'error': str(e)}
|
232 |
|
233 |
def get_risk_level(self, score):
|
234 |
"""Determine risk level based on fraud probability"""
|
|
|
242 |
return 'LOW'
|
243 |
else:
|
244 |
return 'MINIMAL'
|
245 |
+
|
246 |
+
# Initialize the fraud detection model
print("Initializing fraud detection model...")
try:
    fraud_detector = FraudDetectionTester('fraud_detection_model.pth')
except Exception as exc:
    # A failed load is recorded in the flag rather than aborting the module.
    print(f"Failed to load model: {exc}")
    model_loaded = False
else:
    model_loaded = True
|
254 |
+
|
255 |
+
def predict_transaction_fraud(
    transaction_id,
    amount,
    merchant_category,
    description,
    hour,
    day_of_week,
    days_since_last_transaction,
    transaction_count_1h,
    transaction_count_24h,
    avg_amount_1h,
    location_risk_score,
    account_age_days
):
    """Gradio interface function for fraud prediction.

    Packs the twelve form values into a transaction dict, passes it to
    ``fraud_detector.predict_fraud`` and formats the result as display text.

    Returns:
        tuple: Six strings in this order: prediction label, fraud
        probability (4 decimals), risk level, risk meter, detailed scores,
        summary. If the model is not loaded or the prediction reports an
        error, the first string carries the message and the other five are
        empty.
    """
    blank_outputs = ("", "", "", "", "")

    if not model_loaded:
        return ("❌ Model not loaded. Please ensure 'fraud_detection_model.pth' is available.",) + blank_outputs

    # Create transaction dictionary
    transaction = {
        'transaction_id': transaction_id,
        'amount': amount,
        'merchant_category': merchant_category,
        'description': description,
        'hour': hour,
        'day_of_week': day_of_week,
        'days_since_last_transaction': days_since_last_transaction,
        'transaction_count_1h': transaction_count_1h,
        'transaction_count_24h': transaction_count_24h,
        'avg_amount_1h': avg_amount_1h,
        'location_risk_score': location_risk_score,
        'account_age_days': account_age_days
    }

    # Get prediction
    result = fraud_detector.predict_fraud(transaction)

    if 'error' in result:
        return (f"❌ Error: {result['error']}",) + blank_outputs

    # Format results
    fraud_prob = result['fraud_probability']
    prediction = "🚨 FRAUD DETECTED" if result['is_fraud_predicted'] else "✅ LEGITIMATE"
    risk_level = result['risk_level']

    # Create risk meter visualization. The filled-cell count is clamped so
    # the bar is always exactly meter_width cells wide, even if the score
    # falls outside [0, 1].
    meter_width = 20
    filled_cells = max(0, min(meter_width, int(fraud_prob * meter_width)))
    risk_bar = "█" * filled_cells + "░" * (meter_width - filled_cells)
    risk_meter = f"[{risk_bar}] {fraud_prob*100:.1f}%"

    # Detailed scores
    detailed_scores = (
        "\n"
        f"🤖 BERT Score: {result['bert_score']:.4f}\n"
        f"🏝️ Isolation Score: {result['isolation_score']:.4f}\n"
        f"🔍 Anomaly Score: {result['anomaly_score']:.4f}\n"
    )

    # Summary
    summary = (
        "\n"
        f"💰 Amount: ${amount:.2f}\n"
        f"🏪 Category: {merchant_category}\n"
        f"📝 Description: {description}\n"
        f"🎯 Fraud Probability: {fraud_prob:.4f} ({fraud_prob*100:.2f}%)\n"
        f"📊 Risk Level: {risk_level}\n"
    )

    return prediction, f"{fraud_prob:.4f}", risk_level, risk_meter, detailed_scores, summary
|
322 |
|
323 |
+
def load_sample_transaction(sample_type):
|
324 |
+
"""Load predefined sample transactions"""
|
325 |
+
samples = {
|
326 |
+
"Normal Grocery Purchase": {
|
327 |
+
'transaction_id': 'NORMAL_001',
|
328 |
'amount': 45.67,
|
329 |
'merchant_category': 'grocery',
|
330 |
'description': 'WALMART SUPERCENTER CA 1234',
|
|
|
337 |
'location_risk_score': 0.1,
|
338 |
'account_age_days': 730
|
339 |
},
|
340 |
+
"Suspicious High Amount": {
|
341 |
+
'transaction_id': 'SUSPICIOUS_001',
|
342 |
'amount': 2999.99,
|
343 |
'merchant_category': 'online',
|
344 |
'description': 'SUSPICIOUS ELECTRONICS STORE XX 9999',
|
|
|
351 |
'location_risk_score': 0.95,
|
352 |
'account_age_days': 15
|
353 |
},
|
354 |
+
"Coffee Shop Purchase": {
|
355 |
+
'transaction_id': 'COFFEE_001',
|
356 |
+
'amount': 8.50,
|
357 |
'merchant_category': 'restaurant',
|
358 |
'description': 'STARBUCKS COFFEE NY 5678',
|
359 |
'hour': 8,
|
|
|
361 |
'days_since_last_transaction': 0.5,
|
362 |
'transaction_count_1h': 1,
|
363 |
'transaction_count_24h': 4,
|
364 |
+
'avg_amount_1h': 8.50,
|
365 |
'location_risk_score': 0.2,
|
366 |
'account_age_days': 1095
|
367 |
},
|
368 |
+
"Foreign ATM Withdrawal": {
|
369 |
+
'transaction_id': 'ATM_001',
|
370 |
'amount': 500.00,
|
371 |
'merchant_category': 'atm',
|
372 |
'description': 'ATM WITHDRAWAL FOREIGN COUNTRY 0000',
|
|
|
379 |
'location_risk_score': 0.8,
|
380 |
'account_age_days': 365
|
381 |
}
|
382 |
+
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
383 |
|
384 |
+
if sample_type in samples:
|
385 |
+
sample = samples[sample_type]
|
386 |
+
return (
|
387 |
+
sample['transaction_id'],
|
388 |
+
sample['amount'],
|
389 |
+
sample['merchant_category'],
|
390 |
+
sample['description'],
|
391 |
+
sample['hour'],
|
392 |
+
sample['day_of_week'],
|
393 |
+
sample['days_since_last_transaction'],
|
394 |
+
sample['transaction_count_1h'],
|
395 |
+
sample['transaction_count_24h'],
|
396 |
+
sample['avg_amount_1h'],
|
397 |
+
sample['location_risk_score'],
|
398 |
+
sample['account_age_days']
|
399 |
+
)
|
400 |
+
return [""] * 12
|
|
|
|
|
|
|
|
|
|
|
401 |
|
402 |
+
# Create Gradio interface
# gr.Textbox defines no ``font_family`` argument, so the monospace look of
# the risk meter is applied through a CSS class on that one component.
with gr.Blocks(
    title="🚨 Fraud Detection System",
    theme=gr.themes.Soft(),
    css=".risk-meter textarea, .risk-meter input { font-family: monospace; }",
) as demo:
    gr.Markdown("""
    # 🚨 Advanced Fraud Detection System
    ### Powered by BERT and Machine Learning

    This system analyzes financial transactions using advanced AI to detect potential fraud.
    Enter transaction details below or use sample transactions to test the system.
    """)

    with gr.Row():
        with gr.Column(scale=2):
            gr.Markdown("## 📋 Transaction Details")

            # Sample transaction selector
            with gr.Row():
                sample_dropdown = gr.Dropdown(
                    choices=["Normal Grocery Purchase", "Suspicious High Amount", "Coffee Shop Purchase", "Foreign ATM Withdrawal"],
                    label="🎯 Load Sample Transaction",
                    value="Normal Grocery Purchase"
                )
                load_sample_btn = gr.Button("📥 Load Sample", variant="secondary")

            # Transaction inputs
            with gr.Row():
                transaction_id = gr.Textbox(label="Transaction ID", value="TEST_001")
                amount = gr.Number(label="💰 Amount ($)", value=45.67, minimum=0)

            with gr.Row():
                merchant_category = gr.Dropdown(
                    choices=["grocery", "restaurant", "gas_station", "retail", "online", "atm", "pharmacy", "entertainment"],
                    label="🏪 Merchant Category",
                    value="grocery"
                )
                description = gr.Textbox(label="📝 Transaction Description", value="WALMART SUPERCENTER CA 1234")

            with gr.Row():
                hour = gr.Slider(label="🕐 Hour of Day", minimum=0, maximum=23, value=14, step=1)
                day_of_week = gr.Slider(label="📅 Day of Week (0=Mon, 6=Sun)", minimum=0, maximum=6, value=2, step=1)

            with gr.Row():
                days_since_last = gr.Number(label="📆 Days Since Last Transaction", value=1.0, minimum=0)
                transaction_count_1h = gr.Number(label="🔢 Transactions (1h)", value=1, minimum=0)

            with gr.Row():
                transaction_count_24h = gr.Number(label="🔢 Transactions (24h)", value=3, minimum=0)
                avg_amount_1h = gr.Number(label="💵 Avg Amount (1h)", value=50.0, minimum=0)

            with gr.Row():
                location_risk_score = gr.Slider(label="🌍 Location Risk Score", minimum=0, maximum=1, value=0.1, step=0.01)
                account_age_days = gr.Number(label="👤 Account Age (days)", value=730, minimum=0)

            predict_btn = gr.Button("🔍 Analyze Transaction", variant="primary", size="lg")

        with gr.Column(scale=1):
            gr.Markdown("## 📊 Fraud Analysis Results")

            prediction_output = gr.Textbox(label="🎯 Prediction", interactive=False)
            fraud_prob_output = gr.Textbox(label="📊 Fraud Probability", interactive=False)
            risk_level_output = gr.Textbox(label="⚠️ Risk Level", interactive=False)
            risk_meter_output = gr.Textbox(label="📈 Risk Meter", interactive=False, elem_classes=["risk-meter"])
            detailed_scores_output = gr.Textbox(label="🔍 Detailed Scores", interactive=False, lines=4)
            summary_output = gr.Textbox(label="📋 Summary", interactive=False, lines=6)

    # Event handlers
    # The same twelve components, in the same order, feed the prediction
    # callback and receive the values of a loaded sample.
    transaction_fields = [
        transaction_id, amount, merchant_category, description, hour, day_of_week,
        days_since_last, transaction_count_1h, transaction_count_24h, avg_amount_1h,
        location_risk_score, account_age_days
    ]
    result_fields = [
        prediction_output, fraud_prob_output, risk_level_output,
        risk_meter_output, detailed_scores_output, summary_output
    ]

    predict_btn.click(
        fn=predict_transaction_fraud,
        inputs=transaction_fields,
        outputs=result_fields
    )

    load_sample_btn.click(
        fn=load_sample_transaction,
        inputs=[sample_dropdown],
        outputs=transaction_fields
    )

    gr.Markdown("""
    ---
    ### 📖 How to Use:
    1. **Load Sample**: Choose a predefined sample transaction to quickly test the system
    2. **Enter Details**: Fill in transaction information manually or modify loaded samples
    3. **Analyze**: Click "Analyze Transaction" to get fraud detection results

    ### 🎯 Understanding Results:
    - **Fraud Probability**: Higher values indicate higher fraud risk (0-1 scale)
    - **Risk Levels**: MINIMAL → LOW → MEDIUM → HIGH → CRITICAL
    - **Risk Meter**: Visual representation of fraud probability
    - **Detailed Scores**: Individual model component scores

    ### ⚠️ Model Requirements:
    Ensure `fraud_detection_model.pth` is available in the same directory as this script.
    """)
|
506 |
|
507 |
+
# Launch the interface
if __name__ == "__main__":
    # Options for the Gradio server: bind address, port, share link, debug flag.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "debug": True,
    }
    demo.launch(**launch_options)
|