vishalsh13 committed on
Commit
a6b2f62
·
1 Parent(s): 0ab515f

initial commit

Browse files
Files changed (2) hide show
  1. app.py +173 -0
  2. requirements.txt +8 -0
app.py ADDED
@@ -0,0 +1,173 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ import numpy as np
4
+ import seaborn as sns
5
+ import matplotlib.pyplot as plt
6
+ import re
7
+ from sklearn.ensemble import RandomForestClassifier
8
+ from sklearn.preprocessing import LabelEncoder
9
+ from fuzzywuzzy import process
10
+
11
+ # Enhanced data generation with realistic fraud patterns
12
def load_data(n_samples: int = 1000, seed: int = 42) -> pd.DataFrame:
    """Generate a synthetic transaction table with rule-based fraud labels.

    Args:
        n_samples: Number of transactions to generate.
        seed: Seed for the private random generator, so repeated calls with
            the same arguments return identical tables.

    Returns:
        A DataFrame with the columns TransactionID, Amount, Type, City, Age,
        Income and Fraud (1 when any of the fraud rules below applies, else 0).
    """
    cities = ['New York', 'Los Angeles', 'Chicago', 'Houston', 'Phoenix']
    incomes = ['Low', 'Medium', 'High']

    # A private legacy RandomState yields the same stream as seeding the
    # global generator, without clobbering random state used elsewhere.
    rng = np.random.RandomState(seed)

    data = pd.DataFrame({
        'TransactionID': range(1, n_samples + 1),
        'Amount': rng.uniform(10, 15000, n_samples).round(2),
        'Type': rng.choice(['Credit', 'Debit'], n_samples),
        'City': rng.choice(cities, n_samples),
        'Age': rng.randint(18, 70, n_samples),
        'Income': rng.choice(incomes, n_samples, p=[0.4, 0.4, 0.2]),
    })

    # Fraud rules: a row is fraudulent when at least one of them holds.
    large_for_low_income = (data['Amount'] > 5000) & (data['Income'] == 'Low')
    very_large_credit = (data['Type'] == 'Credit') & (data['Amount'] > 8000)
    young_new_york_spender = (
        (data['City'] == 'New York')
        & data['Age'].between(20, 35)
        & (data['Amount'] > 6000)
    )

    data['Fraud'] = 0
    data.loc[
        large_for_low_income | very_large_credit | young_new_york_spender,
        'Fraud',
    ] = 1

    return data
37
+
38
# Build the synthetic transaction table once, at import time.
data = load_data()

# Integer-encode every categorical column into a sibling "<name>_encoded"
# column.
# NOTE: a single LabelEncoder instance is refit for each column in turn, so
# once this loop finishes `le` only holds the classes of the last column
# ('Income').
le = LabelEncoder()
for column_name in ('Type', 'City', 'Income'):
    data[f'{column_name}_encoded'] = le.fit_transform(data[column_name])

# Fit the fraud classifier on the numeric and encoded columns.
features = ['Amount', 'Type_encoded', 'City_encoded', 'Age', 'Income_encoded']
X, y = data[features], data['Fraud']

model = RandomForestClassifier(random_state=42, n_estimators=100).fit(X, y)
53
+
54
+ # Enhanced NLP processing with fuzzy matching
55
# Compiled once at import time; every pattern captures its digits in group 1.
_DOLLAR_AMOUNT_RE = re.compile(r'\$\s*(\d+(?:,\d{3})*(?:\.\d{2})?)')
_PLAIN_NUMBER_RE = re.compile(r'(?<![\d.,])(\d+(?:,\d{3})*(?:\.\d{2})?)')
# An age must carry an explicit unit ("45-year-old", "45 years", "45 yrs",
# "45yo") so that the transaction amount is never mistaken for it.
_AGE_RE = re.compile(
    r'(?<![\d.,$])(\d{1,3})[\s-]*(?:years?|yrs?|y/?o)\b', re.IGNORECASE
)
_KNOWN_CITIES = ['New York', 'Los Angeles', 'Chicago', 'Houston', 'Phoenix']


def _find_amount(query, age_match):
    """Return the transaction amount found in *query* as a float, or None."""
    # An explicit dollar amount wins wherever it appears in the sentence.
    dollar_match = _DOLLAR_AMOUNT_RE.search(query)
    if dollar_match:
        return float(dollar_match.group(1).replace(',', ''))
    # Otherwise take the first bare number that is not the age itself.
    for number_match in _PLAIN_NUMBER_RE.finditer(query):
        is_age = (
            age_match is not None
            and number_match.start(1) == age_match.start(1)
        )
        if not is_age:
            return float(number_match.group(1).replace(',', ''))
    return None


def _find_city(query):
    """Return the known city mentioned in *query*, or None."""
    lowered = query.lower()
    for city in _KNOWN_CITIES:
        if city.lower() in lowered:
            return city
    # No literal mention: fall back to fuzzy matching to tolerate typos.
    # NOTE(review): scoring a whole sentence against a short city name may
    # stay below the threshold, which is why the literal check runs first.
    best = process.extractOne(query, _KNOWN_CITIES)
    if best and best[1] > 70:
        return best[0]
    return None


def _find_income(query):
    """Return 'Low' or 'High' when mentioned as a whole word, else 'Medium'."""
    if re.search(r'\blow\b', query, re.IGNORECASE):
        return 'Low'
    if re.search(r'\bhigh\b', query, re.IGNORECASE):
        return 'High'
    return 'Medium'


def _encode_label(column, label):
    """Return the integer code stored in ``data`` for *label* of *column*.

    The codes are read back from the ``<column>_encoded`` column of the
    training table instead of the shared ``le`` encoder, because ``le`` is
    refit for every column and therefore only knows the last one.
    """
    encoded_column = f'{column}_encoded'
    pairs = data[[column, encoded_column]].drop_duplicates()
    codes = dict(zip(pairs[column], pairs[encoded_column]))
    return int(codes[label])


def process_nl_query(query: str) -> str:
    """Analyse a free-text transaction description for fraud risk.

    Extracts amount, transaction type, city, age and income level from
    *query*, scores the transaction with the trained ``model`` and returns a
    human-readable report.

    Args:
        query: Free-text description of one transaction.

    Returns:
        The formatted analysis, or a message starting with "Error" when the
        amount or age cannot be extracted or any other step fails.
    """
    try:
        age_match = _AGE_RE.search(query)

        amount = _find_amount(query, age_match)
        if amount is None:
            return "Error: Could not extract transaction amount. Please specify the amount clearly."

        if age_match is None:
            return "Error: Could not extract age. Please specify the age clearly."
        age = int(age_match.group(1))

        # Anything not explicitly described as credit is treated as debit.
        trans_type = 'Credit' if 'credit' in query.lower() else 'Debit'
        city = _find_city(query)
        income = _find_income(query)

        # Column order matches the feature order the model was fitted on;
        # an unrecognised city is encoded as -1.
        input_df = pd.DataFrame({
            'Amount': [amount],
            'Type_encoded': [_encode_label('Type', trans_type)],
            'City_encoded': [_encode_label('City', city) if city else -1],
            'Age': [age],
            'Income_encoded': [_encode_label('Income', income)],
        })

        fraud_probability = model.predict_proba(input_df)[0][1]
        prediction = model.predict(input_df)[0]
        # Report the probability of the predicted class, so a confident
        # "legitimate" verdict is not displayed with a near-zero confidence.
        confidence = fraud_probability if prediction else 1 - fraud_probability

        # Mirror the rules used to label the training data.
        explanation = []
        if amount > 5000 and income == 'Low':
            explanation.append("High amount for low income")
        if amount > 8000 and trans_type == 'Credit':
            explanation.append("Unusually large credit transaction")
        if city == 'New York' and 20 <= age <= 35 and amount > 6000:
            explanation.append("Suspicious pattern for young adults in NYC")

        return (
            f"Transaction Details:\n"
            f"- Amount: ${amount:,.2f}\n"
            f"- Type: {trans_type}\n"
            f"- City: {city if city else 'Unknown'}\n"
            f"- Age: {age}\n"
            f"- Income Level: {income}\n\n"
            f"Fraud Analysis:\n"
            f"- Prediction: {'Potentially Fraudulent' if prediction else 'Likely Legitimate'}\n"
            f"- Confidence: {confidence*100:.1f}%\n"
            f"- Risk Factors: {', '.join(explanation) if explanation else 'No specific risk factors identified'}"
        )

    except Exception as e:
        # UI boundary: always hand the user a readable message.
        return f"Error processing query: {str(e)}. Please provide clear details including amount, type, city, age, and income level."
120
+
121
+ # Plotting functions
122
def plot_fraud_by_city():
    """Build a bar chart of the number of fraudulent transactions per city.

    Returns:
        matplotlib.figure.Figure: A standalone figure holding the chart.
    """
    # Imported locally so the block does not depend on a new file-level import.
    from matplotlib.figure import Figure

    # A standalone Figure is not registered with pyplot, so repeated calls do
    # not pile up open figures or depend on the global "current figure".
    fig = Figure(figsize=(10, 6))
    ax = fig.subplots()
    sns.countplot(data=data[data['Fraud'] == 1], x='City', ax=ax)
    ax.set_title('Fraud Cases by City')
    ax.set_xlabel('City')
    ax.set_ylabel('Number of Fraud Cases')
    return fig
129
+
130
def plot_fraud_by_income():
    """Build a bar chart of the number of fraudulent transactions per income level.

    Returns:
        matplotlib.figure.Figure: A standalone figure holding the chart.
    """
    # Imported locally so the block does not depend on a new file-level import.
    from matplotlib.figure import Figure

    # A standalone Figure is not registered with pyplot, so repeated calls do
    # not pile up open figures or depend on the global "current figure".
    fig = Figure(figsize=(10, 6))
    ax = fig.subplots()
    sns.countplot(data=data[data['Fraud'] == 1], x='Income', ax=ax)
    ax.set_title('Fraud Cases by Income Level')
    ax.set_xlabel('Income Level')
    ax.set_ylabel('Number of Fraud Cases')
    return fig
137
+
138
def plot_amount_vs_age():
    """Build a scatter plot of amount against age, coloured by fraud label.

    Returns:
        matplotlib.figure.Figure: A standalone figure holding the chart.
    """
    # Imported locally so the block does not depend on a new file-level import.
    from matplotlib.figure import Figure

    # A standalone Figure is not registered with pyplot, so repeated calls do
    # not pile up open figures or depend on the global "current figure".
    fig = Figure(figsize=(10, 6))
    ax = fig.subplots()
    sns.scatterplot(data=data, x='Amount', y='Age', hue='Fraud', ax=ax)
    ax.set_title('Transaction Amount vs Age (Fraud Highlighted)')
    ax.set_xlabel('Transaction Amount')
    ax.set_ylabel('Age')
    return fig
145
+
146
+ # Gradio Interface
147
# Two-tab UI: a free-text fraud query tab and a tab of summary charts.
with gr.Blocks() as demo:
    gr.Markdown("## Natural Language Fraud Detection System")

    with gr.Tab("Natural Language Query"):
        gr.Markdown("**Example:** 'I saw a credit transaction of $6000 in New York for a 26-year-old client with low income. Is this suspicious?'")
        nl_input = gr.Textbox(label="Enter your transaction query:")
        nl_output = gr.Textbox(label="Fraud Analysis", lines=10)
        gr.Examples(
            examples=[
                "Is a $8000 credit transaction in Chicago for a 45-year-old with medium income suspicious?",
                "Check a debit of $300 in Phoenix for a 60-year-old high income client",
                "A $12,000 credit transaction occurred in Los Angeles for a 30-year-old with low income. Should I be concerned?",
                "Verify a $5,500 debit in New York by a 22-year-old medium income individual"
            ],
            inputs=nl_input
        )
        # The analysis runs when the user presses Enter in the query box.
        nl_input.submit(fn=process_nl_query, inputs=nl_input, outputs=nl_output)

    with gr.Tab("Data Insights"):
        gr.Markdown("### Fraud Pattern Analysis")
        # describe() keeps the statistic names (count, mean, ...) in the
        # index; move them into a regular column so each row stays labelled
        # when the table is displayed.
        # NOTE(review): assumes the table component renders columns only.
        fraud_summary = (
            data[data['Fraud'] == 1]
            .describe()
            .rename_axis('Statistic')
            .reset_index()
        )
        gr.DataFrame(fraud_summary)
        with gr.Row():
            gr.Plot(plot_fraud_by_city)
            gr.Plot(plot_fraud_by_income)
        gr.Plot(plot_amount_vs_age)

demo.launch()
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ gradio
2
+ pandas
3
+ numpy
4
+ scikit-learn
5
+ matplotlib
6
+ seaborn
7
+ fuzzywuzzy
8
+ python-Levenshtein