vishalsh13 committed on
Commit
adec62c
·
1 Parent(s): a6b2f62

code update

Browse files
Files changed (1) hide show
  1. app.py +26 -55
app.py CHANGED
@@ -1,8 +1,6 @@
1
  import gradio as gr
2
  import pandas as pd
3
  import numpy as np
4
- import seaborn as sns
5
- import matplotlib.pyplot as plt
6
  import re
7
  from sklearn.ensemble import RandomForestClassifier
8
  from sklearn.preprocessing import LabelEncoder
@@ -37,11 +35,15 @@ def load_data():
37
 
38
  data = load_data()
39
 
40
- # Preprocessing
41
- le = LabelEncoder()
42
- data['Type_encoded'] = le.fit_transform(data['Type'])
43
- data['City_encoded'] = le.fit_transform(data['City'])
44
- data['Income_encoded'] = le.fit_transform(data['Income'])
 
 
 
 
45
 
46
  # Train model
47
  features = ['Amount', 'Type_encoded', 'City_encoded', 'Age', 'Income_encoded']
@@ -51,7 +53,6 @@ y = data['Fraud']
51
  model = RandomForestClassifier(random_state=42, n_estimators=100)
52
  model.fit(X, y)
53
 
54
- # Enhanced NLP processing with fuzzy matching
55
  def process_nl_query(query):
56
  try:
57
  # Extract amount
@@ -59,7 +60,7 @@ def process_nl_query(query):
59
  if amount_match:
60
  amount = float(amount_match.group(1).replace(',', ''))
61
  else:
62
- return "Error: Could not extract transaction amount. Please specify the amount clearly."
63
 
64
  # Extract transaction type
65
  trans_type = 'Credit' if 'credit' in query.lower() else 'Debit'
@@ -67,26 +68,27 @@ def process_nl_query(query):
67
  # Fuzzy match city
68
  cities = ['New York', 'Los Angeles', 'Chicago', 'Houston', 'Phoenix']
69
  city_match = process.extractOne(query, cities)
70
- city = city_match[0] if city_match[1] > 70 else None
71
 
72
  # Extract age
73
  age_match = re.search(r'(\d+)\s*(?:years?|yrs?)?(?:\s*old)?', query)
74
- if age_match:
75
- age = int(age_match.group(1))
76
- else:
77
- return "Error: Could not extract age. Please specify the age clearly."
78
 
79
  # Extract income level
80
  income = 'Low' if 'low' in query.lower() else \
81
  'High' if 'high' in query.lower() else 'Medium'
 
 
 
 
82
 
83
  # Prepare input
84
  input_df = pd.DataFrame({
85
  'Amount': [amount],
86
- 'Type_encoded': le.transform([trans_type])[0],
87
- 'City_encoded': le.transform([city])[0] if city else -1,
88
- 'Age': [age],
89
- 'Income_encoded': le.transform([income])[0]
90
  })
91
 
92
  # Predict
@@ -106,7 +108,7 @@ def process_nl_query(query):
106
  f"Transaction Details:\n"
107
  f"- Amount: ${amount:,.2f}\n"
108
  f"- Type: {trans_type}\n"
109
- f"- City: {city if city else 'Unknown'}\n"
110
  f"- Age: {age}\n"
111
  f"- Income Level: {income}\n\n"
112
  f"Fraud Analysis:\n"
@@ -116,47 +118,20 @@ def process_nl_query(query):
116
  )
117
 
118
  except Exception as e:
119
- return f"Error processing query: {str(e)}. Please provide clear details including amount, type, city, age, and income level."
120
-
121
- # Plotting functions
122
- def plot_fraud_by_city():
123
- plt.figure(figsize=(10, 6))
124
- sns.countplot(data=data[data['Fraud'] == 1], x='City')
125
- plt.title('Fraud Cases by City')
126
- plt.xlabel('City')
127
- plt.ylabel('Number of Fraud Cases')
128
- return plt
129
-
130
- def plot_fraud_by_income():
131
- plt.figure(figsize=(10, 6))
132
- sns.countplot(data=data[data['Fraud'] == 1], x='Income')
133
- plt.title('Fraud Cases by Income Level')
134
- plt.xlabel('Income Level')
135
- plt.ylabel('Number of Fraud Cases')
136
- return plt
137
-
138
- def plot_amount_vs_age():
139
- plt.figure(figsize=(10, 6))
140
- sns.scatterplot(data=data, x='Amount', y='Age', hue='Fraud')
141
- plt.title('Transaction Amount vs Age (Fraud Highlighted)')
142
- plt.xlabel('Transaction Amount')
143
- plt.ylabel('Age')
144
- return plt
145
 
146
  # Gradio Interface
147
  with gr.Blocks() as demo:
148
- gr.Markdown("## Natural Language Fraud Detection System")
149
 
150
  with gr.Tab("Natural Language Query"):
151
- gr.Markdown("**Example:** 'I saw a credit transaction of $6000 in New York for a 26-year-old client with low income. Is this suspicious?'")
152
  nl_input = gr.Textbox(label="Enter your transaction query:")
153
  nl_output = gr.Textbox(label="Fraud Analysis", lines=10)
154
  gr.Examples(
155
  examples=[
156
- "Is a $8000 credit transaction in Chicago for a 45-year-old with medium income suspicious?",
157
- "Check a debit of $300 in Phoenix for a 60-year-old high income client",
158
- "A $12,000 credit transaction occurred in Los Angeles for a 30-year-old with low income. Should I be concerned?",
159
- "Verify a $5,500 debit in New York by a 22-year-old medium income individual"
160
  ],
161
  inputs=nl_input
162
  )
@@ -165,9 +140,5 @@ with gr.Blocks() as demo:
165
  with gr.Tab("Data Insights"):
166
  gr.Markdown("### Fraud Pattern Analysis")
167
  gr.DataFrame(data[data['Fraud'] == 1].describe())
168
- with gr.Row():
169
- gr.Plot(plot_fraud_by_city)
170
- gr.Plot(plot_fraud_by_income)
171
- gr.Plot(plot_amount_vs_age)
172
 
173
  demo.launch()
 
1
  import gradio as gr
2
  import pandas as pd
3
  import numpy as np
 
 
4
  import re
5
  from sklearn.ensemble import RandomForestClassifier
6
  from sklearn.preprocessing import LabelEncoder
 
35
 
36
  data = load_data()
37
 
38
+ # Initialize separate encoders for each feature
39
+ le_type = LabelEncoder()
40
+ le_city = LabelEncoder()
41
+ le_income = LabelEncoder()
42
+
43
+ # Fit encoders on full dataset (or training data in real scenarios)
44
+ data['Type_encoded'] = le_type.fit_transform(data['Type'])
45
+ data['City_encoded'] = le_city.fit_transform(data['City'])
46
+ data['Income_encoded'] = le_income.fit_transform(data['Income'])
47
 
48
  # Train model
49
  features = ['Amount', 'Type_encoded', 'City_encoded', 'Age', 'Income_encoded']
 
53
  model = RandomForestClassifier(random_state=42, n_estimators=100)
54
  model.fit(X, y)
55
 
 
56
  def process_nl_query(query):
57
  try:
58
  # Extract amount
 
60
  if amount_match:
61
  amount = float(amount_match.group(1).replace(',', ''))
62
  else:
63
+ return "Error: Could not extract transaction amount."
64
 
65
  # Extract transaction type
66
  trans_type = 'Credit' if 'credit' in query.lower() else 'Debit'
 
68
  # Fuzzy match city
69
  cities = ['New York', 'Los Angeles', 'Chicago', 'Houston', 'Phoenix']
70
  city_match = process.extractOne(query, cities)
71
+ city = city_match[0] if city_match[1] > 70 else 'Unknown'
72
 
73
  # Extract age
74
  age_match = re.search(r'(\d+)\s*(?:years?|yrs?)?(?:\s*old)?', query)
75
+ age = int(age_match.group(1)) if age_match else None
 
 
 
76
 
77
  # Extract income level
78
  income = 'Low' if 'low' in query.lower() else \
79
  'High' if 'high' in query.lower() else 'Medium'
80
+
81
+ # Handle unseen labels
82
+ city_encoded = le_city.transform([city])[0] if city in le_city.classes_ else -1
83
+ income_encoded = le_income.transform([income])[0] if income in le_income.classes_ else -1
84
 
85
  # Prepare input
86
  input_df = pd.DataFrame({
87
  'Amount': [amount],
88
+ 'Type_encoded': le_type.transform([trans_type])[0],
89
+ 'City_encoded': city_encoded,
90
+ 'Age': [age] if age else data['Age'].median(), # Handle missing age
91
+ 'Income_encoded': income_encoded
92
  })
93
 
94
  # Predict
 
108
  f"Transaction Details:\n"
109
  f"- Amount: ${amount:,.2f}\n"
110
  f"- Type: {trans_type}\n"
111
+ f"- City: {city}\n"
112
  f"- Age: {age}\n"
113
  f"- Income Level: {income}\n\n"
114
  f"Fraud Analysis:\n"
 
118
  )
119
 
120
  except Exception as e:
121
+ return f"Error processing query: {str(e)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
 
123
  # Gradio Interface
124
  with gr.Blocks() as demo:
125
+ gr.Markdown("## Enhanced Fraud Detection System")
126
 
127
  with gr.Tab("Natural Language Query"):
128
+ gr.Markdown("**Example:** 'Check a $6000 credit in New York for a 26-year-old with low income'")
129
  nl_input = gr.Textbox(label="Enter your transaction query:")
130
  nl_output = gr.Textbox(label="Fraud Analysis", lines=10)
131
  gr.Examples(
132
  examples=[
133
+ "Is a $8000 credit in Chicago for a 45-year-old medium income safe?",
134
+ "Verify a $300 debit in Phoenix for a 60-year-old high income client"
 
 
135
  ],
136
  inputs=nl_input
137
  )
 
140
  with gr.Tab("Data Insights"):
141
  gr.Markdown("### Fraud Pattern Analysis")
142
  gr.DataFrame(data[data['Fraud'] == 1].describe())
 
 
 
 
143
 
144
  demo.launch()