first
- app.py +108 -0
- requirements.txt +4 -0
app.py
ADDED
@@ -0,0 +1,108 @@
import datetime
import joblib
from math import radians
from sml import cc_features

import pandas as pd
import numpy as np
import plotly.express as px
from matplotlib import pyplot
import warnings

import hopsworks
from sml import synthetic_data

import streamlit as st

import folium
from streamlit_folium import st_folium
import json

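# Batch scoring window: the most recent 200 hours of transactions.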
start_date = (datetime.datetime.now() - datetime.timedelta(hours=200))
end_date = (datetime.datetime.now())

synthetic_data.set_random_seed(12345)
credit_cards = [cc["cc_num"] for cc in synthetic_data.generate_list_credit_card_numbers()]
lat = 0
long = 0

warnings.filterwarnings("ignore")

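# Log in to Hopsworks and get a handle to the project's feature store.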
project = hopsworks.login()
fs = project.get_feature_store()

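# Streamlit re-runs the whole script on every interaction, so the feature view,
# the batch data and the downloaded model are wrapped in st.cache to avoid repeated work.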
@st.cache(allow_output_mutation=True, suppress_st_warning=True)
def retrieve_dataset(fv, start_date, end_date):
    st.write(36 * "-")
    print_fancy_header('\nDataset Retrieving...')
    batch_data = fv.get_batch_data(start_time=start_date, end_time=end_date)
    batch_data.drop(["tid", "cc_num", "datetime"], axis=1, inplace=True)
    return batch_data


@st.cache(suppress_st_warning=True, allow_output_mutation=True)
def get_feature_view():
    fv = fs.get_feature_view("cc_trans_fraud", 1)
    return fv


@st.cache(allow_output_mutation=True, suppress_st_warning=True)
def get_model(project=project):
    mr = project.get_model_registry()
    model = mr.get_model("cc_fraud", version=1)
    model_dir = model.download()
    return joblib.load(model_dir + "/cc_fraud_model.pkl")

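# Pie chart of how many transactions in the scored batch are flagged as fraud.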
def explore_data(batch_data):
    st.write(36 * "-")
    print_fancy_header('\nData Exploration...')
    labels = ["Suspected of Fraud", "Not Suspected of Fraud"]
    unique, counts = np.unique(batch_data.fraud.values, return_counts=True)
    values = counts.tolist()

    def plot_pie(values, labels):
        fig = px.pie(values=values, names=labels, title='Distribution of predicted fraud transactions')
        return fig

    fig1 = plot_pie(values, labels)
    st.plotly_chart(fig1)

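# Small helpers: coloured section headers and mapping 0/1 predictions to labels.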
def print_fancy_header(text, font_size=24):
    res = f'<span style="color:#ff5f27; font-size: {font_size}px;">{text}</span>'
    st.markdown(res, unsafe_allow_html=True)


def transform_preds(predictions):
    return ['Fraud' if pred == 1 else 'Not Fraud' for pred in predictions]

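# ---- Streamlit page: sidebar progress bar, title and connection status ----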
progress_bar = st.sidebar.header('⚙️ Working Progress')
progress_bar = st.sidebar.progress(0)
st.title('Fraud transactions detection')

st.write(36 * "-")
print_fancy_header('\nConnecting to Hopsworks Feature Store...')

st.write(36 * "-")
print_fancy_header('\nConnecting to Model Registry on Hopsworks...')
model = get_model(project)
st.write(model)
st.write("✅ Connected!")

progress_bar.progress(40)

st.write(36 * "-")
print_fancy_header('\nFetch batch data and predict')
fv = get_feature_view()

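# On click: read the batch from the feature view, score it with the model,
# and label each prediction before plotting the fraud distribution.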
if st.button('Make a prediction'):
    batch_data = retrieve_dataset(fv, start_date, end_date)
    st.write("✅ Retrieved!")
    progress_bar.progress(55)
    predictions = model.predict(batch_data)
    predictions = transform_preds(predictions)
    batch_data_to_explore = batch_data.copy()
    batch_data_to_explore['fraud'] = predictions
    explore_data(batch_data_to_explore)

st.button("Re-run")
requirements.txt
ADDED
@@ -0,0 +1,4 @@
hopsworks
joblib
scikit-learn
streamlit