Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import openpyxl
|
4 |
+
import lifetimes
|
5 |
+
import numpy as np
|
6 |
+
|
7 |
+
import matplotlib.pyplot as plt
|
8 |
+
import seaborn as sns
|
9 |
+
sns.set()
|
10 |
+
|
11 |
+
import warnings
|
12 |
+
warnings.filterwarnings('ignore')
|
13 |
+
|
14 |
+
|
15 |
+
st.set_page_config(page_title='Detect Inactive Records')

# Columns the uploaded workbook must contain for the analysis to run.
REQUIRED_COLUMNS = ['CustomerID', 'InvoiceNo', 'InvoiceDate', 'Quantity', 'UnitPrice']
# Horizon, in days, over which future purchases are predicted per customer.
PREDICTION_HORIZON_DAYS = 300


@st.cache_data
def load_excel(file1):
    """Read the uploaded workbook into a DataFrame.

    The ``InvoiceDate`` column is parsed as dates. The result is cached by
    Streamlit so reruns of the script do not re-read the same upload.
    """
    return pd.read_excel(file1, engine='openpyxl', parse_dates=['InvoiceDate'])


def _prepare_transactions(raw):
    """Return the modelling columns of *raw* plus a ``TotalSales`` column.

    ``TotalSales`` is ``Quantity * UnitPrice``. Rows without a ``CustomerID``
    and rows whose ``TotalSales`` is not strictly positive are dropped.
    """
    # Copy first: adding a column to a slice of the uploaded frame triggers
    # pandas' SettingWithCopyWarning and may not write where expected.
    transactions = raw[REQUIRED_COLUMNS].copy()
    transactions['TotalSales'] = transactions['Quantity'] * transactions['UnitPrice']
    keep = transactions['CustomerID'].notna() & (transactions['TotalSales'] > 0)
    return transactions[keep]


@st.cache_data
def _score_customers(transactions, horizon_days=PREDICTION_HORIZON_DAYS):
    """Fit a BG/NBD model and return one scored row per customer.

    On top of the per-customer summary produced by ``lifetimes`` the result
    carries:

    * ``probability_alive`` - the model's conditional alive probability,
    * ``pred_num_txn`` - expected purchases within *horizon_days*, rounded
      to two decimals,
    * ``Active/Inactive`` - ``"ACTIVE"`` when ``pred_num_txn >= 1``,
      otherwise ``"INACTIVE"``,
    * ``Quantity`` and ``TotalSales`` - the customer's mean per-row values.

    Cached so that picking a customer in the UI does not refit the model.
    """
    summary = lifetimes.utils.summary_data_from_transaction_data(
        transactions, 'CustomerID', 'InvoiceDate', 'TotalSales'
    ).reset_index()

    # Fitting the BG/NBD model
    bgf = lifetimes.BetaGeoFitter(penalizer_coef=0.0)
    bgf.fit(summary['frequency'], summary['recency'], summary['T'])

    # Compute the customer alive probability
    summary['probability_alive'] = bgf.conditional_probability_alive(
        summary['frequency'], summary['recency'], summary['T']
    )

    # Predict future transactions for the next `horizon_days` days based on
    # historical data
    expected = bgf.conditional_expected_number_of_purchases_up_to_time(
        horizon_days, summary['frequency'], summary['recency'], summary['T']
    )
    summary['pred_num_txn'] = round(expected, 2)
    summary['Active/Inactive'] = np.where(
        summary['pred_num_txn'] >= 1, 'ACTIVE', 'INACTIVE'
    )

    # Per-customer averages are taken straight from the uploaded data; no
    # individual customer's values are overridden by hand.
    averages = (
        transactions.groupby('CustomerID')[['Quantity', 'TotalSales']]
        .mean()
        .reset_index()
    )
    return summary.merge(averages, how='left', on='CustomerID')


st.title('Detect Inactive Records')
st.subheader('Upload your Excel file')
uploaded_file = st.file_uploader('Choose a XLSX file', type='xlsx')

if uploaded_file is not None:
    st.markdown('---')

    # Loading the data
    try:
        data = load_excel(uploaded_file)
    except ValueError as exc:
        # pandas raises ValueError when the column named in parse_dates is
        # missing; show that to the user instead of a raw traceback.
        st.error(f'Could not read the uploaded file: {exc}')
        st.stop()

    st.subheader('Data Preview')
    st.dataframe(data.head(20))

    # Feature selection
    missing_columns = [col for col in REQUIRED_COLUMNS if col not in data.columns]
    if missing_columns:
        st.error(
            'The uploaded file is missing required columns: '
            + ', '.join(missing_columns)
        )
        st.stop()

    data_clv = _prepare_transactions(data)
    if data_clv.empty:
        # The model cannot be fitted on zero transactions.
        st.warning('No rows with a CustomerID and positive sales were found.')
        st.stop()

    summary = _score_customers(data_clv)

    selector = st.selectbox(
        'Select User ID',
        summary['CustomerID'],
        index=None,
        placeholder='Select Customer ID',
    )
    if selector is not None:
        selected = summary.loc[
            summary['CustomerID'] == selector, 'Active/Inactive'
        ].iloc[0]
        st.write('STATUS:', selected)

    out = summary.to_csv().encode('utf-8')
    st.download_button(
        label='DOWNLOAD RESULT',
        data=out,
        file_name='CLV_OUTPUT.csv',
        mime='text/csv',
    )
|