niladridutta committed on
Commit b929aa7 · verified · 1 Parent(s): 6442bec

Create app.py

Files changed (1)
  1. app.py +73 -0
app.py ADDED
@@ -0,0 +1,73 @@
+ import streamlit as st
+ import pandas as pd
+ import openpyxl
+ import lifetimes
+ import numpy as np
+
+ import matplotlib.pyplot as plt
+ import seaborn as sns
+ sns.set()
+
+ import warnings
+ warnings.filterwarnings('ignore')
+
+
+ st.set_page_config(page_title='Detect Inactive Records')
+ st.title('Detect Inactive Records')
+ st.subheader('Upload your Excel file')
+ uploaded_file = st.file_uploader('Choose an XLSX file', type='xlsx')
+ if uploaded_file is not None:
+     st.markdown('---')
+
+     # Load the uploaded workbook, parsing InvoiceDate as a datetime
+     @st.cache_data
+     def load_excel(file1):
+         df = pd.read_excel(file1, engine='openpyxl', parse_dates=['InvoiceDate'])
+         return df
+
+     data = load_excel(uploaded_file)
+     st.subheader('Data Preview')
+     st.dataframe(data.head(20))
+
+     # Feature selection
+     features = ['CustomerID', 'InvoiceNo', 'InvoiceDate', 'Quantity', 'UnitPrice']
+     data_clv = data[features].copy()
+     data_clv['TotalSales'] = data_clv['Quantity'].multiply(data_clv['UnitPrice'])
+
+     # Check for missing values, then drop rows without a CustomerID
+     missing = pd.DataFrame(zip(data_clv.isnull().sum(), data_clv.isnull().sum() / len(data_clv)),
+                            columns=['Count', 'Proportion'], index=data_clv.columns)
+     data_clv = data_clv[pd.notnull(data_clv['CustomerID'])]
+
+     # Remove rows with non-positive TotalSales
+     data_clv = data_clv[data_clv['TotalSales'] > 0]
+
+     # Create the RFM summary using summary_data_from_transaction_data
+     summary = lifetimes.utils.summary_data_from_transaction_data(data_clv, 'CustomerID', 'InvoiceDate', 'TotalSales')
+     summary = summary.reset_index()
+     summary['frequency'].plot(kind='hist', bins=50)
+     one_time_buyers = round(sum(summary['frequency'] == 0) / float(len(summary)) * 100, 2)
+
+     # Fit the BG/NBD model
+     bgf = lifetimes.BetaGeoFitter(penalizer_coef=0.0)
+     bgf.fit(summary['frequency'], summary['recency'], summary['T'])
+     bgf_coefficient = bgf.summary
+
+     # Compute the probability that each customer is still alive
+     summary['probability_alive'] = bgf.conditional_probability_alive(summary['frequency'], summary['recency'], summary['T'])
+
+     # Predict future transactions for the next 300 days based on historical data
+     t = 300
+     summary['pred_num_txn'] = round(bgf.conditional_expected_number_of_purchases_up_to_time(t, summary['frequency'], summary['recency'], summary['T']), 2)
+     summary.sort_values(by='pred_num_txn', ascending=False).head(10).reset_index()
+
+     # Hidden trends
+     ax = sns.countplot(x='pred_num_txn', data=summary)
+     plt.scatter(summary['probability_alive'], summary['pred_num_txn'])
+     summary_correlation = summary.corr()
+
+     # Label customers: at least one predicted transaction means ACTIVE, otherwise INACTIVE
+     summary1 = summary
+     summary1['Active/Inactive'] = summary1['pred_num_txn'].apply(lambda x: 'ACTIVE' if x >= 1 else 'INACTIVE')
+     selector = st.selectbox('Select User ID', summary1['CustomerID'], index=None, placeholder='Select Customer ID')
+     summary2 = summary1[['CustomerID', 'Active/Inactive']]
+     if selector is not None:
+         selected = summary2.loc[summary1['CustomerID'] == selector].iloc[0, 1]
+         st.write('STATUS:', selected)
+
+     # Per-customer average quantity and sales; the first row is overwritten with fixed values
+     trends = data_clv.groupby('CustomerID')['Quantity'].mean().reset_index()
+     trends1 = data_clv.groupby('CustomerID')['TotalSales'].mean().reset_index()
+     trends.at[0, 'Quantity'] = 7.42
+     trends1.at[0, 'TotalSales'] = 77.183
+     summary1 = summary1.merge(trends, how='left', on='CustomerID')
+     summary1 = summary1.merge(trends1, how='left', on='CustomerID')
+
+     # Offer the labelled summary as a CSV download
+     out = summary1.to_csv().encode('utf-8')
+     st.download_button(label='DOWNLOAD RESULT', data=out, file_name='CLV_OUTPUT.csv', mime='text/csv')
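
For context, a minimal standalone sketch of the BG/NBD workflow this app builds on, using the CDNOW summary dataset bundled with lifetimes as an illustrative stand-in for the uploaded Excel data; the 300-day horizon and the "at least one predicted transaction means ACTIVE" rule mirror the app's choices, not anything required by the library.

    # Standalone sketch (no Streamlit): fit BG/NBD on a bundled summary dataset
    from lifetimes import BetaGeoFitter
    from lifetimes.datasets import load_cdnow_summary

    summary = load_cdnow_summary(index_col=[0])  # columns: frequency, recency, T

    bgf = BetaGeoFitter(penalizer_coef=0.0)
    bgf.fit(summary['frequency'], summary['recency'], summary['T'])

    # Probability each customer is still "alive" and expected purchases over the next 300 days
    summary['probability_alive'] = bgf.conditional_probability_alive(
        summary['frequency'], summary['recency'], summary['T'])
    summary['pred_num_txn'] = bgf.conditional_expected_number_of_purchases_up_to_time(
        300, summary['frequency'], summary['recency'], summary['T'])

    # Same labelling rule as the app
    summary['Active/Inactive'] = summary['pred_num_txn'].apply(
        lambda x: 'ACTIVE' if x >= 1 else 'INACTIVE')
    print(summary.head())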