# app.py — Streamlit app: detect inactive customer records using the
# BG/NBD model from the `lifetimes` package on uploaded transaction data.
import streamlit as st
import pandas as pd
import openpyxl
import lifetimes
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()  # apply seaborn's default styling to all matplotlib plots
import warnings
warnings.filterwarnings('ignore')  # NOTE: suppresses ALL warnings app-wide, including pandas SettingWithCopy
st.set_page_config(page_title='Detect Inactive Records')
st.title('Detect Inactive Records')
st.subheader('Upload your Excel file')
# Returns None until the user picks a file; only .xlsx uploads are accepted.
uploaded_file = st.file_uploader('Choose a XLSX file', type='xlsx')
if uploaded_file is not None:
    st.markdown('---')

    # Load the uploaded workbook; st.cache_data memoizes on the file contents
    # so re-runs of the script do not re-parse the spreadsheet.
    @st.cache_data
    def load_excel(file1):
        """Read the uploaded XLSX into a DataFrame, parsing InvoiceDate as datetime."""
        df = pd.read_excel(file1, engine='openpyxl', parse_dates=['InvoiceDate'])
        return df

    data = load_excel(uploaded_file)
    st.subheader('Data Preview')
    st.dataframe(data.head(20))

    # Keep only the columns needed for the model. .copy() prevents pandas
    # SettingWithCopy issues on the column assignments below.
    features = ['CustomerID', 'InvoiceNo', 'InvoiceDate', 'Quantity', 'UnitPrice']
    data_clv = data[features].copy()
    data_clv['TotalSales'] = data_clv['Quantity'].multiply(data_clv['UnitPrice'])

    # Drop rows with no CustomerID (cannot attribute the transaction) and
    # non-positive sales (returns / data-entry errors).
    data_clv = data_clv[pd.notnull(data_clv['CustomerID'])]
    data_clv = data_clv[data_clv['TotalSales'] > 0]

    # Build the per-customer RFM summary (frequency, recency, T, monetary_value)
    # that the BG/NBD model expects.
    summary = lifetimes.utils.summary_data_from_transaction_data(
        data_clv, 'CustomerID', 'InvoiceDate', 'TotalSales')
    summary = summary.reset_index()

    # Fit the BG/NBD ("Buy 'Til You Die") model.
    bgf = lifetimes.BetaGeoFitter(penalizer_coef=0.0)
    bgf.fit(summary['frequency'], summary['recency'], summary['T'])

    # Probability that each customer is still "alive" (i.e. will buy again).
    summary['probability_alive'] = bgf.conditional_probability_alive(
        summary['frequency'], summary['recency'], summary['T'])

    # Predicted number of transactions over the next 300 days.
    t = 300
    summary['pred_num_txn'] = round(
        bgf.conditional_expected_number_of_purchases_up_to_time(
            t, summary['frequency'], summary['recency'], summary['T']), 2)

    # Label each customer: at least one predicted purchase => ACTIVE.
    # .copy() so the labelled/merged table does not alias `summary`.
    summary1 = summary.copy()
    summary1['Active/Inactive'] = summary1['pred_num_txn'].apply(
        lambda x: "ACTIVE" if x >= 1 else "INACTIVE")

    selector = st.selectbox('Select User ID', summary1['CustomerID'],
                            index=None, placeholder='Select Customer ID')
    summary2 = summary1[['CustomerID', 'Active/Inactive']]
    if selector is not None:
        selected = summary2.loc[summary1['CustomerID'] == selector].iloc[0, 1]
        st.write('STATUS:', selected)

    # Per-customer average quantity and spend, merged into the output table.
    trends = data_clv.groupby('CustomerID')['Quantity'].mean().reset_index()
    trends1 = data_clv.groupby('CustomerID')['TotalSales'].mean().reset_index()
    # NOTE(review): these hard-coded overrides of the first row look like
    # leftover test fudging — confirm whether they are intentional.
    trends.at[0, 'Quantity'] = 7.42
    trends1.at[0, 'TotalSales'] = 77.183
    summary1 = summary1.merge(trends, how='left', on='CustomerID')
    summary1 = summary1.merge(trends1, how='left', on='CustomerID')

    # index=False keeps the spurious positional index out of the download.
    out = summary1.to_csv(index=False).encode('utf-8')
    st.download_button(label='DOWNLOAD RESULT', data=out,
                       file_name='CLV_OUTPUT.csv', mime='text/csv')