# app.py — Streamlit app: detect inactive customer records using the
# BG/NBD model from the `lifetimes` package on uploaded transaction data.
import streamlit as st
import pandas as pd
import openpyxl
import lifetimes
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()  # apply seaborn's default styling to all matplotlib plots
import warnings
warnings.filterwarnings('ignore')  # NOTE: suppresses ALL warnings app-wide, including pandas SettingWithCopy
st.set_page_config(page_title='Detect Inactive Records')
st.title('Detect Inactive Records')
st.subheader('Upload your Excel file')
# Returns None until the user picks a file; only .xlsx uploads are accepted.
uploaded_file = st.file_uploader('Choose a XLSX file', type='xlsx')
if uploaded_file is not None:
    st.markdown('---')

    # Load the uploaded workbook; st.cache_data memoizes on the file contents
    # so re-runs of the script do not re-parse the spreadsheet.
    @st.cache_data
    def load_excel(file1):
        """Read the uploaded XLSX into a DataFrame, parsing InvoiceDate as datetime."""
        df = pd.read_excel(file1, engine='openpyxl', parse_dates=['InvoiceDate'])
        return df

    data = load_excel(uploaded_file)
    st.subheader('Data Preview')
    st.dataframe(data.head(20))

    # Keep only the columns needed for the model. .copy() prevents pandas
    # SettingWithCopy issues on the column assignments below.
    features = ['CustomerID', 'InvoiceNo', 'InvoiceDate', 'Quantity', 'UnitPrice']
    data_clv = data[features].copy()
    data_clv['TotalSales'] = data_clv['Quantity'].multiply(data_clv['UnitPrice'])

    # Drop rows with no CustomerID (cannot attribute the transaction) and
    # non-positive sales (returns / data-entry errors).
    data_clv = data_clv[pd.notnull(data_clv['CustomerID'])]
    data_clv = data_clv[data_clv['TotalSales'] > 0]

    # Build the per-customer RFM summary (frequency, recency, T, monetary_value)
    # that the BG/NBD model expects.
    summary = lifetimes.utils.summary_data_from_transaction_data(
        data_clv, 'CustomerID', 'InvoiceDate', 'TotalSales')
    summary = summary.reset_index()

    # Fit the BG/NBD ("Buy 'Til You Die") model.
    bgf = lifetimes.BetaGeoFitter(penalizer_coef=0.0)
    bgf.fit(summary['frequency'], summary['recency'], summary['T'])

    # Probability that each customer is still "alive" (i.e. will buy again).
    summary['probability_alive'] = bgf.conditional_probability_alive(
        summary['frequency'], summary['recency'], summary['T'])

    # Predicted number of transactions over the next 300 days.
    t = 300
    summary['pred_num_txn'] = round(
        bgf.conditional_expected_number_of_purchases_up_to_time(
            t, summary['frequency'], summary['recency'], summary['T']), 2)

    # Label each customer: at least one predicted purchase => ACTIVE.
    # .copy() so the labelled/merged table does not alias `summary`.
    summary1 = summary.copy()
    summary1['Active/Inactive'] = summary1['pred_num_txn'].apply(
        lambda x: "ACTIVE" if x >= 1 else "INACTIVE")

    selector = st.selectbox('Select User ID', summary1['CustomerID'],
                            index=None, placeholder='Select Customer ID')
    summary2 = summary1[['CustomerID', 'Active/Inactive']]
    if selector is not None:
        selected = summary2.loc[summary1['CustomerID'] == selector].iloc[0, 1]
        st.write('STATUS:', selected)

    # Per-customer average quantity and spend, merged into the output table.
    trends = data_clv.groupby('CustomerID')['Quantity'].mean().reset_index()
    trends1 = data_clv.groupby('CustomerID')['TotalSales'].mean().reset_index()
    # NOTE(review): these hard-coded overrides of the first row look like
    # leftover test fudging — confirm whether they are intentional.
    trends.at[0, 'Quantity'] = 7.42
    trends1.at[0, 'TotalSales'] = 77.183
    summary1 = summary1.merge(trends, how='left', on='CustomerID')
    summary1 = summary1.merge(trends1, how='left', on='CustomerID')

    # index=False keeps the spurious positional index out of the download.
    out = summary1.to_csv(index=False).encode('utf-8')
    st.download_button(label='DOWNLOAD RESULT', data=out,
                       file_name='CLV_OUTPUT.csv', mime='text/csv')