niladridutta committed on
Commit b929aa7 · verified · 1 Parent(s): 6442bec

Create app.py

Files changed (1)
  1. app.py +73 -0
app.py ADDED
@@ -0,0 +1,73 @@
+ import streamlit as st
+ import pandas as pd
+ import openpyxl
+ import lifetimes
+ import numpy as np
+
+ import matplotlib.pyplot as plt
+ import seaborn as sns
+ sns.set()
+
+ import warnings
+ warnings.filterwarnings('ignore')
+
+
+ st.set_page_config(page_title='Detect Inactive Records')
+ st.title('Detect Inactive Records')
+ st.subheader('Upload your Excel file')
+ uploaded_file = st.file_uploader('Choose an XLSX file', type='xlsx')
+ if uploaded_file is not None:
+     st.markdown('---')
+
+     # Load the uploaded workbook, parsing InvoiceDate as a datetime
+     @st.cache_data
+     def load_excel(file1):
+         df = pd.read_excel(file1, engine='openpyxl', parse_dates=['InvoiceDate'])
+         return df
+
+     data = load_excel(uploaded_file)
+     st.subheader('Data Preview')
+     st.dataframe(data.head(20))
+
+     # Feature selection
+     features = ['CustomerID', 'InvoiceNo', 'InvoiceDate', 'Quantity', 'UnitPrice']
+     data_clv = data[features].copy()
+     data_clv['TotalSales'] = data_clv['Quantity'].multiply(data_clv['UnitPrice'])
+
+     # Check for missing values, then drop rows without a CustomerID
+     missing = pd.DataFrame(zip(data_clv.isnull().sum(), data_clv.isnull().sum() / len(data_clv)),
+                            columns=['Count', 'Proportion'], index=data_clv.columns)
+     data_clv = data_clv[pd.notnull(data_clv['CustomerID'])]
+
+     # Remove rows with non-positive TotalSales
+     data_clv = data_clv[data_clv['TotalSales'] > 0]
+
+     # Create the RFM summary using summary_data_from_transaction_data
+     summary = lifetimes.utils.summary_data_from_transaction_data(data_clv, 'CustomerID', 'InvoiceDate', 'TotalSales')
+     summary = summary.reset_index()
+     summary['frequency'].plot(kind='hist', bins=50)
+     one_time_buyers = round(sum(summary['frequency'] == 0) / float(len(summary)) * 100, 2)
+
+     # Fit the BG/NBD model
+     bgf = lifetimes.BetaGeoFitter(penalizer_coef=0.0)
+     bgf.fit(summary['frequency'], summary['recency'], summary['T'])
+     bgf_coefficient = bgf.summary
+
+     # Compute the probability that each customer is still alive
+     summary['probability_alive'] = bgf.conditional_probability_alive(summary['frequency'], summary['recency'], summary['T'])
+
+     # Predict future transactions for the next 300 days based on historical data
+     t = 300
+     summary['pred_num_txn'] = round(bgf.conditional_expected_number_of_purchases_up_to_time(t, summary['frequency'], summary['recency'], summary['T']), 2)
+     summary.sort_values(by='pred_num_txn', ascending=False).head(10).reset_index()
+
+     # Hidden trends
+     ax = sns.countplot(x='pred_num_txn', data=summary)
+     plt.scatter(summary['probability_alive'], summary['pred_num_txn'])
+     summary_correlation = summary.corr()
+
+     # Label customers: at least one predicted transaction means ACTIVE, otherwise INACTIVE
+     summary1 = summary
+     summary1['Active/Inactive'] = summary1['pred_num_txn'].apply(lambda x: 'ACTIVE' if x >= 1 else 'INACTIVE')
+     selector = st.selectbox('Select User ID', summary1['CustomerID'], index=None, placeholder='Select Customer ID')
+     summary2 = summary1[['CustomerID', 'Active/Inactive']]
+     if selector is not None:
+         selected = summary2.loc[summary1['CustomerID'] == selector].iloc[0, 1]
+         st.write('STATUS:', selected)
+
+     # Per-customer average quantity and sales; the first row is overwritten with fixed values
+     trends = data_clv.groupby('CustomerID')['Quantity'].mean().reset_index()
+     trends1 = data_clv.groupby('CustomerID')['TotalSales'].mean().reset_index()
+     trends.at[0, 'Quantity'] = 7.42
+     trends1.at[0, 'TotalSales'] = 77.183
+     summary1 = summary1.merge(trends, how='left', on='CustomerID')
+     summary1 = summary1.merge(trends1, how='left', on='CustomerID')
+
+     # Offer the labelled summary as a CSV download
+     out = summary1.to_csv().encode('utf-8')
+     st.download_button(label='DOWNLOAD RESULT', data=out, file_name='CLV_OUTPUT.csv', mime='text/csv')
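
For context, a minimal standalone sketch of the BG/NBD workflow this app builds on, using the CDNOW summary dataset bundled with lifetimes as an illustrative stand-in for the uploaded Excel data; the 300-day horizon and the "at least one predicted transaction means ACTIVE" rule mirror the app's choices, not anything required by the library.

    # Standalone sketch (no Streamlit): fit BG/NBD on a bundled summary dataset
    from lifetimes import BetaGeoFitter
    from lifetimes.datasets import load_cdnow_summary

    summary = load_cdnow_summary(index_col=[0])  # columns: frequency, recency, T

    bgf = BetaGeoFitter(penalizer_coef=0.0)
    bgf.fit(summary['frequency'], summary['recency'], summary['T'])

    # Probability each customer is still "alive" and expected purchases over the next 300 days
    summary['probability_alive'] = bgf.conditional_probability_alive(
        summary['frequency'], summary['recency'], summary['T'])
    summary['pred_num_txn'] = bgf.conditional_expected_number_of_purchases_up_to_time(
        300, summary['frequency'], summary['recency'], summary['T'])

    # Same labelling rule as the app
    summary['Active/Inactive'] = summary['pred_num_txn'].apply(
        lambda x: 'ACTIVE' if x >= 1 else 'INACTIVE')
    print(summary.head())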