Spaces:

loxzdigital
/

Model-CC-Space

Runtime error

App Files Community

Andy Lau committed on Nov 26, 2022

Commit

41e5416

1 Parent(s): 8170b82

fix typo

Browse files

Files changed (1) hide show

app.py +94 -75

app.py CHANGED Viewed

@@ -39,8 +39,8 @@ def table_data():
     ]
     data = {
-        'Field':field,
-        'Data':data
     }
     df = pd.DataFrame.from_dict(data)
@@ -48,26 +48,29 @@ def table_data():
     return df
-def url_button(button_name,url):
     if st.button(button_name):
-        js = """window.open('{url}')""".format(url=url) # New tab or window
         html = '<img src onerror="{}">'.format(js)
         div = Div(text=html)
         st.bokeh_chart(div)
 def get_industry_code_dict(training_dataset):
-    training_dataset['industry_code'] = training_dataset['industry'].astype('category')
     cat_columns = training_dataset.select_dtypes(['category']).columns
-    training_dataset[cat_columns] = training_dataset[cat_columns].apply(lambda x: x.cat.codes)
-    industry_code_dict = dict(zip(training_dataset.industry, training_dataset.industry_code))
     return industry_code_dict
-## extract email body from parse email
-def email_body_extractor(email_data):
     # email_data = parsed_email.data[0]
-    emailstr = email_data.decode("utf-8")
     b = email.message_from_string(emailstr)
     body = ""
@@ -83,20 +86,20 @@ def email_body_extractor(email_data):
     # not multipart - i.e. plain text, no attachments, keeping fingers crossed
     else:
         body = b.get_payload()
-    ## Remove escape sequences
     body = body.replace('\n', '')
     body = body.replace('\t', '')
     body = body.replace('\r', '')
     body = body.replace('</b>', '')
     body = body.replace('<b>', '')
-    ## Extract urls in the email body and get url counts
     extractor = URLExtract()
     urls = extractor.find_urls(body)
     url_cnt = len(urls)
-    ## Remove urls
-    body = re.sub(r'\w+:\/{2}[\d\w-]+(\.[\d\w-]+)*(?:(?:\/[^\s/]*))*', '', body)
     sep = '©'
     body = body.split(sep, 1)[0]
     character_cnt = sum(not chr.isspace() for chr in body)
@@ -106,7 +109,7 @@ def email_body_extractor(email_data):
 def add_bg_from_url():
     st.markdown(
-         f"""
          <style>
          .stApp {{
              background-image: linear-gradient(#0A3144,#126072,#1C8D99);
@@ -115,10 +118,11 @@ def add_bg_from_url():
          }}
          </style>
          """,
-         unsafe_allow_html=True
-     )
-# add_bg_from_url()
 st.markdown("# Character Count: Email Industry")
@@ -128,7 +132,7 @@ st.markdown("# Character Count: Email Industry")
 #     img = PIL.Image.open("figures/ModelCC_solid.png")
 #     st.image(img)
-stats_col1, stats_col2, stats_col3, stats_col4 = st.columns([1,1,1,1])
 with stats_col1:
     st.metric(label="Production", value="Production")
@@ -142,7 +146,6 @@ with stats_col4:
     st.metric(label="Industry", value="Email")
 with st.sidebar:
     with st.expander('Model Description', expanded=False):
@@ -160,9 +163,9 @@ with st.sidebar:
         st.markdown(hide_table_row_index, unsafe_allow_html=True)
         st.table(table_data())
-    url_button('Model Homepage','https://www.loxz.com/#/models/CTA')
     # url_button('Full Report','https://resources.loxz.com/reports/realtime-ml-character-count-model')
-    url_button('Amazon Market Place','https://aws.amazon.com/marketplace')
 industry_lists = [
@@ -178,24 +181,25 @@ industry_lists = [
 ]
 campaign_types = [
-    'Promotional',
-    'Transactional',
-    'Webinar',
-    'Survey',
-    'Newsletter',
     'Engagement',
-    'Curated_Content',
-    'Review_Request',
-    'Product_Announcement',
     'Abandoned_Cart'
 ]
 target_variables = [
-    'conversion_rate',
     'click_to_open_rate'
 ]
-uploaded_file = st.file_uploader("Please upload your email (In HTML Format)", type=["html"])
 if uploaded_file is None:
     # upload_img = PIL.Image.open(uploaded_file)
@@ -210,8 +214,8 @@ industry = st.selectbox(
     index=6
 )
-campaign  = st.selectbox(
-    'Please select your industry',
     campaign_types,
     index=5
 )
@@ -230,24 +234,24 @@ char_reco_preference = st.selectbox(
     index=1)
-def get_files_from_aws(bucket,prefix):
     """
         get files from aws s3 bucket
     bucket (STRING): bucket name
     prefix (STRING): file location in s3 bucket
     """
     s3_client = boto3.client('s3',
-        aws_access_key_id = st.secrets["aws_id"],
-        aws_secret_access_key = st.secrets["aws_key"])
-    file_obj = s3_client.get_object(Bucket=bucket,Key=prefix)
     body = file_obj['Body']
     string = body.read().decode('utf-8')
     df = pd.read_csv(StringIO(string))
-    return df
 # st.info([industry,campaign,target,char_reco_preference])
@@ -264,20 +268,26 @@ if st.button('Generate Predictions'):
         # Starting predictions
         model = joblib.load('models/models.sav')
         # Generate Email Data
-        email_data = get_files_from_aws('emailcampaigntrainingdata','trainingdata/email_dataset_training.csv')
-        acc_data = get_files_from_aws('emailcampaigntrainingdata','trainingdata/email_dataset_training_raw.csv')
-        email_data_ = email_data[["email_body", "industry", "campaign_type","character_cnt", "url_cnt","Open_Rate", "Click_Through_Rate"]]
-        email_data_ = email_data_.rename({'Open_Rate': 'Click-to-open_Rate', 'Click_Through_Rate': 'Conversion_Rate'})
-        df_email_data = email_data_.rename(columns={'Open_Rate': 'Click-to-open_Rate', 'Click_Through_Rate': 'Conversion_Rate'})
         # Dataset:
-        training_dataset = get_files_from_aws('emailcampaigntrainingdata','modelCC/training.csv')
         # X_test = get_files_from_aws('emailcampaigntrainingdata','modelCC/Xtest.csv')
         # Y_test = get_files_from_aws('emailcampaigntrainingdata','modelCC/ytest.csv')
         # print("Getting Data Time: %s seconds" % (time.time() - start_time))
         industry_code_dict = get_industry_code_dict(email_data)
         bytes_data = uploaded_file.getvalue()
@@ -287,24 +297,28 @@ if st.button('Generate Predictions'):
         # Need to solve X test issue
         # y_pred = model.predict(X_test)
-        df_uploaded = pd.DataFrame(columns=['character_cnt', "url_cnt", "industry"])
         df_uploaded.loc[0] = [character_cnt, url_cnt, industry]
         df_uploaded["industry_code"] = industry_code_dict.get(industry)
-        df_uploaded_test = df_uploaded[["industry_code", "character_cnt", "url_cnt"]]
-        predicted_rate =  model.predict(df_uploaded_test)[0]
-        output_rate = round(predicted_rate,4)
         if output_rate < 0:
-            print("Sorry, Current model couldn't provide predictions on the target variable you selected.")
         else:
-            st.markdown('#### Current Character Count in Your Email is: <span style="color:blue">{}</span>'.format(character_cnt), unsafe_allow_html=True)
             # st.info('The model predicts that it achieves a {} of {}%'.format(target, str(round(output_rate*100,2))))
             if target == 'conversion_rate':
                 target_vis = 'Conversion Rate'
             else:
                 target_vis = 'Click-to-Open Rate'
-            st.markdown('#### The model predicts that it achieves a <span style="color:blue">{}</span> of <span style="color:blue">{}</span>%'.format(target_vis, str(round(output_rate*100,2))),unsafe_allow_html=True)
             selected_industry_code = industry_code_dict.get(industry)
             if target == "click_to_open_rate":
@@ -312,38 +326,43 @@ if st.button('Generate Predictions'):
             if target == "conversion_rate":
                 selected_variable = "Click_Through_Rate"
-            df_reco = training_dataset[["industry_code", "character_cnt", "url_cnt", selected_variable]]
-            df_reco = df_reco[df_reco["industry_code"] == selected_industry_code]
-            df_reco[selected_variable]=df_reco[selected_variable].apply(lambda x:round(x, 3))
             df_reco_sort = df_reco.sort_values(by=[selected_variable])
             df_reco = df_reco.drop_duplicates(subset=selected_variable)
             preference = char_reco_preference
             if preference == "Increase":
-                df_reco_opt = df_reco[(df_reco[selected_variable] > output_rate) & (df_reco["character_cnt"] > character_cnt) & (df_reco["character_cnt"] <= (1.5*character_cnt))]
-                df_reco_opt_rank = df_reco_opt.nlargest(3,[selected_variable])
-            ## decrease character reco
             if preference == "Decrease":
-                df_reco_opt = df_reco[(df_reco[selected_variable] > output_rate) & (df_reco["character_cnt"] < character_cnt)]
-                df_reco_opt_rank = df_reco_opt.nlargest(3,[selected_variable])
             if selected_variable == "Open_Rate":
                 selected_variable = "Click-to-Open_Rate"
             if selected_variable == "Click_Through_Rate":
                 selected_variable = "Conversion_Rate"
-            st.markdown('#### To get higher, <span style="color:blue">{}</span>, the model recommends the following options:'.format(selected_variable),unsafe_allow_html=True)
             if len(df_reco_opt_rank) == 0:
-                st.markdown('#### You ve already achieved the highest, <span style="color:blue">{}</span>, with the current character count!'.format(selected_variable),unsafe_allow_html=True)
             else:
                 for _, row in df_reco_opt_rank.iterrows():
                     Character_Count = row[1]
                     selected_variable = row[3]
                     # print(f"·Number of Characters: {int(Character_Count)}, Target Rate: {round(selected_variable, 3)*100}", "%")
-                    st.markdown('Number of Characters: {}, Target Rate: {}'.format(int(Character_Count), round(selected_variable*100, 3)))
         placeholder.empty()
-        # print(time.time() - start_time)

     ]
     data = {
+        'Field': field,
+        'Data': data
     }
     df = pd.DataFrame.from_dict(data)
     return df
+def url_button(button_name, url):
     if st.button(button_name):
+        js = """window.open('{url}')""".format(url=url)  # New tab or window
         html = '<img src onerror="{}">'.format(js)
         div = Div(text=html)
         st.bokeh_chart(div)
 def get_industry_code_dict(training_dataset):
+    training_dataset['industry_code'] = training_dataset['industry'].astype(
+        'category')
     cat_columns = training_dataset.select_dtypes(['category']).columns
+    training_dataset[cat_columns] = training_dataset[cat_columns].apply(
+        lambda x: x.cat.codes)
+    industry_code_dict = dict(
+        zip(training_dataset.industry, training_dataset.industry_code))
     return industry_code_dict
+# extract email body from parse email
+def email_body_extractor(email_data):
     # email_data = parsed_email.data[0]
+    emailstr = email_data.decode("utf-8")
     b = email.message_from_string(emailstr)
     body = ""
     # not multipart - i.e. plain text, no attachments, keeping fingers crossed
     else:
         body = b.get_payload()
+    # Remove escape sequences
     body = body.replace('\n', '')
     body = body.replace('\t', '')
     body = body.replace('\r', '')
     body = body.replace('</b>', '')
     body = body.replace('<b>', '')
+    # Extract urls in the email body and get url counts
     extractor = URLExtract()
     urls = extractor.find_urls(body)
     url_cnt = len(urls)
+    # Remove urls
+    body = re.sub(
+        r'\w+:\/{2}[\d\w-]+(\.[\d\w-]+)*(?:(?:\/[^\s/]*))*', '', body)
     sep = '©'
     body = body.split(sep, 1)[0]
     character_cnt = sum(not chr.isspace() for chr in body)
 def add_bg_from_url():
     st.markdown(
+        f"""
          <style>
          .stApp {{
              background-image: linear-gradient(#0A3144,#126072,#1C8D99);
          }}
          </style>
          """,
+        unsafe_allow_html=True
+    )
+# add_bg_from_url()
 st.markdown("# Character Count: Email Industry")
 #     img = PIL.Image.open("figures/ModelCC_solid.png")
 #     st.image(img)
+stats_col1, stats_col2, stats_col3, stats_col4 = st.columns([1, 1, 1, 1])
 with stats_col1:
     st.metric(label="Production", value="Production")
     st.metric(label="Industry", value="Email")
 with st.sidebar:
     with st.expander('Model Description', expanded=False):
         st.markdown(hide_table_row_index, unsafe_allow_html=True)
         st.table(table_data())
+    url_button('Model Homepage', 'https://www.loxz.com/#/models/CTA')
     # url_button('Full Report','https://resources.loxz.com/reports/realtime-ml-character-count-model')
+    url_button('Amazon Market Place', 'https://aws.amazon.com/marketplace')
 industry_lists = [
 ]
 campaign_types = [
+    'Promotional',
+    'Transactional',
+    'Webinar',
+    'Survey',
+    'Newsletter',
     'Engagement',
+    'Curated_Content',
+    'Review_Request',
+    'Product_Announcement',
     'Abandoned_Cart'
 ]
 target_variables = [
+    'conversion_rate',
     'click_to_open_rate'
 ]
+uploaded_file = st.file_uploader(
+    "Please upload your email (In HTML Format)", type=["html"])
 if uploaded_file is None:
     # upload_img = PIL.Image.open(uploaded_file)
     index=6
 )
+campaign = st.selectbox(
+    'Please select your campaign type',
     campaign_types,
     index=5
 )
     index=1)
+def get_files_from_aws(bucket, prefix):
     """
         get files from aws s3 bucket
     bucket (STRING): bucket name
     prefix (STRING): file location in s3 bucket
     """
     s3_client = boto3.client('s3',
+                             aws_access_key_id=st.secrets["aws_id"],
+                             aws_secret_access_key=st.secrets["aws_key"])
+    file_obj = s3_client.get_object(Bucket=bucket, Key=prefix)
     body = file_obj['Body']
     string = body.read().decode('utf-8')
     df = pd.read_csv(StringIO(string))
+    return df
 # st.info([industry,campaign,target,char_reco_preference])
         # Starting predictions
         model = joblib.load('models/models.sav')
         # Generate Email Data
+        email_data = get_files_from_aws(
+            'emailcampaigntrainingdata', 'trainingdata/email_dataset_training.csv')
+        acc_data = get_files_from_aws(
+            'emailcampaigntrainingdata', 'trainingdata/email_dataset_training_raw.csv')
+        email_data_ = email_data[["email_body", "industry", "campaign_type",
+                                  "character_cnt", "url_cnt", "Open_Rate", "Click_Through_Rate"]]
+        email_data_ = email_data_.rename(
+            {'Open_Rate': 'Click-to-open_Rate', 'Click_Through_Rate': 'Conversion_Rate'})
+        df_email_data = email_data_.rename(
+            columns={'Open_Rate': 'Click-to-open_Rate', 'Click_Through_Rate': 'Conversion_Rate'})
         # Dataset:
+        training_dataset = get_files_from_aws(
+            'emailcampaigntrainingdata', 'modelCC/training.csv')
         # X_test = get_files_from_aws('emailcampaigntrainingdata','modelCC/Xtest.csv')
         # Y_test = get_files_from_aws('emailcampaigntrainingdata','modelCC/ytest.csv')
         # print("Getting Data Time: %s seconds" % (time.time() - start_time))
         industry_code_dict = get_industry_code_dict(email_data)
         bytes_data = uploaded_file.getvalue()
         # Need to solve X test issue
         # y_pred = model.predict(X_test)
+        df_uploaded = pd.DataFrame(
+            columns=['character_cnt', "url_cnt", "industry"])
         df_uploaded.loc[0] = [character_cnt, url_cnt, industry]
         df_uploaded["industry_code"] = industry_code_dict.get(industry)
+        df_uploaded_test = df_uploaded[[
+            "industry_code", "character_cnt", "url_cnt"]]
+        predicted_rate = model.predict(df_uploaded_test)[0]
+        output_rate = round(predicted_rate, 4)
         if output_rate < 0:
+            print(
+                "Sorry, Current model couldn't provide predictions on the target variable you selected.")
         else:
+            st.markdown('#### Current Character Count in Your Email is: <span style="color:blue">{}</span>'.format(
+                character_cnt), unsafe_allow_html=True)
             # st.info('The model predicts that it achieves a {} of {}%'.format(target, str(round(output_rate*100,2))))
             if target == 'conversion_rate':
                 target_vis = 'Conversion Rate'
             else:
                 target_vis = 'Click-to-Open Rate'
+            st.markdown('#### The model predicts that it achieves a <span style="color:blue">{}</span> of <span style="color:blue">{}</span>%'.format(
+                target_vis, str(round(output_rate*100, 2))), unsafe_allow_html=True)
             selected_industry_code = industry_code_dict.get(industry)
             if target == "click_to_open_rate":
             if target == "conversion_rate":
                 selected_variable = "Click_Through_Rate"
+            df_reco = training_dataset[[
+                "industry_code", "character_cnt", "url_cnt", selected_variable]]
+            df_reco = df_reco[df_reco["industry_code"]
+                              == selected_industry_code]
+            df_reco[selected_variable] = df_reco[selected_variable].apply(
+                lambda x: round(x, 3))
             df_reco_sort = df_reco.sort_values(by=[selected_variable])
             df_reco = df_reco.drop_duplicates(subset=selected_variable)
             preference = char_reco_preference
             if preference == "Increase":
+                df_reco_opt = df_reco[(df_reco[selected_variable] > output_rate) & (
+                    df_reco["character_cnt"] > character_cnt) & (df_reco["character_cnt"] <= (1.5*character_cnt))]
+                df_reco_opt_rank = df_reco_opt.nlargest(3, [selected_variable])
+            # decrease character reco
             if preference == "Decrease":
+                df_reco_opt = df_reco[(df_reco[selected_variable] > output_rate) & (
+                    df_reco["character_cnt"] < character_cnt)]
+                df_reco_opt_rank = df_reco_opt.nlargest(3, [selected_variable])
             if selected_variable == "Open_Rate":
                 selected_variable = "Click-to-Open_Rate"
             if selected_variable == "Click_Through_Rate":
                 selected_variable = "Conversion_Rate"
+            st.markdown('#### To get higher, <span style="color:blue">{}</span>, the model recommends the following options:'.format(
+                selected_variable), unsafe_allow_html=True)
             if len(df_reco_opt_rank) == 0:
+                st.markdown('#### You ve already achieved the highest, <span style="color:blue">{}</span>, with the current character count!'.format(
+                    selected_variable), unsafe_allow_html=True)
             else:
                 for _, row in df_reco_opt_rank.iterrows():
                     Character_Count = row[1]
                     selected_variable = row[3]
                     # print(f"·Number of Characters: {int(Character_Count)}, Target Rate: {round(selected_variable, 3)*100}", "%")
+                    st.markdown('Number of Characters: {}, Target Rate: {}'.format(
+                        int(Character_Count), round(selected_variable*100, 3)))
         placeholder.empty()
+        # print(time.time() - start_time)