Spaces:
Runtime error
Runtime error
Andy Lau
committed on
Commit
·
41e5416
1
Parent(s):
8170b82
fix typo
Browse files
app.py
CHANGED
@@ -39,8 +39,8 @@ def table_data():
|
|
39 |
]
|
40 |
|
41 |
data = {
|
42 |
-
'Field':field,
|
43 |
-
'Data':data
|
44 |
}
|
45 |
|
46 |
df = pd.DataFrame.from_dict(data)
|
@@ -48,26 +48,29 @@ def table_data():
|
|
48 |
return df
|
49 |
|
50 |
|
51 |
-
|
52 |
-
def url_button(button_name,url):
|
53 |
if st.button(button_name):
|
54 |
-
js = """window.open('{url}')""".format(url=url)
|
55 |
html = '<img src onerror="{}">'.format(js)
|
56 |
div = Div(text=html)
|
57 |
st.bokeh_chart(div)
|
58 |
|
|
|
59 |
def get_industry_code_dict(training_dataset):
|
60 |
-
training_dataset['industry_code'] = training_dataset['industry'].astype(
|
|
|
61 |
cat_columns = training_dataset.select_dtypes(['category']).columns
|
62 |
-
training_dataset[cat_columns] = training_dataset[cat_columns].apply(
|
63 |
-
|
|
|
|
|
64 |
return industry_code_dict
|
65 |
|
66 |
|
67 |
-
|
68 |
-
def email_body_extractor(email_data):
|
69 |
# email_data = parsed_email.data[0]
|
70 |
-
emailstr = email_data.decode("utf-8")
|
71 |
b = email.message_from_string(emailstr)
|
72 |
body = ""
|
73 |
|
@@ -83,20 +86,20 @@ def email_body_extractor(email_data):
|
|
83 |
# not multipart - i.e. plain text, no attachments, keeping fingers crossed
|
84 |
else:
|
85 |
body = b.get_payload()
|
86 |
-
|
87 |
body = body.replace('\n', '')
|
88 |
body = body.replace('\t', '')
|
89 |
body = body.replace('\r', '')
|
90 |
body = body.replace('</b>', '')
|
91 |
body = body.replace('<b>', '')
|
92 |
|
93 |
-
|
94 |
-
## Extract urls in the email body and get url counts
|
95 |
extractor = URLExtract()
|
96 |
urls = extractor.find_urls(body)
|
97 |
url_cnt = len(urls)
|
98 |
-
|
99 |
-
body = re.sub(
|
|
|
100 |
sep = '©'
|
101 |
body = body.split(sep, 1)[0]
|
102 |
character_cnt = sum(not chr.isspace() for chr in body)
|
@@ -106,7 +109,7 @@ def email_body_extractor(email_data):
|
|
106 |
|
107 |
def add_bg_from_url():
|
108 |
st.markdown(
|
109 |
-
|
110 |
<style>
|
111 |
.stApp {{
|
112 |
background-image: linear-gradient(#0A3144,#126072,#1C8D99);
|
@@ -115,10 +118,11 @@ def add_bg_from_url():
|
|
115 |
}}
|
116 |
</style>
|
117 |
""",
|
118 |
-
|
119 |
-
|
|
|
|
|
120 |
|
121 |
-
# add_bg_from_url()
|
122 |
|
123 |
st.markdown("# Character Count: Email Industry")
|
124 |
|
@@ -128,7 +132,7 @@ st.markdown("# Character Count: Email Industry")
|
|
128 |
# img = PIL.Image.open("figures/ModelCC_solid.png")
|
129 |
# st.image(img)
|
130 |
|
131 |
-
stats_col1, stats_col2, stats_col3, stats_col4 = st.columns([1,1,1,1])
|
132 |
|
133 |
with stats_col1:
|
134 |
st.metric(label="Production", value="Production")
|
@@ -142,7 +146,6 @@ with stats_col4:
|
|
142 |
st.metric(label="Industry", value="Email")
|
143 |
|
144 |
|
145 |
-
|
146 |
with st.sidebar:
|
147 |
|
148 |
with st.expander('Model Description', expanded=False):
|
@@ -160,9 +163,9 @@ with st.sidebar:
|
|
160 |
st.markdown(hide_table_row_index, unsafe_allow_html=True)
|
161 |
st.table(table_data())
|
162 |
|
163 |
-
url_button('Model Homepage','https://www.loxz.com/#/models/CTA')
|
164 |
# url_button('Full Report','https://resources.loxz.com/reports/realtime-ml-character-count-model')
|
165 |
-
url_button('Amazon Market Place','https://aws.amazon.com/marketplace')
|
166 |
|
167 |
|
168 |
industry_lists = [
|
@@ -178,24 +181,25 @@ industry_lists = [
|
|
178 |
]
|
179 |
|
180 |
campaign_types = [
|
181 |
-
'Promotional',
|
182 |
-
'Transactional',
|
183 |
-
'Webinar',
|
184 |
-
'Survey',
|
185 |
-
'Newsletter',
|
186 |
'Engagement',
|
187 |
-
'Curated_Content',
|
188 |
-
'Review_Request',
|
189 |
-
'Product_Announcement',
|
190 |
'Abandoned_Cart'
|
191 |
]
|
192 |
|
193 |
target_variables = [
|
194 |
-
'conversion_rate',
|
195 |
'click_to_open_rate'
|
196 |
]
|
197 |
|
198 |
-
uploaded_file = st.file_uploader(
|
|
|
199 |
|
200 |
if uploaded_file is None:
|
201 |
# upload_img = PIL.Image.open(uploaded_file)
|
@@ -210,8 +214,8 @@ industry = st.selectbox(
|
|
210 |
index=6
|
211 |
)
|
212 |
|
213 |
-
campaign
|
214 |
-
'Please select your
|
215 |
campaign_types,
|
216 |
index=5
|
217 |
)
|
@@ -230,24 +234,24 @@ char_reco_preference = st.selectbox(
|
|
230 |
index=1)
|
231 |
|
232 |
|
233 |
-
def get_files_from_aws(bucket,prefix):
|
234 |
"""
|
235 |
get files from aws s3 bucket
|
236 |
-
|
237 |
bucket (STRING): bucket name
|
238 |
prefix (STRING): file location in s3 bucket
|
239 |
"""
|
240 |
s3_client = boto3.client('s3',
|
241 |
-
|
242 |
-
|
243 |
|
244 |
-
file_obj = s3_client.get_object(Bucket=bucket,Key=prefix)
|
245 |
body = file_obj['Body']
|
246 |
string = body.read().decode('utf-8')
|
247 |
-
|
248 |
df = pd.read_csv(StringIO(string))
|
249 |
-
|
250 |
-
return df
|
251 |
|
252 |
|
253 |
# st.info([industry,campaign,target,char_reco_preference])
|
@@ -264,20 +268,26 @@ if st.button('Generate Predictions'):
|
|
264 |
# Starting predictions
|
265 |
model = joblib.load('models/models.sav')
|
266 |
# Generate Email Data
|
267 |
-
email_data = get_files_from_aws(
|
268 |
-
|
269 |
-
|
270 |
-
|
271 |
-
|
272 |
-
|
|
|
|
|
|
|
|
|
|
|
273 |
|
274 |
# Dataset:
|
275 |
-
training_dataset = get_files_from_aws(
|
|
|
276 |
# X_test = get_files_from_aws('emailcampaigntrainingdata','modelCC/Xtest.csv')
|
277 |
# Y_test = get_files_from_aws('emailcampaigntrainingdata','modelCC/ytest.csv')
|
278 |
|
279 |
# print("Getting Data Time: %s seconds" % (time.time() - start_time))
|
280 |
-
|
281 |
industry_code_dict = get_industry_code_dict(email_data)
|
282 |
bytes_data = uploaded_file.getvalue()
|
283 |
|
@@ -287,24 +297,28 @@ if st.button('Generate Predictions'):
|
|
287 |
# Need to solve X test issue
|
288 |
|
289 |
# y_pred = model.predict(X_test)
|
290 |
-
df_uploaded = pd.DataFrame(
|
|
|
291 |
df_uploaded.loc[0] = [character_cnt, url_cnt, industry]
|
292 |
df_uploaded["industry_code"] = industry_code_dict.get(industry)
|
293 |
-
df_uploaded_test = df_uploaded[[
|
294 |
-
|
295 |
-
|
296 |
-
|
297 |
|
298 |
if output_rate < 0:
|
299 |
-
print(
|
|
|
300 |
else:
|
301 |
-
st.markdown('#### Current Character Count in Your Email is: <span style="color:blue">{}</span>'.format(
|
|
|
302 |
# st.info('The model predicts that it achieves a {} of {}%'.format(target, str(round(output_rate*100,2))))
|
303 |
if target == 'conversion_rate':
|
304 |
target_vis = 'Conversion Rate'
|
305 |
else:
|
306 |
target_vis = 'Click-to-Open Rate'
|
307 |
-
st.markdown('#### The model predicts that it achieves a <span style="color:blue">{}</span> of <span style="color:blue">{}</span>%'.format(
|
|
|
308 |
selected_industry_code = industry_code_dict.get(industry)
|
309 |
|
310 |
if target == "click_to_open_rate":
|
@@ -312,38 +326,43 @@ if st.button('Generate Predictions'):
|
|
312 |
if target == "conversion_rate":
|
313 |
selected_variable = "Click_Through_Rate"
|
314 |
|
315 |
-
|
316 |
-
|
317 |
-
df_reco = df_reco[df_reco["industry_code"]
|
318 |
-
|
|
|
|
|
319 |
df_reco_sort = df_reco.sort_values(by=[selected_variable])
|
320 |
df_reco = df_reco.drop_duplicates(subset=selected_variable)
|
321 |
|
322 |
preference = char_reco_preference
|
323 |
if preference == "Increase":
|
324 |
-
df_reco_opt = df_reco[(df_reco[selected_variable] > output_rate) & (
|
325 |
-
|
326 |
-
|
|
|
327 |
if preference == "Decrease":
|
328 |
-
df_reco_opt = df_reco[(df_reco[selected_variable] > output_rate) & (
|
329 |
-
|
330 |
-
|
331 |
|
332 |
if selected_variable == "Open_Rate":
|
333 |
selected_variable = "Click-to-Open_Rate"
|
334 |
if selected_variable == "Click_Through_Rate":
|
335 |
selected_variable = "Conversion_Rate"
|
336 |
|
337 |
-
st.markdown('#### To get higher, <span style="color:blue">{}</span>, the model recommends the following options:'.format(
|
|
|
338 |
if len(df_reco_opt_rank) == 0:
|
339 |
-
st.markdown('#### You ve already achieved the highest, <span style="color:blue">{}</span>, with the current character count!'.format(
|
|
|
340 |
else:
|
341 |
for _, row in df_reco_opt_rank.iterrows():
|
342 |
Character_Count = row[1]
|
343 |
selected_variable = row[3]
|
344 |
# print(f"·Number of Characters: {int(Character_Count)}, Target Rate: {round(selected_variable, 3)*100}", "%")
|
345 |
-
st.markdown('Number of Characters: {}, Target Rate: {}'.format(
|
346 |
-
|
347 |
|
348 |
placeholder.empty()
|
349 |
-
# print(time.time() - start_time)
|
|
|
39 |
]
|
40 |
|
41 |
data = {
|
42 |
+
'Field': field,
|
43 |
+
'Data': data
|
44 |
}
|
45 |
|
46 |
df = pd.DataFrame.from_dict(data)
|
|
|
48 |
return df
|
49 |
|
50 |
|
51 |
+
def url_button(button_name, url):
    """Render a Streamlit button that opens ``url`` in a new tab or window.

    button_name: label shown on the button.
    url: address handed to the browser's ``window.open``.

    When the button is clicked, an ``<img>`` tag whose ``onerror`` attribute
    carries the ``window.open`` call is rendered through a Bokeh ``Div``.
    Nothing is rendered while the button is not clicked.
    """
    # Function-scope imports keep this block self-contained.
    import html as html_lib
    import json

    if not st.button(button_name):
        return

    # json.dumps yields a correctly quoted JavaScript string literal and
    # html.escape keeps that literal from terminating the onerror attribute,
    # so a URL containing quotes can no longer break the generated markup.
    js = 'window.open({})'.format(json.dumps(url))  # New tab or window
    markup = '<img src onerror="{}">'.format(html_lib.escape(js, quote=True))
    st.bokeh_chart(Div(text=markup))
|
57 |
|
58 |
+
|
59 |
def get_industry_code_dict(training_dataset):
    """Map each industry label to its integer category code.

    training_dataset: pandas DataFrame with an ``industry`` column.

    Returns a dict of ``{industry label: category code}``, where the codes
    are the ones pandas assigns when ``industry`` is cast to ``category``.

    Side effect: an ``industry_code`` column holding the code of every row
    is written to ``training_dataset``. No other column is modified, so
    unrelated categorical columns (and ``industry`` itself, when it is
    already categorical) keep their labels.
    """
    industries = training_dataset['industry']
    # Derive the codes from the industry column alone instead of converting
    # every categorical column of the frame in place.
    codes = industries.astype('category').cat.codes
    training_dataset['industry_code'] = codes
    return dict(zip(industries, codes))
|
68 |
|
69 |
|
70 |
+
# Extract the email body from the parsed email
|
71 |
+
def email_body_extractor(email_data):
|
72 |
# email_data = parsed_email.data[0]
|
73 |
+
emailstr = email_data.decode("utf-8")
|
74 |
b = email.message_from_string(emailstr)
|
75 |
body = ""
|
76 |
|
|
|
86 |
# not multipart - i.e. plain text, no attachments, keeping fingers crossed
|
87 |
else:
|
88 |
body = b.get_payload()
|
89 |
+
# Remove escape sequences
|
90 |
body = body.replace('\n', '')
|
91 |
body = body.replace('\t', '')
|
92 |
body = body.replace('\r', '')
|
93 |
body = body.replace('</b>', '')
|
94 |
body = body.replace('<b>', '')
|
95 |
|
96 |
+
# Extract urls in the email body and get url counts
|
|
|
97 |
extractor = URLExtract()
|
98 |
urls = extractor.find_urls(body)
|
99 |
url_cnt = len(urls)
|
100 |
+
# Remove urls
|
101 |
+
body = re.sub(
|
102 |
+
r'\w+:\/{2}[\d\w-]+(\.[\d\w-]+)*(?:(?:\/[^\s/]*))*', '', body)
|
103 |
sep = '©'
|
104 |
body = body.split(sep, 1)[0]
|
105 |
character_cnt = sum(not chr.isspace() for chr in body)
|
|
|
109 |
|
110 |
def add_bg_from_url():
|
111 |
st.markdown(
|
112 |
+
f"""
|
113 |
<style>
|
114 |
.stApp {{
|
115 |
background-image: linear-gradient(#0A3144,#126072,#1C8D99);
|
|
|
118 |
}}
|
119 |
</style>
|
120 |
""",
|
121 |
+
unsafe_allow_html=True
|
122 |
+
)
|
123 |
+
|
124 |
+
# add_bg_from_url()
|
125 |
|
|
|
126 |
|
127 |
st.markdown("# Character Count: Email Industry")
|
128 |
|
|
|
132 |
# img = PIL.Image.open("figures/ModelCC_solid.png")
|
133 |
# st.image(img)
|
134 |
|
135 |
+
stats_col1, stats_col2, stats_col3, stats_col4 = st.columns([1, 1, 1, 1])
|
136 |
|
137 |
with stats_col1:
|
138 |
st.metric(label="Production", value="Production")
|
|
|
146 |
st.metric(label="Industry", value="Email")
|
147 |
|
148 |
|
|
|
149 |
with st.sidebar:
|
150 |
|
151 |
with st.expander('Model Description', expanded=False):
|
|
|
163 |
st.markdown(hide_table_row_index, unsafe_allow_html=True)
|
164 |
st.table(table_data())
|
165 |
|
166 |
+
url_button('Model Homepage', 'https://www.loxz.com/#/models/CTA')
|
167 |
# url_button('Full Report','https://resources.loxz.com/reports/realtime-ml-character-count-model')
|
168 |
+
url_button('Amazon Market Place', 'https://aws.amazon.com/marketplace')
|
169 |
|
170 |
|
171 |
industry_lists = [
|
|
|
181 |
]
|
182 |
|
183 |
campaign_types = [
|
184 |
+
'Promotional',
|
185 |
+
'Transactional',
|
186 |
+
'Webinar',
|
187 |
+
'Survey',
|
188 |
+
'Newsletter',
|
189 |
'Engagement',
|
190 |
+
'Curated_Content',
|
191 |
+
'Review_Request',
|
192 |
+
'Product_Announcement',
|
193 |
'Abandoned_Cart'
|
194 |
]
|
195 |
|
196 |
target_variables = [
|
197 |
+
'conversion_rate',
|
198 |
'click_to_open_rate'
|
199 |
]
|
200 |
|
201 |
+
uploaded_file = st.file_uploader(
|
202 |
+
"Please upload your email (In HTML Format)", type=["html"])
|
203 |
|
204 |
if uploaded_file is None:
|
205 |
# upload_img = PIL.Image.open(uploaded_file)
|
|
|
214 |
index=6
|
215 |
)
|
216 |
|
217 |
+
campaign = st.selectbox(
|
218 |
+
'Please select your campaign type',
|
219 |
campaign_types,
|
220 |
index=5
|
221 |
)
|
|
|
234 |
index=1)
|
235 |
|
236 |
|
237 |
+
def get_files_from_aws(bucket, prefix):
    """
    Download one CSV object from an S3 bucket and load it as a DataFrame.

    bucket (STRING): bucket name
    prefix (STRING): key of the CSV object inside the bucket

    The S3 client is created with the ``aws_id`` / ``aws_key`` entries of
    the Streamlit secrets. Returns the DataFrame parsed by ``pd.read_csv``.
    """
    credentials = {
        'aws_access_key_id': st.secrets["aws_id"],
        'aws_secret_access_key': st.secrets["aws_key"],
    }
    response = boto3.client('s3', **credentials).get_object(
        Bucket=bucket, Key=prefix)

    # The whole object is read and decoded as UTF-8 before it is parsed.
    csv_text = response['Body'].read().decode('utf-8')
    return pd.read_csv(StringIO(csv_text))
|
255 |
|
256 |
|
257 |
# st.info([industry,campaign,target,char_reco_preference])
|
|
|
268 |
# Starting predictions
|
269 |
model = joblib.load('models/models.sav')
|
270 |
# Generate Email Data
|
271 |
+
email_data = get_files_from_aws(
|
272 |
+
'emailcampaigntrainingdata', 'trainingdata/email_dataset_training.csv')
|
273 |
+
acc_data = get_files_from_aws(
|
274 |
+
'emailcampaigntrainingdata', 'trainingdata/email_dataset_training_raw.csv')
|
275 |
+
|
276 |
+
email_data_ = email_data[["email_body", "industry", "campaign_type",
|
277 |
+
"character_cnt", "url_cnt", "Open_Rate", "Click_Through_Rate"]]
|
278 |
+
email_data_ = email_data_.rename(
|
279 |
+
{'Open_Rate': 'Click-to-open_Rate', 'Click_Through_Rate': 'Conversion_Rate'})
|
280 |
+
df_email_data = email_data_.rename(
|
281 |
+
columns={'Open_Rate': 'Click-to-open_Rate', 'Click_Through_Rate': 'Conversion_Rate'})
|
282 |
|
283 |
# Dataset:
|
284 |
+
training_dataset = get_files_from_aws(
|
285 |
+
'emailcampaigntrainingdata', 'modelCC/training.csv')
|
286 |
# X_test = get_files_from_aws('emailcampaigntrainingdata','modelCC/Xtest.csv')
|
287 |
# Y_test = get_files_from_aws('emailcampaigntrainingdata','modelCC/ytest.csv')
|
288 |
|
289 |
# print("Getting Data Time: %s seconds" % (time.time() - start_time))
|
290 |
+
|
291 |
industry_code_dict = get_industry_code_dict(email_data)
|
292 |
bytes_data = uploaded_file.getvalue()
|
293 |
|
|
|
297 |
# Need to solve X test issue
|
298 |
|
299 |
# y_pred = model.predict(X_test)
|
300 |
+
df_uploaded = pd.DataFrame(
|
301 |
+
columns=['character_cnt', "url_cnt", "industry"])
|
302 |
df_uploaded.loc[0] = [character_cnt, url_cnt, industry]
|
303 |
df_uploaded["industry_code"] = industry_code_dict.get(industry)
|
304 |
+
df_uploaded_test = df_uploaded[[
|
305 |
+
"industry_code", "character_cnt", "url_cnt"]]
|
306 |
+
predicted_rate = model.predict(df_uploaded_test)[0]
|
307 |
+
output_rate = round(predicted_rate, 4)
|
308 |
|
309 |
if output_rate < 0:
|
310 |
+
print(
|
311 |
+
"Sorry, Current model couldn't provide predictions on the target variable you selected.")
|
312 |
else:
|
313 |
+
st.markdown('#### Current Character Count in Your Email is: <span style="color:blue">{}</span>'.format(
|
314 |
+
character_cnt), unsafe_allow_html=True)
|
315 |
# st.info('The model predicts that it achieves a {} of {}%'.format(target, str(round(output_rate*100,2))))
|
316 |
if target == 'conversion_rate':
|
317 |
target_vis = 'Conversion Rate'
|
318 |
else:
|
319 |
target_vis = 'Click-to-Open Rate'
|
320 |
+
st.markdown('#### The model predicts that it achieves a <span style="color:blue">{}</span> of <span style="color:blue">{}</span>%'.format(
|
321 |
+
target_vis, str(round(output_rate*100, 2))), unsafe_allow_html=True)
|
322 |
selected_industry_code = industry_code_dict.get(industry)
|
323 |
|
324 |
if target == "click_to_open_rate":
|
|
|
326 |
if target == "conversion_rate":
|
327 |
selected_variable = "Click_Through_Rate"
|
328 |
|
329 |
+
df_reco = training_dataset[[
|
330 |
+
"industry_code", "character_cnt", "url_cnt", selected_variable]]
|
331 |
+
df_reco = df_reco[df_reco["industry_code"]
|
332 |
+
== selected_industry_code]
|
333 |
+
df_reco[selected_variable] = df_reco[selected_variable].apply(
|
334 |
+
lambda x: round(x, 3))
|
335 |
df_reco_sort = df_reco.sort_values(by=[selected_variable])
|
336 |
df_reco = df_reco.drop_duplicates(subset=selected_variable)
|
337 |
|
338 |
preference = char_reco_preference
|
339 |
if preference == "Increase":
|
340 |
+
df_reco_opt = df_reco[(df_reco[selected_variable] > output_rate) & (
|
341 |
+
df_reco["character_cnt"] > character_cnt) & (df_reco["character_cnt"] <= (1.5*character_cnt))]
|
342 |
+
df_reco_opt_rank = df_reco_opt.nlargest(3, [selected_variable])
|
343 |
+
# decrease character reco
|
344 |
if preference == "Decrease":
|
345 |
+
df_reco_opt = df_reco[(df_reco[selected_variable] > output_rate) & (
|
346 |
+
df_reco["character_cnt"] < character_cnt)]
|
347 |
+
df_reco_opt_rank = df_reco_opt.nlargest(3, [selected_variable])
|
348 |
|
349 |
if selected_variable == "Open_Rate":
|
350 |
selected_variable = "Click-to-Open_Rate"
|
351 |
if selected_variable == "Click_Through_Rate":
|
352 |
selected_variable = "Conversion_Rate"
|
353 |
|
354 |
+
st.markdown('#### To get higher, <span style="color:blue">{}</span>, the model recommends the following options:'.format(
|
355 |
+
selected_variable), unsafe_allow_html=True)
|
356 |
if len(df_reco_opt_rank) == 0:
|
357 |
+
st.markdown('#### You ve already achieved the highest, <span style="color:blue">{}</span>, with the current character count!'.format(
|
358 |
+
selected_variable), unsafe_allow_html=True)
|
359 |
else:
|
360 |
for _, row in df_reco_opt_rank.iterrows():
|
361 |
Character_Count = row[1]
|
362 |
selected_variable = row[3]
|
363 |
# print(f"·Number of Characters: {int(Character_Count)}, Target Rate: {round(selected_variable, 3)*100}", "%")
|
364 |
+
st.markdown('Number of Characters: {}, Target Rate: {}'.format(
|
365 |
+
int(Character_Count), round(selected_variable*100, 3)))
|
366 |
|
367 |
placeholder.empty()
|
368 |
+
# print(time.time() - start_time)
|