Spaces:
Runtime error
Runtime error
Andy Lau
committed on
Commit
·
8c65dfe
1
Parent(s):
dad7d04
wip_model output working
Browse files
app.py
CHANGED
@@ -5,6 +5,8 @@ import PIL
|
|
5 |
import re
|
6 |
from io import StringIO
|
7 |
import boto3
|
|
|
|
|
8 |
|
9 |
# from joblib import dump, load
|
10 |
|
@@ -13,9 +15,6 @@ import joblib
|
|
13 |
from bokeh.models.widgets import Div
|
14 |
|
15 |
import email
|
16 |
-
from urlextract import URLExtract
|
17 |
-
|
18 |
-
import main_app
|
19 |
|
20 |
|
21 |
def table_data():
|
@@ -105,13 +104,6 @@ def email_body_extractor(email_data):
|
|
105 |
|
106 |
return body, character_cnt, url_cnt
|
107 |
|
108 |
-
# def select_char_preference_variables():
|
109 |
-
# opt_list = ["Increase", "Decrease"]
|
110 |
-
# button_option = widgets.RadioButtons(options = opt_list)
|
111 |
-
# print("Do you want to increase or decrease your character count in the email?")
|
112 |
-
# display(button_option)
|
113 |
-
# return button_option
|
114 |
-
|
115 |
|
116 |
def add_bg_from_url():
|
117 |
st.markdown(
|
@@ -215,26 +207,30 @@ if uploaded_file is None:
|
|
215 |
|
216 |
industry = st.selectbox(
|
217 |
'Please select your industry',
|
218 |
-
industry_lists
|
|
|
219 |
)
|
220 |
|
221 |
campaign = st.selectbox(
|
222 |
'Please select your industry',
|
223 |
-
campaign_types
|
|
|
224 |
)
|
225 |
|
226 |
target = st.selectbox(
|
227 |
'Please select your target variable',
|
228 |
-
target_variables
|
|
|
229 |
)
|
230 |
|
231 |
st.markdown("""---""")
|
232 |
|
233 |
char_reco_preference = st.selectbox(
|
234 |
'Do you want to increase or decrease your character count in the email?',
|
235 |
-
["Increase", "Decrease"]
|
236 |
-
|
237 |
|
|
|
238 |
def get_files_from_aws(bucket,prefix):
|
239 |
"""
|
240 |
get files from aws s3 bucket
|
@@ -255,6 +251,7 @@ def get_files_from_aws(bucket,prefix):
|
|
255 |
return df
|
256 |
|
257 |
|
|
|
258 |
|
259 |
|
260 |
if st.button('Generate Predictions'):
|
@@ -266,7 +263,6 @@ if st.button('Generate Predictions'):
|
|
266 |
|
267 |
# Starting predictions
|
268 |
model = joblib.load('models/models.sav')
|
269 |
-
print(type(model))
|
270 |
# Generate Email Data
|
271 |
email_data = get_files_from_aws('emailcampaigntrainingdata','trainingdata/email_dataset_training.csv')
|
272 |
acc_data = get_files_from_aws('emailcampaigntrainingdata','trainingdata/email_dataset_training_raw.csv')
|
@@ -282,7 +278,6 @@ if st.button('Generate Predictions'):
|
|
282 |
|
283 |
|
284 |
industry_code_dict = get_industry_code_dict(email_data)
|
285 |
-
|
286 |
bytes_data = uploaded_file.getvalue()
|
287 |
|
288 |
email_body, character_cnt, url_cnt = email_body_extractor(bytes_data)
|
@@ -296,48 +291,67 @@ if st.button('Generate Predictions'):
|
|
296 |
df_uploaded["industry_code"] = industry_code_dict.get(industry)
|
297 |
df_uploaded_test = df_uploaded[["industry_code", "character_cnt", "url_cnt"]]
|
298 |
predicted_rate = model.predict(df_uploaded_test)[0]
|
299 |
-
output_rate = round(predicted_rate
|
300 |
-
|
301 |
-
# output_rate = 0.5
|
302 |
if output_rate < 0:
|
303 |
print("Sorry, Current model couldn't provide predictions on the target variable you selected.")
|
304 |
else:
|
305 |
-
st.
|
306 |
-
st.info('The model predicts that it achieves a {} of {}%'.format(target, str(output_rate)))
|
307 |
-
|
308 |
-
|
|
|
|
|
|
|
|
|
|
|
309 |
if target == "click_to_open_rate":
|
310 |
selected_variable = "Open_Rate"
|
311 |
if target == "conversion_rate":
|
312 |
selected_variable = "Click_Through_Rate"
|
313 |
|
|
|
314 |
df_reco = training_dataset[["industry_code", "character_cnt", "url_cnt", selected_variable]]
|
315 |
-
df_reco = df_reco[df_reco["industry_code"] ==
|
316 |
df_reco[selected_variable]=df_reco[selected_variable].apply(lambda x:round(x, 3))
|
317 |
df_reco_sort = df_reco.sort_values(by=[selected_variable])
|
318 |
df_reco = df_reco.drop_duplicates(subset=selected_variable)
|
319 |
|
320 |
-
|
|
|
321 |
df_reco_opt = df_reco[(df_reco[selected_variable] > output_rate) & (df_reco["character_cnt"] > character_cnt) & (df_reco["character_cnt"] <= (1.5*character_cnt))]
|
322 |
df_reco_opt_rank = df_reco_opt.nlargest(3,[selected_variable])
|
323 |
-
|
|
|
324 |
df_reco_opt = df_reco[(df_reco[selected_variable] > output_rate) & (df_reco["character_cnt"] < character_cnt)]
|
325 |
df_reco_opt_rank = df_reco_opt.nlargest(3,[selected_variable])
|
326 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
327 |
if selected_variable == "Open_Rate":
|
328 |
selected_variable = "Click-to-Open_Rate"
|
329 |
if selected_variable == "Click_Through_Rate":
|
330 |
selected_variable = "Conversion_Rate"
|
331 |
|
332 |
-
st.
|
333 |
if len(df_reco_opt_rank) == 0:
|
334 |
-
st.info('You ve already achieved the highest, {}
|
335 |
else:
|
336 |
for _, row in df_reco_opt_rank.iterrows():
|
337 |
Character_Count = row[1]
|
338 |
selected_variable = row[3]
|
339 |
# print(f"·Number of Characters: {int(Character_Count)}, Target Rate: {round(selected_variable, 3)*100}", "%")
|
340 |
-
st.
|
341 |
|
342 |
|
343 |
placeholder.empty()
|
|
|
5 |
import re
|
6 |
from io import StringIO
|
7 |
import boto3
|
8 |
+
from urlextract import URLExtract
|
9 |
+
|
10 |
|
11 |
# from joblib import dump, load
|
12 |
|
|
|
15 |
from bokeh.models.widgets import Div
|
16 |
|
17 |
import email
|
|
|
|
|
|
|
18 |
|
19 |
|
20 |
def table_data():
|
|
|
104 |
|
105 |
return body, character_cnt, url_cnt
|
106 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
107 |
|
108 |
def add_bg_from_url():
|
109 |
st.markdown(
|
|
|
207 |
|
208 |
industry = st.selectbox(
|
209 |
'Please select your industry',
|
210 |
+
industry_lists,
|
211 |
+
index=6
|
212 |
)
|
213 |
|
214 |
campaign = st.selectbox(
|
215 |
'Please select your industry',
|
216 |
+
campaign_types,
|
217 |
+
index=5
|
218 |
)
|
219 |
|
220 |
target = st.selectbox(
|
221 |
'Please select your target variable',
|
222 |
+
target_variables,
|
223 |
+
index=1
|
224 |
)
|
225 |
|
226 |
st.markdown("""---""")
|
227 |
|
228 |
char_reco_preference = st.selectbox(
|
229 |
'Do you want to increase or decrease your character count in the email?',
|
230 |
+
["Increase", "Decrease"],
|
231 |
+
index=1)
|
232 |
|
233 |
+
st.cache()
|
234 |
def get_files_from_aws(bucket,prefix):
|
235 |
"""
|
236 |
get files from aws s3 bucket
|
|
|
251 |
return df
|
252 |
|
253 |
|
254 |
+
# st.info([industry,campaign,target,char_reco_preference])
|
255 |
|
256 |
|
257 |
if st.button('Generate Predictions'):
|
|
|
263 |
|
264 |
# Starting predictions
|
265 |
model = joblib.load('models/models.sav')
|
|
|
266 |
# Generate Email Data
|
267 |
email_data = get_files_from_aws('emailcampaigntrainingdata','trainingdata/email_dataset_training.csv')
|
268 |
acc_data = get_files_from_aws('emailcampaigntrainingdata','trainingdata/email_dataset_training_raw.csv')
|
|
|
278 |
|
279 |
|
280 |
industry_code_dict = get_industry_code_dict(email_data)
|
|
|
281 |
bytes_data = uploaded_file.getvalue()
|
282 |
|
283 |
email_body, character_cnt, url_cnt = email_body_extractor(bytes_data)
|
|
|
291 |
df_uploaded["industry_code"] = industry_code_dict.get(industry)
|
292 |
df_uploaded_test = df_uploaded[["industry_code", "character_cnt", "url_cnt"]]
|
293 |
predicted_rate = model.predict(df_uploaded_test)[0]
|
294 |
+
output_rate = round(predicted_rate,4)
|
295 |
+
|
|
|
296 |
if output_rate < 0:
|
297 |
print("Sorry, Current model couldn't provide predictions on the target variable you selected.")
|
298 |
else:
|
299 |
+
st.markdown('### Current Character Count in Your Email is: <span style="color:blue">{}</span>'.format(character_cnt), unsafe_allow_html=True)
|
300 |
+
# st.info('The model predicts that it achieves a {} of {}%'.format(target, str(round(output_rate*100,2))))
|
301 |
+
if target == 'conversion_rate':
|
302 |
+
target_vis = 'Conversion Rate'
|
303 |
+
else:
|
304 |
+
target_vis = 'Click-to-Open Rate'
|
305 |
+
st.markdown('### The model predicts that it achieves a <span style="color:blue">{}</span> of {}%'.format(target_vis, str(round(output_rate*100,2))),unsafe_allow_html=True)
|
306 |
+
selected_industry_code = industry_code_dict.get(industry)
|
307 |
+
|
308 |
if target == "click_to_open_rate":
|
309 |
selected_variable = "Open_Rate"
|
310 |
if target == "conversion_rate":
|
311 |
selected_variable = "Click_Through_Rate"
|
312 |
|
313 |
+
|
314 |
df_reco = training_dataset[["industry_code", "character_cnt", "url_cnt", selected_variable]]
|
315 |
+
df_reco = df_reco[df_reco["industry_code"] == selected_industry_code]
|
316 |
df_reco[selected_variable]=df_reco[selected_variable].apply(lambda x:round(x, 3))
|
317 |
df_reco_sort = df_reco.sort_values(by=[selected_variable])
|
318 |
df_reco = df_reco.drop_duplicates(subset=selected_variable)
|
319 |
|
320 |
+
preference = char_reco_preference
|
321 |
+
if preference == "Increase":
|
322 |
df_reco_opt = df_reco[(df_reco[selected_variable] > output_rate) & (df_reco["character_cnt"] > character_cnt) & (df_reco["character_cnt"] <= (1.5*character_cnt))]
|
323 |
df_reco_opt_rank = df_reco_opt.nlargest(3,[selected_variable])
|
324 |
+
## Recommendation for decreasing the character count
|
325 |
+
if preference == "Decrease":
|
326 |
df_reco_opt = df_reco[(df_reco[selected_variable] > output_rate) & (df_reco["character_cnt"] < character_cnt)]
|
327 |
df_reco_opt_rank = df_reco_opt.nlargest(3,[selected_variable])
|
328 |
+
|
329 |
+
|
330 |
+
# something wrong here
|
331 |
+
# if char_reco_preference == "Increase":
|
332 |
+
# df_reco_opt = df_reco[(df_reco[selected_variable] > output_rate) & (df_reco["character_cnt"] > character_cnt) & (df_reco["character_cnt"] <= (1.5*character_cnt))]
|
333 |
+
# df_reco_opt_rank = df_reco_opt.nlargest(3,[selected_variable])
|
334 |
+
# if char_reco_preference == "Decrease":
|
335 |
+
# df_reco_opt = df_reco[(df_reco[selected_variable] > output_rate) & (df_reco["character_cnt"] < character_cnt)]
|
336 |
+
# df_reco_opt_rank = df_reco_opt.nlargest(3,[selected_variable])
|
337 |
+
|
338 |
+
# print(df_reco_opt)
|
339 |
+
# print(df_reco_opt_rank)
|
340 |
+
|
341 |
if selected_variable == "Open_Rate":
|
342 |
selected_variable = "Click-to-Open_Rate"
|
343 |
if selected_variable == "Click_Through_Rate":
|
344 |
selected_variable = "Conversion_Rate"
|
345 |
|
346 |
+
st.markdown('### To get higher, <span style="color:blue">{}</span>,the model recommends the following options:'.format(selected_variable),unsafe_allow_html=True)
|
347 |
if len(df_reco_opt_rank) == 0:
|
348 |
+
st.info('You ve already achieved the highest, <span style="color:blue">{}</span>, with the current character count!'.format(selected_variable),unsafe_allow_html=True)
|
349 |
else:
|
350 |
for _, row in df_reco_opt_rank.iterrows():
|
351 |
Character_Count = row[1]
|
352 |
selected_variable = row[3]
|
353 |
# print(f"·Number of Characters: {int(Character_Count)}, Target Rate: {round(selected_variable, 3)*100}", "%")
|
354 |
+
st.markdown('#### Number of Characters: {}, Target Rate: {}'.format(int(Character_Count), round(selected_variable*100, 3)))
|
355 |
|
356 |
|
357 |
placeholder.empty()
|