Andy Lau committed on
Commit
8c65dfe
·
1 Parent(s): dad7d04

wip_model output working

Browse files
Files changed (1) hide show
  1. app.py +45 -31
app.py CHANGED
@@ -5,6 +5,8 @@ import PIL
5
  import re
6
  from io import StringIO
7
  import boto3
 
 
8
 
9
  # from joblib import dump, load
10
 
@@ -13,9 +15,6 @@ import joblib
13
  from bokeh.models.widgets import Div
14
 
15
  import email
16
- from urlextract import URLExtract
17
-
18
- import main_app
19
 
20
 
21
  def table_data():
@@ -105,13 +104,6 @@ def email_body_extractor(email_data):
105
 
106
  return body, character_cnt, url_cnt
107
 
108
- # def select_char_preference_variables():
109
- # opt_list = ["Increase", "Decrease"]
110
- # button_option = widgets.RadioButtons(options = opt_list)
111
- # print("Do you want to increase or decrease your character count in the email?")
112
- # display(button_option)
113
- # return button_option
114
-
115
 
116
  def add_bg_from_url():
117
  st.markdown(
@@ -215,26 +207,30 @@ if uploaded_file is None:
215
 
216
  industry = st.selectbox(
217
  'Please select your industry',
218
- industry_lists
 
219
  )
220
 
221
  campaign = st.selectbox(
222
  'Please select your industry',
223
- campaign_types
 
224
  )
225
 
226
  target = st.selectbox(
227
  'Please select your target variable',
228
- target_variables
 
229
  )
230
 
231
  st.markdown("""---""")
232
 
233
  char_reco_preference = st.selectbox(
234
  'Do you want to increase or decrease your character count in the email?',
235
- ["Increase", "Decrease"])
236
-
237
 
 
238
  def get_files_from_aws(bucket,prefix):
239
  """
240
  get files from aws s3 bucket
@@ -255,6 +251,7 @@ def get_files_from_aws(bucket,prefix):
255
  return df
256
 
257
 
 
258
 
259
 
260
  if st.button('Generate Predictions'):
@@ -266,7 +263,6 @@ if st.button('Generate Predictions'):
266
 
267
  # Starting predictions
268
  model = joblib.load('models/models.sav')
269
- print(type(model))
270
  # Generate Email Data
271
  email_data = get_files_from_aws('emailcampaigntrainingdata','trainingdata/email_dataset_training.csv')
272
  acc_data = get_files_from_aws('emailcampaigntrainingdata','trainingdata/email_dataset_training_raw.csv')
@@ -282,7 +278,6 @@ if st.button('Generate Predictions'):
282
 
283
 
284
  industry_code_dict = get_industry_code_dict(email_data)
285
-
286
  bytes_data = uploaded_file.getvalue()
287
 
288
  email_body, character_cnt, url_cnt = email_body_extractor(bytes_data)
@@ -296,48 +291,67 @@ if st.button('Generate Predictions'):
296
  df_uploaded["industry_code"] = industry_code_dict.get(industry)
297
  df_uploaded_test = df_uploaded[["industry_code", "character_cnt", "url_cnt"]]
298
  predicted_rate = model.predict(df_uploaded_test)[0]
299
- output_rate = round(predicted_rate*100,2)
300
- print(output_rate)
301
- # output_rate = 0.5
302
  if output_rate < 0:
303
  print("Sorry, Current model couldn't provide predictions on the target variable you selected.")
304
  else:
305
- st.info('Current Character Count in Your Email is: {}'.format(character_cnt))
306
- st.info('The model predicts that it achieves a {} of {}%'.format(target, str(output_rate)))
307
-
308
- # print(target)
 
 
 
 
 
309
  if target == "click_to_open_rate":
310
  selected_variable = "Open_Rate"
311
  if target == "conversion_rate":
312
  selected_variable = "Click_Through_Rate"
313
 
 
314
  df_reco = training_dataset[["industry_code", "character_cnt", "url_cnt", selected_variable]]
315
- df_reco = df_reco[df_reco["industry_code"] == industry]
316
  df_reco[selected_variable]=df_reco[selected_variable].apply(lambda x:round(x, 3))
317
  df_reco_sort = df_reco.sort_values(by=[selected_variable])
318
  df_reco = df_reco.drop_duplicates(subset=selected_variable)
319
 
320
- if char_reco_preference == "Increase":
 
321
  df_reco_opt = df_reco[(df_reco[selected_variable] > output_rate) & (df_reco["character_cnt"] > character_cnt) & (df_reco["character_cnt"] <= (1.5*character_cnt))]
322
  df_reco_opt_rank = df_reco_opt.nlargest(3,[selected_variable])
323
- else:
 
324
  df_reco_opt = df_reco[(df_reco[selected_variable] > output_rate) & (df_reco["character_cnt"] < character_cnt)]
325
  df_reco_opt_rank = df_reco_opt.nlargest(3,[selected_variable])
326
-
 
 
 
 
 
 
 
 
 
 
 
 
327
  if selected_variable == "Open_Rate":
328
  selected_variable = "Click-to-Open_Rate"
329
  if selected_variable == "Click_Through_Rate":
330
  selected_variable = "Conversion_Rate"
331
 
332
- st.info('To get higher, {},the model recommends the following options:'.format(selected_variable))
333
  if len(df_reco_opt_rank) == 0:
334
- st.info('You ve already achieved the highest, {}, with the current character count!'.format(selected_variable))
335
  else:
336
  for _, row in df_reco_opt_rank.iterrows():
337
  Character_Count = row[1]
338
  selected_variable = row[3]
339
  # print(f"·Number of Characters: {int(Character_Count)}, Target Rate: {round(selected_variable, 3)*100}", "%")
340
- st.info('Number of Characters: {}, Target Rate: {}'.format(int(Character_Count), round(selected_variable, 3)*100))
341
 
342
 
343
  placeholder.empty()
 
5
  import re
6
  from io import StringIO
7
  import boto3
8
+ from urlextract import URLExtract
9
+
10
 
11
  # from joblib import dump, load
12
 
 
15
  from bokeh.models.widgets import Div
16
 
17
  import email
 
 
 
18
 
19
 
20
  def table_data():
 
104
 
105
  return body, character_cnt, url_cnt
106
 
 
 
 
 
 
 
 
107
 
108
  def add_bg_from_url():
109
  st.markdown(
 
207
 
208
  industry = st.selectbox(
209
  'Please select your industry',
210
+ industry_lists,
211
+ index=6
212
  )
213
 
214
  campaign = st.selectbox(
215
  'Please select your industry',
216
+ campaign_types,
217
+ index=5
218
  )
219
 
220
  target = st.selectbox(
221
  'Please select your target variable',
222
+ target_variables,
223
+ index=1
224
  )
225
 
226
  st.markdown("""---""")
227
 
228
  char_reco_preference = st.selectbox(
229
  'Do you want to increase or decrease your character count in the email?',
230
+ ["Increase", "Decrease"],
231
+ index=1)
232
 
233
+ st.cache()
234
  def get_files_from_aws(bucket,prefix):
235
  """
236
  get files from aws s3 bucket
 
251
  return df
252
 
253
 
254
+ # st.info([industry,campaign,target,char_reco_preference])
255
 
256
 
257
  if st.button('Generate Predictions'):
 
263
 
264
  # Starting predictions
265
  model = joblib.load('models/models.sav')
 
266
  # Generate Email Data
267
  email_data = get_files_from_aws('emailcampaigntrainingdata','trainingdata/email_dataset_training.csv')
268
  acc_data = get_files_from_aws('emailcampaigntrainingdata','trainingdata/email_dataset_training_raw.csv')
 
278
 
279
 
280
  industry_code_dict = get_industry_code_dict(email_data)
 
281
  bytes_data = uploaded_file.getvalue()
282
 
283
  email_body, character_cnt, url_cnt = email_body_extractor(bytes_data)
 
291
  df_uploaded["industry_code"] = industry_code_dict.get(industry)
292
  df_uploaded_test = df_uploaded[["industry_code", "character_cnt", "url_cnt"]]
293
  predicted_rate = model.predict(df_uploaded_test)[0]
294
+ output_rate = round(predicted_rate,4)
295
+
 
296
  if output_rate < 0:
297
  print("Sorry, Current model couldn't provide predictions on the target variable you selected.")
298
  else:
299
+ st.markdown('### Current Character Count in Your Email is: <span style="color:blue">{}</span>'.format(character_cnt), unsafe_allow_html=True)
300
+ # st.info('The model predicts that it achieves a {} of {}%'.format(target, str(round(output_rate*100,2))))
301
+ if target == 'conversion_rate':
302
+ target_vis = 'Conversion Rate'
303
+ else:
304
+ target_vis = 'Click-to-Open Rate'
305
+ st.markdown('### The model predicts that it achieves a <span style="color:blue">{}</span> of {}%'.format(target_vis, str(round(output_rate*100,2))),unsafe_allow_html=True)
306
+ selected_industry_code = industry_code_dict.get(industry)
307
+
308
  if target == "click_to_open_rate":
309
  selected_variable = "Open_Rate"
310
  if target == "conversion_rate":
311
  selected_variable = "Click_Through_Rate"
312
 
313
+
314
  df_reco = training_dataset[["industry_code", "character_cnt", "url_cnt", selected_variable]]
315
+ df_reco = df_reco[df_reco["industry_code"] == selected_industry_code]
316
  df_reco[selected_variable]=df_reco[selected_variable].apply(lambda x:round(x, 3))
317
  df_reco_sort = df_reco.sort_values(by=[selected_variable])
318
  df_reco = df_reco.drop_duplicates(subset=selected_variable)
319
 
320
+ preference = char_reco_preference
321
+ if preference == "Increase":
322
  df_reco_opt = df_reco[(df_reco[selected_variable] > output_rate) & (df_reco["character_cnt"] > character_cnt) & (df_reco["character_cnt"] <= (1.5*character_cnt))]
323
  df_reco_opt_rank = df_reco_opt.nlargest(3,[selected_variable])
324
+ ## decrease character reco
325
+ if preference == "Decrease":
326
  df_reco_opt = df_reco[(df_reco[selected_variable] > output_rate) & (df_reco["character_cnt"] < character_cnt)]
327
  df_reco_opt_rank = df_reco_opt.nlargest(3,[selected_variable])
328
+
329
+
330
+ # something wrong here
331
+ # if char_reco_preference == "Increase":
332
+ # df_reco_opt = df_reco[(df_reco[selected_variable] > output_rate) & (df_reco["character_cnt"] > character_cnt) & (df_reco["character_cnt"] <= (1.5*character_cnt))]
333
+ # df_reco_opt_rank = df_reco_opt.nlargest(3,[selected_variable])
334
+ # if char_reco_preference == "Decrease":
335
+ # df_reco_opt = df_reco[(df_reco[selected_variable] > output_rate) & (df_reco["character_cnt"] < character_cnt)]
336
+ # df_reco_opt_rank = df_reco_opt.nlargest(3,[selected_variable])
337
+
338
+ # print(df_reco_opt)
339
+ # print(df_reco_opt_rank)
340
+
341
  if selected_variable == "Open_Rate":
342
  selected_variable = "Click-to-Open_Rate"
343
  if selected_variable == "Click_Through_Rate":
344
  selected_variable = "Conversion_Rate"
345
 
346
+ st.markdown('### To get higher, <span style="color:blue">{}</span>,the model recommends the following options:'.format(selected_variable),unsafe_allow_html=True)
347
  if len(df_reco_opt_rank) == 0:
348
+ st.info('You ve already achieved the highest, <span style="color:blue">{}</span>, with the current character count!'.format(selected_variable),unsafe_allow_html=True)
349
  else:
350
  for _, row in df_reco_opt_rank.iterrows():
351
  Character_Count = row[1]
352
  selected_variable = row[3]
353
  # print(f"·Number of Characters: {int(Character_Count)}, Target Rate: {round(selected_variable, 3)*100}", "%")
354
+ st.markdown('#### Number of Characters: {}, Target Rate: {}'.format(int(Character_Count), round(selected_variable*100, 3)))
355
 
356
 
357
  placeholder.empty()