zzmez committed on
Commit
754305e
·
1 Parent(s): fd392e2

feat: add both NL and offers

Browse files
Files changed (2) hide show
  1. app.py +3 -3
  2. utils/gradio_utils.py +51 -19
app.py CHANGED
@@ -11,13 +11,13 @@ load_dotenv(dotenv_path=".env", override=True)
11
  USER = os.getenv("USERNAME")
12
  PASS = os.getenv("PASSWORD")
13
 
14
-
15
  list_iface = gr.Interface(fn=compute_offer,
16
  inputs=[gr.File(label="Upload CSV", type="file"),
17
  gr.Slider(1, 365, value=30, step=1, label="Days", info="Number of days to look back"),
18
  gr.Slider(5000, 100000, value=15000, step=1, label="Minimum Sent", info="Minimum number of emails sent"),
19
- gr.Dropdown(["Comcast", "Yahoo", "Hotmail", "Aol"], value="Comcast", label="Domain")
20
- ],
21
  outputs="dataframe")
22
 
23
  # PLOTTING
 
11
  USER = os.getenv("USERNAME")
12
  PASS = os.getenv("PASSWORD")
13
 
14
+ # create an interface and limit the output's width for the dataframe
15
  list_iface = gr.Interface(fn=compute_offer,
16
  inputs=[gr.File(label="Upload CSV", type="file"),
17
  gr.Slider(1, 365, value=30, step=1, label="Days", info="Number of days to look back"),
18
  gr.Slider(5000, 100000, value=15000, step=1, label="Minimum Sent", info="Minimum number of emails sent"),
19
+ gr.Dropdown(["Comcast", "Yahoo", "Hotmail", "Aol"], value="Comcast", label="Domain"),
20
+ gr.Radio(["Newsletters", "Offers"], label="Type", value="Newsletters")],
21
  outputs="dataframe")
22
 
23
  # PLOTTING
utils/gradio_utils.py CHANGED
@@ -1,7 +1,7 @@
1
  import random
2
  import ipaddress
3
  import pandas as pd
4
-
5
 
6
  ### SWAKS ###
7
 
@@ -230,36 +230,68 @@ def generate_ips_per_subclass(ip_subclasses: str, num_of_ips: int) -> str:
230
  return "\n".join(ip_addresses)
231
 
232
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
233
  ### GENERATE TOP LISTS ###
234
- def compute_offer(csv_file, days_lookback, min_sent, domain):
 
235
  # cmp_list = ['MSP', 'HOM', 'NTU', 'HCK', 'DDS', 'MNP', 'PSC', 'DTL', 'GVS', 'ANP', 'WDR', 'BSG'] #1
236
 
237
  #raw_df = pd.read_csv('tools/data/30.08.2023.gabriel.sabau.campanii.csv', parse_dates=['Data'])
238
- comcast_df = pd.read_csv(csv_file.name, parse_dates=['Data'])
239
  # raw_df = pd.read_csv(csv_file.name, parse_dates=['Data']) #1
240
 
241
  cols = ['Campanie', 'Oferta', 'Nume', 'Server', 'User',
242
  'Lista Custom', 'Data', 'HClicks', 'Clicks', 'Unscribers', 'Openers',
243
  'Click Open', 'Leads', 'CLike', 'Complains', 'Traps', 'Send']
244
- # comcast_df = raw_df[raw_df['Nume'].str.contains('|'.join(cmp_list))] #1
245
- # comcast_df = comcast_df[comcast_df['Domeniu'] == 'Comcast'] #2
246
- exclude_list = comcast_df[(comcast_df['Data'] > (pd.Timestamp('now') - pd.Timedelta(days=days_lookback))) \
247
- & (comcast_df['Domeniu'] == domain)]['Oferta'].unique()
248
- comcast_df = comcast_df[~comcast_df['Oferta'].isin(exclude_list)]
249
 
250
- comcast_df = comcast_df[comcast_df['Send'] > int(min_sent)]
251
- comcast_df = comcast_df[cols]
252
- comcast_df['Click Open'] = comcast_df['Click Open'].str.replace('%', '').astype(float)
253
-
254
- # comcast_df = comcast_df[comcast_df['Lista Custom'].str.contains('open')]
255
- comcast_df = comcast_df[comcast_df['Nume'].str.startswith("Aeon News") & comcast_df['Nume'].str.contains(r'\b[A-Z]{3}\b.*\b\d{4}\*?\s*(\(\d{4}\))?\b')]
256
-
257
- comcast_df.reset_index(drop=True, inplace=True)
258
-
259
- final_df = comcast_df.groupby(["Oferta", "Nume"])\
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
260
  .agg( N=('Oferta', 'count'), send_avg=('Send', 'mean'), CO=('Click Open', 'mean'))\
261
  .sort_values(['CO', 'N'], ascending=False)
262
- final_df['send_avg'] = final_df['send_avg'].round(2).astype(float)
 
263
  final_df.reset_index(inplace=True)
264
 
265
  return final_df
 
1
  import random
2
  import ipaddress
3
  import pandas as pd
4
+ import re
5
 
6
  ### SWAKS ###
7
 
 
230
  return "\n".join(ip_addresses)
231
 
232
 
233
def _replace_numbers(input_string: str) -> str:
    """Replace a number with the parenthesised number that follows it.

    Looks for the first ``<digits> (<digits>)`` pair, e.g. ``"1234 (5678)"``,
    substitutes the digits inside the parentheses for the digits in front of
    them, then removes every ``(<digits>)`` group and strips the ends.

    Example: ``"Name 1234 (5678)"`` -> ``"Name 5678"``.

    Args:
        input_string: Text to rewrite.

    Returns:
        The rewritten text, or ``input_string`` unchanged when it contains no
        ``<digits> (<digits>)`` pair.
    """
    # Find the numbers before and inside the parentheses
    match = re.search(r'(\d+)\s*\((\d+)\)', input_string)
    if match is None:
        return input_string

    # Splice by position: str.replace(old, new, 1) would rewrite the first
    # textual occurrence of the digits, which may sit earlier in the string
    # than the occurrence that was actually matched.
    replaced_string = (
        input_string[:match.start(1)]
        + match.group(2)
        + input_string[match.end(1):]
    )
    # Remove the parenthesised numbers, then trim leading/trailing whitespace
    return re.sub(r'\(\d+\)', '', replaced_string).strip()
244
+
245
def _limit_chars(input_string: str, limit: int = 35) -> str:
    """Return at most ``limit`` leading characters of ``input_string``.

    Args:
        input_string: Text to truncate.
        limit: Maximum number of characters to keep; values below zero are
            treated as zero.

    Returns:
        The truncated text (the whole string when it is already short enough).
    """
    # A negative slice bound counts from the end ("abcdef"[:-2] == "abcd"),
    # which would return more text than a limit of 0; clamp it instead.
    return input_string[:max(limit, 0)]
247
+
248
  ### GENERATE TOP LISTS ###
249
def compute_offer(csv_file, days_lookback, min_sent, domain, offer_type):
    """Rank campaigns from an uploaded CSV by their mean "Click Open" rate.

    Rows are dropped when their ``Oferta`` value appears for ``domain`` within
    the last ``days_lookback`` days, when ``Send`` is not above ``min_sent``,
    or when ``Oferta`` is a single blank. The remainder is filtered by
    ``offer_type`` and aggregated per (``Oferta``, ``Nume``) pair.

    Args:
        csv_file: Object whose ``.name`` attribute is the path of the CSV to
            read. The CSV must contain a ``Domeniu`` column and every column
            listed in ``cols`` below.
        days_lookback: Size of the exclusion window, in days before now.
        min_sent: Exclusive lower bound for ``Send``; converted with ``int``.
        domain: ``Domeniu`` value the exclusion window applies to.
        offer_type: ``"Newsletters"`` keeps names that start with
            "Aeon News", end in a space plus four digits and contain no
            "(dddd)" group; ``"Offers"`` keeps names that do not start with
            "Aeon News". Any other value applies no name filter.

    Returns:
        A DataFrame with columns ``Oferta``, ``Nume``, ``N`` (row count),
        ``send_avg`` (mean ``Send`` converted to int) and ``CO`` (mean
        "Click Open" rounded to two decimals), sorted by ``CO`` then ``N``,
        both descending.
    """
    # NOTE(review): this changes a process-wide pandas display option on
    # every call; kept because callers may rely on it -- confirm it is needed.
    pd.set_option('display.max_colwidth', 10)

    df_all = pd.read_csv(csv_file.name, parse_dates=['Data'])

    cols = ['Campanie', 'Oferta', 'Nume', 'Server', 'User',
            'Lista Custom', 'Data', 'HClicks', 'Clicks', 'Unscribers', 'Openers',
            'Click Open', 'Leads', 'CLike', 'Complains', 'Traps', 'Send']

    # Offers seen for this domain inside the look-back window are excluded
    # everywhere, whatever domain or date the other rows carry.
    cutoff = pd.Timestamp('now') - pd.Timedelta(days=days_lookback)
    recently_used = (df_all['Data'] > cutoff) & (df_all['Domeniu'] == domain)
    exclude_list = df_all.loc[recently_used, 'Oferta'].unique()
    df_all = df_all[~df_all['Oferta'].isin(exclude_list)]

    df_all = df_all[df_all['Send'] > int(min_sent)]
    # Copy so the column assignment below writes to an independent frame
    # instead of a slice of the frame read from disk.
    df_all = df_all[cols].copy()
    # Cast to str first so a column that was parsed as numeric (no '%' signs
    # in the CSV) does not break the .str accessor.
    df_all['Click Open'] = (
        df_all['Click Open']
        .astype(str)
        .str.replace('%', '', regex=False)
        .astype(float)
    )
    # The CSV can contain a row whose offer is a single blank; drop it.
    df_all = df_all[df_all["Oferta"] != " "]

    # Filter for newsletters or offers. na=False makes rows with a missing
    # name count as "no match" instead of putting NaN into the boolean mask,
    # which would make the `~` negation raise.
    if offer_type == "Newsletters":
        names = df_all['Nume']
        is_newsletter = names.str.startswith("Aeon News", na=False)
        has_paren_number = names.str.contains(r'\(\d{4}\)', na=False)
        ends_with_number = names.str.contains(r' \d{4}$', na=False)
        df_all = df_all[is_newsletter & ~has_paren_number & ends_with_number]
    elif offer_type == "Offers":
        is_newsletter = df_all['Nume'].str.startswith("Aeon News", na=False)
        df_all = df_all[~is_newsletter]

    df_all = df_all.reset_index(drop=True)

    final_df = (
        df_all.groupby(["Oferta", "Nume"])
        .agg(N=('Oferta', 'count'), send_avg=('Send', 'mean'), CO=('Click Open', 'mean'))
        .sort_values(['CO', 'N'], ascending=False)
    )
    final_df['send_avg'] = final_df['send_avg'].astype(int)
    final_df['CO'] = final_df['CO'].round(2).astype(float)

    return final_df.reset_index()