Spaces:
Sleeping
Sleeping
import gradio as gr | |
from gradio_calendar import Calendar | |
import datetime | |
import requests | |
import pandas as pd | |
import os | |
import matplotlib.pyplot as plt | |
import seaborn as sns | |
import numpy as np | |
import calendar | |
# Brevo endpoint queried for transactional e-mail events.
url = "https://api.brevo.com/v3/smtp/statistics/events"

# Headers sent with every request; the API key comes from the BREVO_API_KEY
# environment variable and is None when that variable is unset.
headers = {
    "accept": "application/json",
    "api-key": os.getenv("BREVO_API_KEY"),
}

# Column names of interest in the merged users/events dataset.
events_cols = [
    "collectivite", "email", "date", "subject",
    "event", "g_men", "g_conso", "ressource",
]
def get_events(limit=5000, offset=0, **kwargs):
    """Fetch every event matching the query, following pagination.

    Pages of ``limit`` events are requested, starting at ``offset``, until a
    page comes back with fewer than ``limit`` events.

    Args:
        limit: Page size sent to the API.
        offset: Index of the first event to request.
        **kwargs: Extra query parameters forwarded unchanged to the API.

    Returns:
        A list with the events of all fetched pages. When a request fails the
        error is printed and the events collected so far are returned
        (best-effort, no exception is propagated).
    """
    collected = []
    while True:
        params = {'limit': limit, 'offset': offset}
        params.update(kwargs)
        try:
            # Get all your transactional email activity (unaggregated events)
            api_response = requests.get(url=url, params=params, headers=headers, timeout=60)
            api_response.raise_for_status()
            # A payload without an 'events' key is treated as an empty page.
            events = api_response.json().get('events') or []
        except (requests.RequestException, ValueError, AttributeError) as e:
            print("Exception when calling TransactionalEmailsApi->get_email_event_report: %s\n" % e)
            break
        print(f"- Found {len(events)} events (limit={limit} - offset={offset}).")
        collected.extend(events)
        if len(events) < limit:
            # Short page: nothing left to fetch.
            break
        offset += limit
    return collected
def get_all_events(year, months):
    """Download the events of the given months into a single DataFrame.

    Months that have not started yet (first day is today or later) are
    skipped, and the end of the current month is capped at today.

    Args:
        year: Calendar year as an int.
        months: Iterable of month numbers (1-12).

    Returns:
        A DataFrame with one row per event, in the order the months were
        given; the row index of each monthly frame is kept as-is. An empty
        DataFrame is returned when no month was queried.
    """
    today = datetime.date.today()
    frames = []
    for month in months:
        first_day = datetime.date(year, month, 1)
        if first_day >= today:
            continue
        last_day = datetime.date(year, month, calendar.monthrange(year, month)[1])
        # Never query past today.
        end_day = min(last_day, today)
        print(f'{year}.{month:02d}:')
        events = get_events(
            startDate=first_day.isoformat(),
            endDate=end_day.isoformat(),
            sort='asc',
        )
        frames.append(pd.DataFrame(events))
        print(f'=> Found {len(events)} month events.\n')
    # Concatenate once instead of growing a DataFrame inside the loop.
    df = pd.concat(frames) if frames else pd.DataFrame()
    print(f'=> Found {len(df)} total events.')
    return df
# Add stratification datas
def get_stratification_datas(events_df, users_df):
    """Attach the user columns to every logged event.

    Args:
        events_df: Events frame; must contain 'email' and 'messageId'.
        users_df: Users frame; must contain 'mail'.

    Returns:
        The left merge of users onto events (``mail == email``) restricted to
        rows that have a ``messageId``, so users without any event are
        dropped.
    """
    merged = users_df.merge(events_df, left_on='mail', right_on='email', how='left')
    has_event = merged['messageId'].notna()
    return merged[has_event]
# Filter event
def filter_datas(df, filters):
    """Keep the rows that satisfy every ``(column, allowed_values)`` filter.

    Args:
        df: DataFrame to filter.
        filters: Sequence of ``(column, values)`` pairs, applied in order;
            a row is kept when ``df[column]`` is one of ``values``. ``None``
            or an empty sequence returns ``df`` unchanged.

    Returns:
        The filtered DataFrame (``df`` itself when there is nothing to apply).
    """
    for column, allowed in filters or ():
        df = df[df[column].isin(allowed)]
    return df
# Filter on date
def filter_dates(df, start_date, end_date):
    """Select rows whose 'date' lies in ``[start_date, end_date)``.

    Args:
        df: DataFrame with a datetime 'date' column.
        start_date: Inclusive lower bound; when falsy, the earliest date
            found in ``df`` is used.
        end_date: Exclusive upper bound; when falsy, the latest date found
            in ``df`` is used.

    Returns:
        The matching rows without the 'date' column, de-duplicated and
        re-indexed from 0.
    """
    dates = df['date']
    # Default bounds come from the frame being filtered, not from a global.
    start = np.datetime64(start_date if start_date else dates.min())
    end = np.datetime64(end_date if end_date else dates.max())
    in_range = (dates >= start) & (dates < end)
    return df[in_range].drop(columns='date').drop_duplicates().reset_index(drop=True)
# Display labels (French) for the stratification columns.
strate_dict = dict(
    g_men='Ménage',
    g_conso='Consommation',
)

# Display labels (French) for the event names kept in the dataset.
events_dict = dict(
    delivered='Envoyé',
    opened='Ouvert',
    loadedByProxy='Ouvert (Apple)',
    clicks='Cliqué',
    unsubscribed='Désinscrit',
)
# Load users
filename = "dataset.csv"
users_df = pd.read_csv(filename)
print(f'- Loaded users datas: {users_df.shape}.')

# Load consumptions
filename = "consos.csv"
consos_df = pd.read_csv(filename)
# Presumably 'periode' is parsed as a number, so 2023.10 is rendered as
# '2023.1' once cast to text; restore the trailing zero for that period.
consos_df['periode'] = consos_df['periode'].astype(str).replace('2023.1', '2023.10')
print(f'- Loaded consos datas: {consos_df.shape}.')

# Load log events
print('Load events dataframe...')
events_df = get_all_events(year=2024, months=list(range(5, 11)))

# Remove test users
# NOTE(review): the first entry holds two addresses inside a single string,
# so isin() can match neither of them, and the remaining entries look
# redacted/duplicated. Confirm the intended list of test addresses.
test_users = [
    '[email protected], anne@thegoodtrack',
    '[email protected]',
    '[email protected]',
    '[email protected]',
    '[email protected]',
]
is_test_user = events_df['email'].isin(test_users)
# reset_index() keeps the previous row index as an 'index' column.
events_df = events_df[~is_test_user].reset_index()

# Remove impact mails
# is_impact_mail = events_df['templateId'].isin([39, 40])
# events_df = events_df[~is_impact_mail].reset_index()

# Cast datetime column (timezone information is dropped)
events_df['date'] = pd.to_datetime(events_df['date']).dt.tz_localize(None)

print('Build dataset...')
dataset = get_stratification_datas(events_df, users_df)

# Event col: keep only the known event types and translate them.
is_known_event = dataset['event'].isin(events_dict.keys())
# Explicit copy: columns are assigned below, which must not target a slice.
dataset = dataset[is_known_event].copy()
dataset['event'] = dataset['event'].map(events_dict)

# Rename subject: collapse every subject containing the common sentence
# into that single (capitalized) sentence.
com_sub = 'savez-vous comment se porte votre ressource en eau ?'
subjects = [com_sub.capitalize() if com_sub in sub else sub for sub in dataset['subject']]
dataset['subject'] = subjects

# Strates col
# dataset.rename(columns=strate_dict, inplace=True)
print(f'Dataset ready: {dataset.shape}.')
def mails(community, subjects, start_date, end_date, gmens, gconsos, ressources):
    """Compute e-mail campaign statistics for the selected slice of the dataset.

    Args:
        community: Allowed 'collectivite' values, or a falsy value for all.
        subjects: Allowed 'subject' values, or a falsy value for all.
        start_date: Inclusive lower date bound (see ``filter_dates``).
        end_date: Exclusive upper date bound (see ``filter_dates``).
        gmens: Allowed 'g_men' values, or a falsy value for all.
        gconsos: Allowed 'g_conso' values, or a falsy value for all.
        ressources: Allowed 'ressource' values, or a falsy value for all.

    Returns:
        A dict keyed by ``"<event label> (<count>)"``. Counts are taken over
        distinct (email, subject, event) triples. Values are the count divided
        by the number of 'Envoyé' triples, or the raw count when there is no
        'Envoyé' triple.
    """
    # Filter dates
    selected = filter_dates(dataset, start_date, end_date)

    # Filter events: only criteria that carry a selection are applied.
    criteria = (
        ('collectivite', community),
        ('subject', subjects),
        ('g_men', gmens),
        ('g_conso', gconsos),
        ('ressource', ressources),
    )
    active = [(column, values) for column, values in criteria if values]
    selected = filter_datas(selected, active)
    selected = selected[['email', 'subject', 'event']].drop_duplicates()

    counts = selected['event'].value_counts().to_dict()
    total = counts.get('Envoyé', 0)
    if total > 0:
        return {f'{k} ({v})': v / total for k, v in counts.items()}
    return {f'{k} ({v})': v for k, v in counts.items()}
def _grouped_bar_figure(datas, periods, aggregate, scale, ndigits, title, ylabel, height):
    """Draw one test/control grouped bar chart of an aggregate per period."""
    width = 0.4  # the width of the bars
    x = np.arange(len(periods))
    # A standalone Figure is not registered in pyplot's global state, so it is
    # freed as soon as the caller drops it.
    fig = plt.Figure(layout='constrained', figsize=(max(len(periods), 10), height))
    ax = fig.subplots()
    for position, name in enumerate(('test', 'control')):
        members = datas[datas['group'] == name]
        if members.empty:
            continue
        # Align the aggregate on the tick labels; periods missing for this
        # group are drawn as 0.
        per_period = (
            members.groupby('periode')['consommation']
            .agg(aggregate)
            .reindex(periods)
            .fillna(0)
            .tolist()
        )
        heights = [round(value / scale, ndigits) for value in per_period]
        rects = ax.bar(x + width * position, heights, width, label=name)
        ax.bar_label(rects, padding=3)
    ax.set_title(title)
    ax.set_ylabel(ylabel)
    ax.set_xlabel('Période')
    # Centre each tick between the two bars of its period.
    ax.set_xticks(x + width / 2, periods)
    ax.legend(loc='upper right', ncols=2)
    return fig


def consos(community, periods, group):
    """Plot consumption per period for the 'test' and 'control' groups.

    Args:
        community: Allowed 'collectivite' values, or a falsy value for all.
        periods: Allowed 'periode' values, or a falsy value for all.
        group: Allowed 'group' values, or a falsy value for all.

    Returns:
        A tuple ``(fig_sum, fig_med, fig_mean)`` of matplotlib figures showing
        the summed, median and mean 'consommation' per period. Sums are
        divided by 1 000 000 and rounded to integers; medians and means are
        divided by 1 000 and rounded to one decimal, matching the axis labels.
    """
    # Filter consos
    criteria = (
        ('collectivite', community),
        ('periode', periods),
        ('group', group),
    )
    active = [(column, values) for column, values in criteria if values]
    datas = filter_datas(consos_df, active)
    # Group on the same textual period values that are used as tick labels.
    datas = datas.assign(periode=datas['periode'].astype(str))
    labels = datas['periode'].unique()

    fig_sum = _grouped_bar_figure(
        datas, labels, 'sum', 1000 * 1000, None,
        'Evolution de la consommation cumulée par groupe test/contrôle',
        'Consommation (dam3 = 1000 m3)', 10,
    )
    fig_med = _grouped_bar_figure(
        datas, labels, 'median', 1000, 1,
        'Evolution de la consommation médiane par groupe test/contrôle',
        'Consommation (m3)', 5,
    )
    fig_mean = _grouped_bar_figure(
        datas, labels, 'mean', 1000, 1,
        'Evolution de la consommation moyenne par groupe test/contrôle',
        'Consommation (m3)', 5,
    )
    return fig_sum, fig_med, fig_mean
def results(subjects, start_date, end_date, gmens, gconsos, ressources):
    """Gradio callback: campaign statistics plus the median consumption chart.

    Args:
        subjects: Selected e-mail subjects, or a falsy value for all.
        start_date: Inclusive lower date bound for the e-mail events.
        end_date: Exclusive upper date bound for the e-mail events.
        gmens: Selected 'g_men' values, or a falsy value for all.
        gconsos: Selected 'g_conso' values, or a falsy value for all.
        ressources: Selected 'ressource' values, or a falsy value for all.

    Returns:
        ``(campaign, consos_med)``: the dict produced by ``mails`` and the
        median-consumption figure produced by ``consos``. The consumption
        chart is not filtered by community, period or group.
    """
    community = None
    campaign = mails(community, subjects, start_date, end_date, gmens, gconsos, ressources)
    consos_sum, consos_med, consos_mean = consos(community, None, None)
    # Only the median chart is displayed; release the two unused figures so
    # they do not accumulate in pyplot across requests.
    plt.close(consos_sum)
    plt.close(consos_mean)
    return campaign, consos_med
def _multiselect(column, label, info):
    """Build a multi-select dropdown listing the distinct values of a dataset column."""
    return gr.Dropdown(
        choices=list(dataset[column].unique()),
        multiselect=True,
        allow_custom_value=False,
        label=label,
        info=info,
    )


# Inputs currently disabled: community, event type, free-text search, origin
# and number of results. Outputs currently disabled: cumulative and mean plots.
main_interface = gr.Interface(
    fn=results,
    inputs=[
        _multiselect('subject', "Mél envoyé", "Choisir un ou plusieurs emails envoyés"),
        Calendar(
            type="datetime",
            value=events_df['date'].min(),
            label="Date de début",
            info="Choisir une date de début",
        ),
        Calendar(
            type="datetime",
            value=events_df['date'].max(),
            label="Date de fin",
            info="Choisir une date de fin",
        ),
        _multiselect('g_men', "Type de ménage", "Choisir un ou plusieurs groupes"),
        _multiselect('g_conso', "Type de consommation", "Choisir un ou plusieurs groupes"),
        _multiselect('ressource', "Ressource", "Choisir une ou plusieurs ressources"),
    ],
    outputs=[
        gr.Label(label="Méls"),
        gr.Plot(label="Médian"),
    ],
    examples=[],
    cache_examples=False,
)

# A tabbed layout (e-mails / consumptions) is not enabled; the single
# interface is served directly.
gradio_app = main_interface

if __name__ == "__main__":
    # NOTE(review): credentials are hard-coded while share=True is set;
    # consider loading them from the environment.
    gradio_app.launch(auth=("alerte", "renforcée"), share=True)