Spaces:
Sleeping
Sleeping
import os | |
import logging | |
from io import StringIO | |
from pathlib import Path | |
from collections import OrderedDict | |
import plotly.express as px | |
import gradio as gr | |
import pandas as pd | |
from dotenv import load_dotenv | |
# from PIL import Image | |
import matplotlib.pyplot as plt | |
# import cv2 | |
# import numpy as np | |
import plotly.graph_objects as go | |
import networkx as nx | |
from model import dfg2networkx, discover_process_map, discover_process_map_activities_connections, discover_process_map_variants, view_process_map | |
# Load variables from a .env file (if one is found) so later os.environ
# lookups in this module can see them.
load_dotenv()

# Module-level logger; its own threshold is DEBUG so the debug dumps emitted
# by the handlers below are not filtered at this logger.
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
def get_data(temp_file, state: dict):
    """Load an uploaded event log and cache it in the session state.

    The file is read as a ``;``-separated CSV. ``case_id`` is kept as text
    and ``timestamp`` is converted to a datetime column.

    Args:
        temp_file: Either a filesystem path (``str``) or an object exposing
            the path through its ``name`` attribute.
        state: Per-session dictionary; it is updated in place.

    Returns:
        A ``(df, state)`` tuple: the loaded frame and the same ``state``
        object with the frame stored under ``'df'``.
    """
    # Both upload flavours boil down to a path; resolve it once instead of
    # duplicating the read logic per branch.
    csv_path = temp_file if isinstance(temp_file, str) else temp_file.name

    df = pd.read_csv(
        csv_path,
        sep=';',
        dtype={'case_id': str},
        parse_dates=['timestamp'],
    )
    # read_csv leaves the column untouched (object dtype) when it cannot parse
    # it, so convert explicitly. Plain column assignment is used because
    # ``df.loc[:, col] = ...`` may write into the existing object column
    # instead of replacing it with a datetime one.
    df['timestamp'] = pd.to_datetime(df['timestamp'])

    # Results cached from a previously uploaded log would otherwise survive:
    # 'top_variant' is only written when it is absent from the state.
    for stale_key in ('top_variant', 'top_variant_connections'):
        state.pop(stale_key, None)

    state['df'] = df
    return df, state
def get_stats(state: dict):
    """Build the summary table and per-case bar charts for the loaded log.

    Args:
        state: Per-session dictionary; the event log is read from ``'df'``
            and must provide ``case_id``, ``activity`` and ``timestamp``
            columns.

    Returns:
        A 5-tuple ``(summary, events_chart, lead_time_chart,
        avg_duration_chart, state)`` where ``summary`` is a two-column
        ``metric``/``value`` DataFrame and the charts are ``gr.BarPlot``
        instances keyed by ``case_id``.

    Raises:
        gr.Error: If no event log has been stored in ``state`` yet.
    """
    df = state.get('df')
    if df is None:
        # Previously an empty fallback frame was used, which failed with a
        # bare KeyError on 'case_id'; give the user an actionable message.
        raise gr.Error("No event log loaded. Upload a CSV file first.")

    # Labels: row count, number of cases, number of activities,
    # first timestamp, last timestamp.
    summary = pd.DataFrame({
        "metric": ["資料筆數", "Case 數量", "Activity 數量", "起始時間", "結束時間"],
        "value": [
            df.shape[0],
            df['case_id'].nunique(),
            df['activity'].nunique(),
            df['timestamp'].min(),
            df['timestamp'].max(),
        ],
    })

    def span_hours(timestamps):
        """Whole hours between the first and last timestamp of one case."""
        return (timestamps.max() - timestamps.min()).total_seconds() // 3600

    # as_index=False already yields 'case_id' as a regular column, so no
    # reset_index() is needed (it only added a stray 'index' column).
    per_case = df.groupby(by=['case_id'], as_index=False)

    case_stats = per_case.agg(count=('activity', len))
    logger.debug("case stats: %s", case_stats)

    case_lead_time = per_case.agg(duration=('timestamp', span_hours))
    logger.debug("case lead time: %s", case_lead_time)

    # NOTE(review): the "average duration" series has always been computed
    # with the same max-min span as the lead time, so both charts show the
    # same numbers. The values are kept as they were; confirm the intended
    # formula (e.g. span divided by the number of events).
    case_avg_duration = case_lead_time.rename(columns={'duration': 'avg_duration'})

    return (
        summary,
        gr.BarPlot(case_stats, x="case_id", y="count", title="Case Stats",
                   tooltip=["case_id", "count"], width=None),
        gr.BarPlot(case_lead_time, x="case_id", y="duration", title="Case Lead Time",
                   tooltip=["case_id", "duration"], width=None),
        gr.BarPlot(case_avg_duration, x="case_id", y="avg_duration",
                   title="Case Average Duration",
                   tooltip=["case_id", "avg_duration"], width=None),
        state,
    )
def get_process_map(state: dict = None):
    """Discover a Petri-net process model from the cached event log.

    Args:
        state: Per-session dictionary; the event log is read from ``'df'``
            (an empty DataFrame is used when the key is missing). ``None``
            is treated as a fresh empty dictionary.

    Returns:
        A ``(img, state)`` tuple, where ``img`` is the second value returned
        by ``discover_process_map(df, type='petrinet')``.
    """
    # A literal ``{}`` default would be one dict shared by every call that
    # omits ``state``; create a new one per call instead.
    if state is None:
        state = {}

    df = state.get('df', pd.DataFrame()).copy()
    # Only the rendered image is used here; the net object is discarded.
    _net, img = discover_process_map(df, type='petrinet')
    return img, state
def get_process_map_variants(top_k: int = 1, state: dict = None):
    """Chart the directly-follows graph for the selected number of variants.

    Args:
        top_k: Number of variants forwarded to
            ``discover_process_map_variants``.
        state: Per-session dictionary; the event log is read from ``'df'``
            (an empty DataFrame is used when the key is missing). ``None``
            is treated as a fresh empty dictionary. Updated in place:

            * ``'top_variant_connections'`` is set to the graph's items
              ordered by descending value.
            * ``'top_variant'`` is set to the graph plus its start/end
              activities, but only when ``top_k == 1`` and the key is not
              already present.

    Returns:
        A ``(chart, state)`` tuple, where ``chart`` is whatever
        ``view_process_map`` returns for the graph.
    """
    # This function writes into ``state``; a literal ``{}`` default would
    # leak those writes into every later call that omits the argument.
    if state is None:
        state = {}

    df = state.get('df', pd.DataFrame()).copy()
    dfg, start_activities, end_activities = discover_process_map_variants(
        df, top_k, type='dfg'
    )

    # Highest-valued connections first.
    state['top_variant_connections'] = OrderedDict(
        sorted(dfg.items(), key=lambda item: item[1], reverse=True)
    )

    if top_k == 1 and 'top_variant' not in state:
        state['top_variant'] = {
            'dfg': dfg,
            'start_activities': start_activities,
            'end_activities': end_activities,
        }

    nx_graph = dfg2networkx(dfg, start_activities, end_activities)
    chart = view_process_map(nx_graph, process_type='dfg', layout_type='sfdp')
    return chart, state
def get_process_map_activities_connections(activity_rank: int = 0, connection_rank: int = 0, state: dict = None):
    """Chart the directly-follows graph for the chosen activity/connection ranks.

    Args:
        activity_rank: Forwarded as ``activity_rank`` to
            ``discover_process_map_activities_connections``.
        connection_rank: Forwarded as ``connection_rank`` to the same helper.
        state: Per-session dictionary; the event log is read from ``'df'``
            (an empty DataFrame is used when the key is missing) and the
            dictionary itself is also handed to the helper. ``None`` is
            treated as a fresh empty dictionary.

    Returns:
        A ``(chart, state)`` tuple, where ``chart`` is whatever
        ``view_process_map`` returns for the graph.
    """
    # Avoid a shared mutable default: the dict is passed on to the helper
    # and returned to the caller.
    if state is None:
        state = {}

    df = state.get('df', pd.DataFrame()).copy()
    dfg, start_activities, end_activities = discover_process_map_activities_connections(
        df,
        activity_rank=activity_rank,
        connection_rank=connection_rank,
        state=state,
    )
    nx_graph = dfg2networkx(dfg, start_activities, end_activities)
    chart = view_process_map(nx_graph, process_type='dfg', layout_type='sfdp')
    return chart, state
## --- block --- ##
# Page-level CSS: centre every top-level heading.
css = """
h1 {
    text-align: center;
    display:block;
}
"""

# NOTE(review): this file reached review with its indentation stripped, so
# the nesting of the layout containers below is reconstructed from statement
# order -- confirm it against the running app.
demo = gr.Blocks(css=css)
with demo:
    # NOTE(review): the characters flanking the title are what is left of a
    # mis-decoded emoji; the original glyph cannot be recovered from this
    # file -- restore it by hand.
    gr.Markdown("# ๐ Process Discovery ๐")

    # Per-session dictionary threaded through every handler.
    state = gr.State(value={})

    with gr.Row():
        upl_btn = gr.UploadButton(label="Upload", file_types=['.csv'], file_count="single")

    # with gr.Row('Data Preview'):
    with gr.Accordion('Data Preview'):
        df = gr.Dataframe()

    upl_btn.upload(fn=get_data, inputs=[upl_btn, state], outputs=[df, state])

    with gr.Row():
        with gr.Tab('Data Explorer'):
            # outputs.append(gr.Dataframe( label="Event logs"))
            de_btn = gr.Button("Get Stats")
            with gr.Row():
                summary = gr.Dataframe(label="Summary", interactive=False, height=300)
                chart1 = gr.BarPlot(label="Case Stats")
                chart2 = gr.BarPlot(label="Case Lead Time Stats")
                chart3 = gr.BarPlot(label="Case Average Activity Time Stats")
            de_btn.click(fn=get_stats, inputs=[state], outputs=[summary, chart1, chart2, chart3, state])

        with gr.Tab('Variant Explorer'):
            ve_btn = gr.Button("Get Variants")
            # Hint text: "choose the number of variants (0: select all)".
            top_k_variant_selector = gr.Slider(0, 10, value=1, step=1, label="Top-K", info="選擇 Variant 數量（0: 全選）")
            pmchart = gr.Plot(label="Process Map")
            ve_btn.click(fn=get_process_map_variants, inputs=[top_k_variant_selector, state], outputs=[pmchart, state])

        with gr.Tab('Process Explorer'):
            pe_btn = gr.Button("Get Activities & Connections")
            with gr.Column():
                # Hint texts: "increase/decrease the number of top
                # activities / connections (0: select all)".
                # NOTE(review): the bracket style around "pending" was lost
                # to mis-decoding; the one used here is a guess -- confirm.
                top_k_activity_selector = gr.Slider(0, 10, value=1, step=1, label="Activity", info="【pending】增減 Top Activity 數量（0: 全選）")
                top_k_connection_selector = gr.Slider(0, 10, value=1, step=1, label="Connection", info="增減 Top Connection 數量（0: 全選）")
            # The name ``pmchart`` is rebound to this tab's own plot; the
            # Variant Explorer handler above was already wired to the
            # earlier component.
            pmchart = gr.Plot(label="Process Map")
            pe_btn.click(fn=get_process_map_activities_connections, inputs=[top_k_activity_selector, top_k_connection_selector, state], outputs=[pmchart, state])

        with gr.Tab('Process Model'):
            cc_btn = gr.Button("Get Process Model")
            img = gr.Image(label="Process Model")
            cc_btn.click(fn=get_process_map, inputs=[state], outputs=[img, state])
def resolve_server_port(env=None):
    """Return the port configured through ``PORT``, or ``None`` if unset.

    Args:
        env: Mapping to read ``PORT`` from; defaults to ``os.environ``.

    Returns:
        The port as an ``int``, or ``None`` when ``PORT`` is missing or
        blank so that the caller can fall back to Gradio's default port
        selection.

    Raises:
        ValueError: If ``PORT`` is set to something that is not an integer.
    """
    env = os.environ if env is None else env
    raw_port = (env.get("PORT") or "").strip()
    return int(raw_port) if raw_port else None


if __name__ == "__main__":
    demo.launch(
        # share=True,
        server_name="0.0.0.0",
        # ``int(os.environ.get("PORT"))`` raised TypeError when PORT was
        # not defined; None lets Gradio choose the port itself.
        server_port=resolve_server_port(),
        # NOTE(review): when USER_NAME or PASSWORD is not defined this tuple
        # contains None -- confirm that is the intended behaviour.
        auth=(os.environ.get("USER_NAME"), os.environ.get("PASSWORD")),
    )