from copy import deepcopy from meta_feature_extraction.simple_stats import simple_stats from meta_feature_extraction.trace_length import trace_length from meta_feature_extraction.trace_variant import trace_variant from meta_feature_extraction.activities import activities from meta_feature_extraction.start_activities import start_activities from meta_feature_extraction.end_activities import end_activities from meta_feature_extraction.entropies import entropies from pm4py import discover_petri_net_inductive as inductive_miner from pm4py import generate_process_tree from pm4py import save_vis_petri_net, save_vis_process_tree from pm4py.algo.filtering.log.variants import variants_filter from pm4py.algo.simulation.tree_generator import algorithm as tree_generator from pm4py.algo.simulation.playout.process_tree import algorithm as playout from pm4py.objects.conversion.log import converter as log_converter from pm4py.objects.log.exporter.xes import exporter as xes_exporter from pm4py.objects.log.importer.xes import importer as xes_importer from pm4py.objects.log.util import dataframe_utils from pm4py.sim import play_out import matplotlib.image as mpimg import os import pandas as pd import streamlit as st OUTPUT_PATH = "output" SAMPLE_EVENTS = 500 @st.cache(allow_output_mutation=True) def load_from_xes(uploaded_file): bytes_data = uploaded_file.getvalue() log1 = xes_importer.deserialize(bytes_data) get_stats(log1) return log1 @st.cache def load_from_csv(uploaded_file, sep): if uploaded_file is not None: df = pd.read_csv(uploaded_file, sep=sep, index_col=False) return df def get_stats(log, save=True): """Returns the statistics of an event log.""" num_traces = len(log) num_events = sum([len(c) for c in log]) num_utraces = len(variants_filter.get_variants(log)) if save: st.session_state["num_traces"] = num_traces st.session_state["num_events"] = num_events st.session_state["num_utraces"] = num_utraces return num_utraces, num_traces, num_events #@st.cache def df_to_log(df, case_id, activity, timestamp): df.rename(columns={case_id: 'case:concept:name', activity: 'concept:name', timestamp: "time:timestamp"}, inplace=True) temp = dataframe_utils.convert_timestamp_columns_in_df(df) #temp = temp.sort_values(timestamp) log = log_converter.apply(temp) return log, 'concept:name', "time:timestamp" def read_uploaded_file(uploaded_file): extension = uploaded_file.name.split('.')[-1] log_name = uploaded_file.name.split('.')[-2] st.sidebar.write("Loaded ", extension.upper(), '-File: ', uploaded_file.name) if extension == "xes": event_log = load_from_xes(uploaded_file) log_columns = [*list(event_log[0][0].keys())] convert_button = False case_id = "case:concept:name" activity = "concept:name" timestamp = "time:timestamp" default_act_id = log_columns.index("concept:name") default_tst_id = log_columns.index("time:timestamp") event_df = log_converter.apply(event_log, variant=log_converter.Variants.TO_DATA_FRAME) df_path = OUTPUT_PATH+"/"+log_name+".csv" event_df.to_csv(df_path, sep =";", index=False) return event_log, event_df, case_id, activity elif extension == "csv": sep = st.sidebar.text_input("Columns separator", ";") event_df = load_from_csv(uploaded_file, sep) old_df = deepcopy(event_df) log_columns = event_df.columns case_id = st.sidebar.selectbox("Choose 'case' column:", log_columns) activity = st.sidebar.selectbox("Choose 'activity' column:", log_columns, index=0) timestamp = st.sidebar.selectbox("Choose 'timestamp' column:", log_columns, index=0) convert_button = st.sidebar.button('Confirm selection') if convert_button: temp = deepcopy(event_df) event_log, activity, timestamp = df_to_log(temp, case_id, activity, timestamp) #xes_exporter.apply(event_log, INPUT_XES) log_columns = [*list(event_log[0][0].keys())] st.session_state['log'] = event_log return event_log, event_df, case_id, activity def sample_log_traces(complete_log, sample_size): ''' Samples random traces out of logs. So that number of events is slightly over SAMPLE_SIZE. :param complete_log: Log extracted from xes ''' log_traces = variants_filter.get_variants(complete_log) keys = list(log_traces.keys()) sample_traces = {} num_evs = 0 while num_evs < sample_size: if len(keys) == 0: break random_trace = keys.pop() sample_traces[random_trace] = log_traces[random_trace] evs = sum([len(case_id) for case_id in sample_traces[random_trace]]) num_evs += evs log1 = variants_filter.apply(complete_log, sample_traces) return log1 def show_process_petrinet(event_log, filter_info, OUTPUT_PATH): OUTPUT_PLOT = f"{OUTPUT_PATH}_{filter_info}".replace(":","").replace(".","")+".png" # OUTPUT_PATH is OUTPUT_PATH+INPUT_FILE try: fig_pt = mpimg.imread(OUTPUT_PLOT) st.write("Loaded from memory") except FileNotFoundError: net, im, fm = inductive_miner(event_log) # parameters={heuristics_miner.Variants.CLASSIC.value.Parameters.DEPENDENCY_THRESH: 0.99, # pn_visualizer.Variants.FREQUENCY.value.Parameters.FORMAT: "png"}) #parameters = {pn_visualizer.Variants.FREQUENCY.value.Parameters.FORMAT: "png"} save_vis_petri_net(net, im, fm, OUTPUT_PLOT) st.write("Saved in: ", OUTPUT_PLOT) fig_pt = mpimg.imread(OUTPUT_PLOT) st.image(fig_pt) def show_loaded_event_log(event_log, event_df): get_stats(event_log) st.write("### Loaded event-log") col1, col2 = st.columns(2) with col2: st.dataframe(event_df) with col1: show_process_petrinet(event_log, None, OUTPUT_PATH+"running-example") def extract_meta_features(log, log_name): mtf_cols = ["log", "n_traces", "n_unique_traces", "ratio_unique_traces_per_trace", "n_events", "trace_len_min", "trace_len_max", "trace_len_mean", "trace_len_median", "trace_len_mode", "trace_len_std", "trace_len_variance", "trace_len_q1", "trace_len_q3", "trace_len_iqr", "trace_len_geometric_mean", "trace_len_geometric_std", "trace_len_harmonic_mean", "trace_len_skewness", "trace_len_kurtosis", "trace_len_coefficient_variation", "trace_len_entropy", "trace_len_hist1", "trace_len_hist2", "trace_len_hist3", "trace_len_hist4", "trace_len_hist5", "trace_len_hist6", "trace_len_hist7", "trace_len_hist8", "trace_len_hist9", "trace_len_hist10", "trace_len_skewness_hist", "trace_len_kurtosis_hist", "ratio_most_common_variant", "ratio_top_1_variants", "ratio_top_5_variants", "ratio_top_10_variants", "ratio_top_20_variants", "ratio_top_50_variants", "ratio_top_75_variants", "mean_variant_occurrence", "std_variant_occurrence", "skewness_variant_occurrence", "kurtosis_variant_occurrence", "n_unique_activities", "activities_min", "activities_max", "activities_mean", "activities_median", "activities_std", "activities_variance", "activities_q1", "activities_q3", "activities_iqr", "activities_skewness", "activities_kurtosis", "n_unique_start_activities", "start_activities_min", "start_activities_max", "start_activities_mean", "start_activities_median", "start_activities_std", "start_activities_variance", "start_activities_q1", "start_activities_q3", "start_activities_iqr", "start_activities_skewness", "start_activities_kurtosis", "n_unique_end_activities", "end_activities_min", "end_activities_max", "end_activities_mean", "end_activities_median", "end_activities_std", "end_activities_variance", "end_activities_q1", "end_activities_q3", "end_activities_iqr", "end_activities_skewness", "end_activities_kurtosis", "entropy_trace", "entropy_prefix", "entropy_global_block", "entropy_lempel_ziv", "entropy_k_block_diff_1", "entropy_k_block_diff_3", "entropy_k_block_diff_5", "entropy_k_block_ratio_1", "entropy_k_block_ratio_3", "entropy_k_block_ratio_5", "entropy_knn_3", "entropy_knn_5", "entropy_knn_7"] features = [log_name] features.extend(simple_stats(log)) features.extend(trace_length(log)) features.extend(trace_variant(log)) features.extend(activities(log)) features.extend(start_activities(log)) features.extend(end_activities(log)) features.extend(entropies(log_name, OUTPUT_PATH)) mtf = pd.DataFrame([features], columns=mtf_cols) st.dataframe(mtf) return mtf def generate_pt(mtf): OUTPUT_PLOT = f"{OUTPUT_PATH}/generated_pt".replace(":","").replace(".","")#+".png" # OUTPUT_PATH is OUTPUT_PATH+INPUT_FILE st.write("### PT Gen configurations") col1, col2, col3, col4, col5, col6 = st.columns(6) with col1: param_mode = st.text_input('Mode', str(round(mtf['activities_median'].iat[0]))) #? st.write("Sum of probabilities must be one") with col2: param_min = st.text_input('Min', str(mtf['activities_min'].iat[0])) param_seq = st.text_input('Probability Sequence', 0.25) with col3: param_max = st.text_input('Max', str(mtf['activities_max'].iat[0])) param_cho = st.text_input('Probability Choice (XOR)', 0.25) with col4: param_nmo = st.text_input('Number of models', 1) param_par = st.text_input('Probability Parallel', 0.25) with col5: param_dup = st.text_input('Duplicates', 0) param_lop = st.text_input('Probability Loop', 0.25) with col6: param_sil = st.text_input('Silent', 0.2) param_or = st.text_input('Probability Or', 0.0) PT_PARAMS = {tree_generator.Variants.PTANDLOGGENERATOR.value.Parameters.MODE: round(float(param_mode)), #most frequent number of visible activities tree_generator.Variants.PTANDLOGGENERATOR.value.Parameters.MIN: int(param_min), #minimum number of visible activities tree_generator.Variants.PTANDLOGGENERATOR.value.Parameters.MAX: int(param_max), #maximum number of visible activities tree_generator.Variants.PTANDLOGGENERATOR.value.Parameters.SEQUENCE: float(param_seq), #probability to add a sequence operator to tree tree_generator.Variants.PTANDLOGGENERATOR.value.Parameters.CHOICE: float(param_cho), #probability to add a choice (XOR) operator to tree tree_generator.Variants.PTANDLOGGENERATOR.value.Parameters.PARALLEL: float(param_par), #probability to add a parallel operator to tree tree_generator.Variants.PTANDLOGGENERATOR.value.Parameters.LOOP: float(param_lop), #probability to add a loop operator to tree tree_generator.Variants.PTANDLOGGENERATOR.value.Parameters.OR: float(param_or), #probability to add an or operator to tree tree_generator.Variants.PTANDLOGGENERATOR.value.Parameters.SILENT: float(param_sil), #probability to add silent activity to a choice or loop operator tree_generator.Variants.PTANDLOGGENERATOR.value.Parameters.DUPLICATE: int(param_dup), #probability to duplicate an activity label tree_generator.Variants.PTANDLOGGENERATOR.value.Parameters.NO_MODELS: int(param_nmo)} #number of trees to generate from model population process_tree = generate_process_tree(parameters=PT_PARAMS) save_vis_process_tree(process_tree, OUTPUT_PLOT+"_tree.png") st.write("### Playout configurations") param_ntraces = st.text_input('Number of traces', str(mtf['n_traces'].iat[0])) PO_PARAMS = {playout.Variants.BASIC_PLAYOUT.value.Parameters.NO_TRACES : int(param_ntraces)} ptgen_log = play_out(process_tree, parameters=PO_PARAMS) net, im, fm = inductive_miner(ptgen_log) save_vis_petri_net(net, im, fm, OUTPUT_PLOT+".png") st.write("Saved in: ", OUTPUT_PLOT) fig_pt_net = mpimg.imread(OUTPUT_PLOT+".png") fig_pt_tree = mpimg.imread(OUTPUT_PLOT+"_tree.png") fcol1, fcol2 = st.columns(2) with fcol1: st.image(fig_pt_tree) with fcol2: st.image(fig_pt_net) extract_meta_features(ptgen_log, "gen_pt") if __name__ == '__main__': st.set_page_config(layout='wide') """ # Event Log Generator """ start_options = ['Event-Log', 'Meta-features'] start_preference = st.sidebar.selectbox("Do you want to start with a log or with metafeatures?", start_options,0) #lets_start = st.sidebar.button("Let's start with "+start_preference+'!') if start_preference==start_options[0]: st.sidebar.write("Upload a dataset in csv or xes-format:") uploaded_file = st.sidebar.file_uploader("Pick a logfile") bar = st.progress(0) os.makedirs(OUTPUT_PATH, exist_ok=True) event_log = st.session_state['log'] if "log" in st.session_state else None if uploaded_file: event_log, event_df, case_id, activity_id = read_uploaded_file(uploaded_file) #event_log = deepcopy(event_log) use_sample = st.sidebar.checkbox('Use random sample', True) if use_sample: sample_size = st.sidebar.text_input('Sample size of approx number of events', str(SAMPLE_EVENTS)) sample_size = int(sample_size) event_log = sample_log_traces(event_log, sample_size) sample_cases = [event_log[i].attributes['concept:name'] for i in range(0, len(event_log))] event_df = event_df[event_df[case_id].isin(sample_cases)] show_loaded_event_log(event_log, event_df) ext_mtf = extract_meta_features(event_log, "running-example") generate_pt(ext_mtf) elif start_preference==start_options[1]: LOG_COL = 'log' st.sidebar.write("Upload a dataset in csv-format") uploaded_file = st.sidebar.file_uploader("Pick a file containing meta-features") bar = st.progress(0) os.makedirs(OUTPUT_PATH, exist_ok=True) event_log = st.session_state[LOG_COL] if "log" in st.session_state else None if uploaded_file: sep = st.sidebar.text_input("Columns separator", ";") mtf = load_from_csv(uploaded_file, sep) st.dataframe(mtf) log_options = mtf['log'].unique() log_preference = st.selectbox("What log should we use for generating a new event-log?", log_options,1) mtf_selection = mtf[mtf[LOG_COL]==log_preference] generate_pt(mtf_selection) st.write("##### Original") st.write(mtf_selection)