linpershey committed on
Commit
a822c3b
·
1 Parent(s): 4b289b2

allow select columns

Browse files
Files changed (1) hide show
  1. gapp.py +34 -9
gapp.py CHANGED
@@ -23,20 +23,22 @@ logger = logging.getLogger(__name__)
23
  logger.setLevel(logging.DEBUG)
24
 
25
 
26
- def get_data(temp_file, state: dict):
27
  # print(f"temp_file: {temp_file}")
28
  if isinstance(temp_file, str):
29
  # df = pd.read_csv(StringIO(temp_file), parse_dates=[ "Start", "Finish"])
30
- df = pd.read_csv(temp_file, sep=';', dtype={'case_id': str}, parse_dates = ['timestamp'])
31
- df.loc[:, 'timestamp'] = pd.to_datetime(df['timestamp']) # format='%Y-%m-%d %H:%M:%S'
32
  else:
33
  # df = pd.read_csv(temp_file.name, ) # parse_dates=[ "Start", "Finish"]
34
- df = pd.read_csv(temp_file.name, sep=';', dtype={'case_id': str}, parse_dates = ['timestamp'])
35
- df.loc[:, 'timestamp'] = pd.to_datetime(df['timestamp'])
36
  # logger.debug(df.head())
37
  # logger.debug(df.dtypes)
38
  state['df'] = df
39
- return df, state
 
 
 
 
40
 
41
 
42
  def get_stats(state: dict):
@@ -101,6 +103,20 @@ def get_process_map_activities_connections( activity_rank: int = 0, connection_r
101
  return chart, state
102
 
103
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
  ## --- block --- ##
105
  css = """
106
  h1 {
@@ -114,11 +130,19 @@ with demo:
114
  state = gr.State(value={})
115
  with gr.Row():
116
  upl_btn = gr.UploadButton(label="Upload", file_types = ['.csv'], file_count = "single")
117
- # with gr.Row('Data Preview'):
118
  with gr.Accordion('Data Preview'):
119
  df = gr.Dataframe()
120
- upl_btn.upload( fn=get_data, inputs = [upl_btn, state], outputs=[df, state])
121
-
 
 
 
 
 
 
 
 
 
122
  with gr.Row():
123
  with gr.Tab('Data Explorer'):
124
  # outputs.append(gr.Dataframe( label="Event logs"))
@@ -129,6 +153,7 @@ with demo:
129
  chart2 = gr.BarPlot( label="Case Lead Time Stats")
130
  chart3 = gr.BarPlot( label="Case Average Activity Time Stats")
131
  de_btn.click( fn=get_stats, inputs = [state], outputs=[ summary, chart1, chart2, chart3, state])
 
132
  with gr.Tab('Variant Explorer'):
133
  ve_btn = gr.Button("Get Variants")
134
  top_k_variant_selector = gr.Slider(0, 10, value=1, step=1, label="Top-K", info="選擇 Variant 數量(0: 全選)")
 
23
  logger.setLevel(logging.DEBUG)
24
 
25
 
26
def get_data(temp_file, case_col, activity_col, timestamp_col, state: dict):
    """Load the uploaded CSV and offer its columns in the three column pickers.

    Args:
        temp_file: Either a path string, or an object whose ``.name``
            attribute is the path of the uploaded file.
        case_col: Current value of the "Case" dropdown (unused here).
        activity_col: Current value of the "Activity" dropdown (unused here).
        timestamp_col: Current value of the "Timestamp" dropdown (unused
            here).
        state: Mutable session dict; the raw frame is stored under ``'df'``.

    Returns:
        A 5-tuple ``(df, case_dropdown, activity_dropdown,
        timestamp_dropdown, state)`` where each dropdown lists every column
        of the loaded frame as a choice.
    """
    csv_path = temp_file if isinstance(temp_file, str) else temp_file.name

    # A multi-character separator is interpreted as a regular expression,
    # which only the python parser engine supports. Naming the engine
    # explicitly avoids the ParserWarning pandas emits when it has to fall
    # back from the default C engine.
    df = pd.read_csv(csv_path, sep=';|,', engine='python')
    state['df'] = df

    columns = list(df.columns)
    pickers = [
        gr.Dropdown(choices=columns, multiselect=False, label=label, info=info)
        for label, info in (
            ("Case", "選擇 Case ID"),
            ("Activity", "選擇 Activity ID"),
            ("Timestamp", "選擇 Timestamp"),
        )
    ]
    return (df, *pickers, state)
42
 
43
 
44
  def get_stats(state: dict):
 
103
  return chart, state
104
 
105
 
106
def etl(case_col, activity_col, timestamp_col, state: dict):
    """Normalise the user-selected columns into the event-log schema.

    Works on a copy of ``state['df']``: the case and activity columns are
    cast to ``str``, the timestamp column is parsed with ``pd.to_datetime``,
    and the three columns are renamed to ``'case_id'``, ``'activity'`` and
    ``'timestamp'``.

    Args:
        case_col: Name of the column holding the case identifier.
        activity_col: Name of the column holding the activity label.
        timestamp_col: Name of the column holding the event timestamp.
        state: Mutable session dict; must contain the loaded frame under
            ``'df'``. Its ``'df'`` entry is replaced by the normalised frame.

    Returns:
        ``(df, state)`` — the normalised DataFrame and the updated state.

    Raises:
        KeyError: If ``state`` has no ``'df'`` entry or a selected column is
            not present in the frame.
    """
    df = state['df'].copy()

    # Assign whole columns rather than ``df.loc[:, col] = ...``. On
    # pandas >= 2.0 the ``.loc`` form writes into the existing column in
    # place and keeps its dtype where it can, so parsed timestamps would
    # stay in an ``object`` column and string values written into a numeric
    # column trigger incompatible-dtype warnings/errors.
    df[case_col] = df[case_col].astype(str)
    df[activity_col] = df[activity_col].astype(str)
    df[timestamp_col] = pd.to_datetime(df[timestamp_col])

    df = df.rename(
        columns={
            case_col: 'case_id',
            activity_col: 'activity',
            timestamp_col: 'timestamp',
        }
    )
    state['df'] = df
    return df, state
118
+
119
+
120
  ## --- block --- ##
121
  css = """
122
  h1 {
 
130
  state = gr.State(value={})
131
  with gr.Row():
132
  upl_btn = gr.UploadButton(label="Upload", file_types = ['.csv'], file_count = "single")
 
133
  with gr.Accordion('Data Preview'):
134
  df = gr.Dataframe()
135
+
136
+ with gr.Row():
137
+ case_col = gr.Dropdown( multiselect=False, label="Case", info="選擇 Case ID")
138
+ activity_col = gr.Dropdown( multiselect=False, label="Activity", info="選擇 Activity ID")
139
+ timestamp_col = gr.Dropdown( multiselect=False, label="Timestamp", info="選擇 Timestamp")
140
+
141
+ upl_btn.upload( fn=get_data, inputs = [upl_btn, case_col, activity_col, timestamp_col, state], outputs=[df, case_col, activity_col, timestamp_col, state])
142
+
143
+ column_btn = gr.Button("Select Columns")
144
+ column_btn.click( fn=etl, inputs = [ case_col, activity_col, timestamp_col, state], outputs=[df, state])
145
+
146
  with gr.Row():
147
  with gr.Tab('Data Explorer'):
148
  # outputs.append(gr.Dataframe( label="Event logs"))
 
153
  chart2 = gr.BarPlot( label="Case Lead Time Stats")
154
  chart3 = gr.BarPlot( label="Case Average Activity Time Stats")
155
  de_btn.click( fn=get_stats, inputs = [state], outputs=[ summary, chart1, chart2, chart3, state])
156
+
157
  with gr.Tab('Variant Explorer'):
158
  ve_btn = gr.Button("Get Variants")
159
  top_k_variant_selector = gr.Slider(0, 10, value=1, step=1, label="Top-K", info="選擇 Variant 數量(0: 全選)")