phitoduck committed on
Commit
64bdbaf
·
1 Parent(s): bc28645

plotting lines, but with matplotlib everywhere

Browse files
Files changed (1) hide show
  1. streamlit_app.py +24 -52
streamlit_app.py CHANGED
@@ -3,7 +3,7 @@ import pandas as pd
3
  import matplotlib.pyplot as plt
4
  from datetime import datetime, time, date
5
  from typing import List, Dict, Any, Tuple
6
- from utils import generate_random_data, calculate_percentile, evaluate_alarm_state, aggregate_data
7
  from textwrap import dedent
8
 
9
  # Constants
@@ -21,13 +21,15 @@ def main():
21
 
22
  if not st.session_state.df.empty:
23
  display_dataframe("Raw Event Data", st.session_state.df)
 
24
 
25
- # Section 2 - Calculate Percentile
26
- st.header("Section 2 - Calculate Percentile")
27
- percentile_form()
28
 
29
- if not st.session_state.percentile_df.empty:
30
- display_dataframe("Aggregated Summary Data", st.session_state.percentile_df)
 
31
 
32
  # Section 3 - Summary Data Aggregated by Period
33
  st.header("Section 3 - Summary Data Aggregated by Period")
@@ -35,13 +37,13 @@ def main():
35
 
36
  if not st.session_state.summary_by_period_df.empty:
37
  display_dataframe("Summary Data Aggregated by Period", st.session_state.summary_by_period_df)
 
38
 
39
  # Section 4 - Evaluate Alarm State
40
  st.header("Section 4 - Evaluate Alarm State")
41
  alarm_state_form()
42
 
43
  if not st.session_state.alarm_state_df.empty:
44
- plot_time_series(st.session_state.summary_by_period_df, st.session_state.threshold_input, st.session_state.alarm_condition_input, st.session_state.evaluation_range_input)
45
  display_alarm_state_evaluation(st.session_state.alarm_state_df)
46
 
47
  display_key_tables()
@@ -49,8 +51,8 @@ def main():
49
  def initialize_session_state() -> None:
50
  if 'df' not in st.session_state:
51
  st.session_state.df = pd.DataFrame()
52
- if 'percentile_df' not in st.session_state:
53
- st.session_state.percentile_df = pd.DataFrame()
54
  if 'summary_by_period_df' not in st.session_state:
55
  st.session_state.summary_by_period_df = pd.DataFrame()
56
  if 'alarm_state_df' not in st.session_state:
@@ -75,29 +77,26 @@ def generate_data_form() -> None:
75
  null_percentage=null_percentage_input
76
  )
77
 
78
- def percentile_form() -> None:
79
  freq_input = st.selectbox("Period (bin)", ['1min', '5min', '15min'], key='freq_input', help="Select the frequency for aggregating the data.")
80
- percentile_input = st.slider("Percentile", min_value=0.0, max_value=1.0, value=0.95, key='percentile_input', help="Select the percentile for calculating the aggregated summary data.")
 
 
 
 
 
81
  if not st.session_state.df.empty:
82
- st.session_state.percentile_df = calculate_percentile(st.session_state.df, freq_input, percentile_input)
83
 
84
  def summary_by_period_form() -> None:
85
  period_length_input = st.selectbox("Period Length", ['1min', '5min', '15min'], key='period_length_input', help="Select the period length for aggregating the summary data.")
86
  if not st.session_state.df.empty:
87
  st.session_state.summary_by_period_df = aggregate_data(st.session_state.df, period_length_input)
88
- else:
89
- st.warning("No data available to aggregate.")
90
 
91
  def alarm_state_form() -> None:
92
  threshold_input = st.slider("Threshold (ms)", min_value=50, max_value=300, value=150, key='threshold_input', help="Specify the threshold value for evaluating the alarm state.")
93
  datapoints_to_alarm_input = st.number_input("Datapoints to Alarm", min_value=1, value=3, key='datapoints_to_alarm_input', help="Specify the number of data points required to trigger an alarm.")
94
  evaluation_range_input = st.number_input("Evaluation Range", min_value=1, value=5, key='evaluation_range_input', help="Specify the range of data points to evaluate for alarm state.")
95
- aggregation_function_input = st.selectbox(
96
- "Aggregation Function",
97
- ['p50', 'p95', 'p99', 'max', 'min', 'average'],
98
- key='aggregation_function_input',
99
- help="Select the aggregation function for visualizing the data and computing alarms."
100
- )
101
  alarm_condition_input = st.selectbox(
102
  "Alarm Condition",
103
  ['>', '>=', '<', '<='],
@@ -110,7 +109,7 @@ def alarm_state_form() -> None:
110
  threshold=threshold_input,
111
  datapoints_to_alarm=datapoints_to_alarm_input,
112
  evaluation_range=evaluation_range_input,
113
- aggregation_function=aggregation_function_input,
114
  alarm_condition=alarm_condition_input
115
  )
116
 
@@ -118,9 +117,9 @@ def display_dataframe(title: str, df: pd.DataFrame) -> None:
118
  st.write(title)
119
  st.dataframe(df)
120
 
121
- def plot_time_series(df: pd.DataFrame, threshold: int, alarm_condition: str, evaluation_range: int) -> None:
122
  timestamps = df['Timestamp']
123
- response_times = df[st.session_state.aggregation_function_input]
124
 
125
  segments = []
126
  current_segment = {'timestamps': [], 'values': []}
@@ -141,38 +140,12 @@ def plot_time_series(df: pd.DataFrame, threshold: int, alarm_condition: str, eva
141
 
142
  color = 'tab:blue'
143
  ax1.set_xlabel('Timestamp')
144
- ax1.set_ylabel('Response Time (ms)', color=color)
145
 
146
  for segment in segments:
147
  ax1.plot(segment['timestamps'], segment['values'], color=color, linewidth=0.5)
148
  ax1.scatter(segment['timestamps'], segment['values'], color=color, s=10)
149
 
150
- line_style = '--' if alarm_condition in ['<', '>'] else '-'
151
- ax1.axhline(y=threshold, color='r', linestyle=line_style, linewidth=0.8, label='Threshold')
152
- ax1.tick_params(axis='y', labelcolor=color)
153
-
154
- if alarm_condition in ['<=', '<']:
155
- ax1.fill_between(timestamps, 0, threshold, color='pink', alpha=0.3)
156
- else:
157
- ax1.fill_between(timestamps, threshold, response_times.max(), color='pink', alpha=0.3)
158
-
159
- period_indices = range(len(df))
160
- ax2 = ax1.twiny()
161
- ax2.set_xticks(period_indices)
162
- ax2.set_xticklabels(period_indices, fontsize=8)
163
- ax2.set_xlabel('Time Periods', fontsize=8)
164
- ax2.xaxis.set_tick_params(width=0.5)
165
-
166
- for idx in period_indices:
167
- if idx % evaluation_range == 0:
168
- ax1.axvline(x=df['Timestamp'].iloc[idx], color='green', linestyle='-', alpha=0.3)
169
- max_value = max(filter(lambda x: x is not None, df[st.session_state.aggregation_function_input]))
170
- ax1.text(df['Timestamp'].iloc[idx], max_value * 0.95, f"[{idx // evaluation_range}]", rotation=90, verticalalignment='bottom', color='grey', alpha=0.7, fontsize=8)
171
- else:
172
- ax1.axvline(x=df['Timestamp'].iloc[idx], color='grey', linestyle='--', alpha=0.3)
173
-
174
- ax1.annotate('Alarm threshold', xy=(0.98, threshold), xycoords=('axes fraction', 'data'), ha='right', va='bottom', fontsize=8, color='red', backgroundcolor='none')
175
-
176
  fig.tight_layout()
177
  st.pyplot(fig)
178
 
@@ -197,8 +170,7 @@ def display_key_tables() -> None:
197
  st.table(symbol_df)
198
 
199
  # Columns
200
- st.write(dedent("""\
201
- #### Columns: Strategies for handling missing data points [docs](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/AlarmThatSendsEmail.html#alarms-and-missing-data)
202
 
203
  Sometimes, no metric events may have been reported during a given time period. In this case,
204
  you must decide how you will treat missing data points. Ignore it? Or consider it a failure.
 
3
  import matplotlib.pyplot as plt
4
  from datetime import datetime, time, date
5
  from typing import List, Dict, Any, Tuple
6
+ from utils import generate_random_data, evaluate_alarm_state, aggregate_data
7
  from textwrap import dedent
8
 
9
  # Constants
 
21
 
22
  if not st.session_state.df.empty:
23
  display_dataframe("Raw Event Data", st.session_state.df)
24
+ plot_time_series(st.session_state.df, "ResponseTime(ms)")
25
 
26
+ # Section 2 - Calculate Aggregations
27
+ st.header("Section 2 - Calculate Aggregations")
28
+ aggregation_form()
29
 
30
+ if not st.session_state.aggregated_df.empty:
31
+ display_dataframe("Aggregated Summary Data", st.session_state.aggregated_df)
32
+ plot_time_series(st.session_state.aggregated_df, st.session_state.aggregation_function_input)
33
 
34
  # Section 3 - Summary Data Aggregated by Period
35
  st.header("Section 3 - Summary Data Aggregated by Period")
 
37
 
38
  if not st.session_state.summary_by_period_df.empty:
39
  display_dataframe("Summary Data Aggregated by Period", st.session_state.summary_by_period_df)
40
+ plot_time_series(st.session_state.summary_by_period_df, st.session_state.aggregation_function_input)
41
 
42
  # Section 4 - Evaluate Alarm State
43
  st.header("Section 4 - Evaluate Alarm State")
44
  alarm_state_form()
45
 
46
  if not st.session_state.alarm_state_df.empty:
 
47
  display_alarm_state_evaluation(st.session_state.alarm_state_df)
48
 
49
  display_key_tables()
 
51
  def initialize_session_state() -> None:
52
  if 'df' not in st.session_state:
53
  st.session_state.df = pd.DataFrame()
54
+ if 'aggregated_df' not in st.session_state:
55
+ st.session_state.aggregated_df = pd.DataFrame()
56
  if 'summary_by_period_df' not in st.session_state:
57
  st.session_state.summary_by_period_df = pd.DataFrame()
58
  if 'alarm_state_df' not in st.session_state:
 
77
  null_percentage=null_percentage_input
78
  )
79
 
80
+ def aggregation_form() -> None:
81
  freq_input = st.selectbox("Period (bin)", ['1min', '5min', '15min'], key='freq_input', help="Select the frequency for aggregating the data.")
82
+ aggregation_function_input = st.selectbox(
83
+ "Aggregation Function",
84
+ ['p50', 'p95', 'p99', 'max', 'min', 'average'],
85
+ key='aggregation_function_input',
86
+ help="Select the aggregation function for visualizing the data."
87
+ )
88
  if not st.session_state.df.empty:
89
+ st.session_state.aggregated_df = aggregate_data(st.session_state.df, freq_input)
90
 
91
  def summary_by_period_form() -> None:
92
  period_length_input = st.selectbox("Period Length", ['1min', '5min', '15min'], key='period_length_input', help="Select the period length for aggregating the summary data.")
93
  if not st.session_state.df.empty:
94
  st.session_state.summary_by_period_df = aggregate_data(st.session_state.df, period_length_input)
 
 
95
 
96
  def alarm_state_form() -> None:
97
  threshold_input = st.slider("Threshold (ms)", min_value=50, max_value=300, value=150, key='threshold_input', help="Specify the threshold value for evaluating the alarm state.")
98
  datapoints_to_alarm_input = st.number_input("Datapoints to Alarm", min_value=1, value=3, key='datapoints_to_alarm_input', help="Specify the number of data points required to trigger an alarm.")
99
  evaluation_range_input = st.number_input("Evaluation Range", min_value=1, value=5, key='evaluation_range_input', help="Specify the range of data points to evaluate for alarm state.")
 
 
 
 
 
 
100
  alarm_condition_input = st.selectbox(
101
  "Alarm Condition",
102
  ['>', '>=', '<', '<='],
 
109
  threshold=threshold_input,
110
  datapoints_to_alarm=datapoints_to_alarm_input,
111
  evaluation_range=evaluation_range_input,
112
+ aggregation_function=st.session_state.aggregation_function_input,
113
  alarm_condition=alarm_condition_input
114
  )
115
 
 
117
  st.write(title)
118
  st.dataframe(df)
119
 
120
+ def plot_time_series(df: pd.DataFrame, column: str) -> None:
121
  timestamps = df['Timestamp']
122
+ response_times = df[column]
123
 
124
  segments = []
125
  current_segment = {'timestamps': [], 'values': []}
 
140
 
141
  color = 'tab:blue'
142
  ax1.set_xlabel('Timestamp')
143
+ ax1.set_ylabel(f'{column} (ms)', color=color)
144
 
145
  for segment in segments:
146
  ax1.plot(segment['timestamps'], segment['values'], color=color, linewidth=0.5)
147
  ax1.scatter(segment['timestamps'], segment['values'], color=color, s=10)
148
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
149
  fig.tight_layout()
150
  st.pyplot(fig)
151
 
 
170
  st.table(symbol_df)
171
 
172
  # Columns
173
+ st.write(dedent(""" #### Columns: Strategies for handling missing data points [docs](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/AlarmThatSendsEmail.html#alarms-and-missing-data)
 
174
 
175
  Sometimes, no metric events may have been reported during a given time period. In this case,
176
  you must decide how you will treat missing data points. Ignore it? Or consider it a failure.