phitoduck committed on
Commit
bc28645
·
1 Parent(s): 755ac75
Files changed (2) hide show
  1. streamlit_app.py +20 -9
  2. utils.py +20 -17
streamlit_app.py CHANGED
@@ -4,6 +4,7 @@ import matplotlib.pyplot as plt
4
  from datetime import datetime, time, date
5
  from typing import List, Dict, Any, Tuple
6
  from utils import generate_random_data, calculate_percentile, evaluate_alarm_state, aggregate_data
 
7
 
8
  # Constants
9
  HARD_CODED_DATE = date(2024, 7, 26)
@@ -84,9 +85,11 @@ def summary_by_period_form() -> None:
84
  period_length_input = st.selectbox("Period Length", ['1min', '5min', '15min'], key='period_length_input', help="Select the period length for aggregating the summary data.")
85
  if not st.session_state.df.empty:
86
  st.session_state.summary_by_period_df = aggregate_data(st.session_state.df, period_length_input)
 
 
87
 
88
  def alarm_state_form() -> None:
89
- threshold_input = st.number_input("Threshold (ms)", min_value=50, max_value=300, value=150, key='threshold_input', help="Specify the threshold value for evaluating the alarm state.")
90
  datapoints_to_alarm_input = st.number_input("Datapoints to Alarm", min_value=1, value=3, key='datapoints_to_alarm_input', help="Specify the number of data points required to trigger an alarm.")
91
  evaluation_range_input = st.number_input("Evaluation Range", min_value=1, value=5, key='evaluation_range_input', help="Specify the range of data points to evaluate for alarm state.")
92
  aggregation_function_input = st.selectbox(
@@ -183,7 +186,7 @@ def display_key_tables() -> None:
183
  # Symbols
184
  st.write("#### Symbols")
185
  symbol_data = {
186
- "Symbol": ["X", "-", "0"],
187
  "Meaning": [
188
  "Breaching data point: This data point exceeds the threshold.",
189
  "Missing data point: This data point is missing or not reported.",
@@ -194,14 +197,22 @@ def display_key_tables() -> None:
194
  st.table(symbol_df)
195
 
196
  # Columns
197
- st.write("#### Columns")
 
 
 
 
 
 
 
 
198
  column_data = {
199
- "Column": ["MISSING", "IGNORE", "BREACHING", "NOT BREACHING"],
200
- "Meaning": [
201
- "Action to take when all data points are missing. Possible values: INSUFFICIENT_DATA, Retain current state, ALARM, OK.",
202
- "Action to take when data points are missing but ignored. Possible values: Retain current state, ALARM, OK.",
203
- "Action to take when missing data points are treated as breaching. Possible values: ALARM, OK.",
204
- "Action to take when missing data points are treated as not breaching. Possible values: ALARM, OK."
205
  ]
206
  }
207
  column_df = pd.DataFrame(column_data)
 
4
  from datetime import datetime, time, date
5
  from typing import List, Dict, Any, Tuple
6
  from utils import generate_random_data, calculate_percentile, evaluate_alarm_state, aggregate_data
7
+ from textwrap import dedent
8
 
9
  # Constants
10
  HARD_CODED_DATE = date(2024, 7, 26)
 
85
  period_length_input = st.selectbox("Period Length", ['1min', '5min', '15min'], key='period_length_input', help="Select the period length for aggregating the summary data.")
86
  if not st.session_state.df.empty:
87
  st.session_state.summary_by_period_df = aggregate_data(st.session_state.df, period_length_input)
88
+ else:
89
+ st.warning("No data available to aggregate.")
90
 
91
  def alarm_state_form() -> None:
92
+ threshold_input = st.slider("Threshold (ms)", min_value=50, max_value=300, value=150, key='threshold_input', help="Specify the threshold value for evaluating the alarm state.")
93
  datapoints_to_alarm_input = st.number_input("Datapoints to Alarm", min_value=1, value=3, key='datapoints_to_alarm_input', help="Specify the number of data points required to trigger an alarm.")
94
  evaluation_range_input = st.number_input("Evaluation Range", min_value=1, value=5, key='evaluation_range_input', help="Specify the range of data points to evaluate for alarm state.")
95
  aggregation_function_input = st.selectbox(
 
186
  # Symbols
187
  st.write("#### Symbols")
188
  symbol_data = {
189
+ "Symbol": ["🔴", "⚫️", "🟢"],
190
  "Meaning": [
191
  "Breaching data point: This data point exceeds the threshold.",
192
  "Missing data point: This data point is missing or not reported.",
 
197
  st.table(symbol_df)
198
 
199
  # Columns
200
+ st.write(dedent("""\
201
+ #### Columns: Strategies for handling missing data points [docs](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/AlarmThatSendsEmail.html#alarms-and-missing-data)
202
+
203
+ Sometimes, no metric events may have been reported during a given time period. In this case,
204
+ you must decide how you will treat missing data points. Ignore it? Or consider it a failure.
205
+
206
+ Here are the 4 supported strategies in AWS:
207
+ """))
208
+
209
  column_data = {
210
+ "Strategy": ["missing", "ignore", "breaching", "notBreaching"],
211
+ "Explanation": [
212
+ "If all data points in the alarm evaluation range are missing, the alarm transitions to INSUFFICIENT_DATA. Possible values: INSUFFICIENT_DATA, Retain current state, ALARM, OK.",
213
+ "The current alarm state is maintained. Possible values: Retain current state, ALARM, OK.",
214
+ "Missing data points are treated as \"bad\" and breaching the threshold. Possible values: ALARM, OK.",
215
+ "Missing data points are treated as \"good\" and within the threshold. Possible values: ALARM, OK."
216
  ]
217
  }
218
  column_df = pd.DataFrame(column_data)
utils.py CHANGED
@@ -42,8 +42,7 @@ def calculate_percentile(
42
  freq: str,
43
  percentile: float
44
  ) -> pd.DataFrame:
45
- percentile_df: pd.DataFrame = df.groupby(pd.Grouper(key='Timestamp', freq=freq))["ResponseTime(ms)"]\
46
- .quantile(percentile).reset_index(name=f"p{int(percentile * 100)}_ResponseTime(ms)")
47
  percentile_df.replace(to_replace=np.nan, value=None, inplace=True)
48
  return percentile_df
49
 
@@ -51,13 +50,16 @@ def aggregate_data(
51
  df: pd.DataFrame,
52
  period_length: str
53
  ) -> pd.DataFrame:
 
 
 
54
  aggregation_funcs = {
55
- 'p50': lambda x: np.percentile(x.dropna(), 50),
56
- 'p95': lambda x: np.percentile(x.dropna(), 95),
57
- 'p99': lambda x: np.percentile(x.dropna(), 99),
58
- 'max': lambda x: np.max(x.dropna()),
59
- 'min': lambda x: np.min(x.dropna()),
60
- 'average': lambda x: np.mean(x.dropna())
61
  }
62
 
63
  summary_df = df.groupby(pd.Grouper(key='Timestamp', freq=period_length)).agg(
@@ -111,30 +113,31 @@ def evaluate_alarm_state(
111
 
112
  for dp in chunk:
113
  if dp is None:
114
- data_point_repr += '-'
115
  elif check_condition(dp, threshold, alarm_condition):
116
- data_point_repr += 'X'
117
  else:
118
- data_point_repr += '0'
 
119
 
120
  if len(chunk) < evaluation_range:
121
- data_point_repr += '-' * (evaluation_range - len(chunk))
122
 
123
- if data_point_repr.count('-') > (evaluation_range - datapoints_to_alarm):
124
- num_dp_that_must_be_filled = datapoints_to_alarm - sum([data_point_repr.count('0'), data_point_repr.count('X')])
125
 
126
  data_table_dict["DataPoints"].append(data_point_repr)
127
  data_table_dict["# of data points that must be filled"].append(num_dp_that_must_be_filled)
128
 
129
  if num_dp_that_must_be_filled > 0:
130
- data_table_dict["MISSING"].append("INSUFFICIENT_DATA" if data_point_repr.count('-') == evaluation_range else "Retain current state")
131
  data_table_dict["IGNORE"].append("Retain current state")
132
  data_table_dict["BREACHING"].append("ALARM")
133
  data_table_dict["NOT BREACHING"].append("OK")
134
  else:
135
  data_table_dict["MISSING"].append("OK")
136
  data_table_dict["IGNORE"].append("Retain current state")
137
- data_table_dict["BREACHING"].append("ALARM" if 'X' * datapoints_to_alarm in data_point_repr else "OK")
138
- data_table_dict["NOT BREACHING"].append("ALARM" if '0' * datapoints_to_alarm not in data_point_repr else "OK")
139
 
140
  return pd.DataFrame(data_table_dict)
 
42
  freq: str,
43
  percentile: float
44
  ) -> pd.DataFrame:
45
+ percentile_df: pd.DataFrame = df.groupby(pd.Grouper(key='Timestamp', freq=freq))["ResponseTime(ms)"] .quantile(percentile).reset_index(name=f"p{int(percentile * 100)}_ResponseTime(ms)")
 
46
  percentile_df.replace(to_replace=np.nan, value=None, inplace=True)
47
  return percentile_df
48
 
 
50
  df: pd.DataFrame,
51
  period_length: str
52
  ) -> pd.DataFrame:
53
+ if df.empty:
54
+ return pd.DataFrame() # Return an empty DataFrame if input is empty
55
+
56
  aggregation_funcs = {
57
+ 'p50': lambda x: np.percentile(x.dropna(), 50) if not x.dropna().empty else np.nan,
58
+ 'p95': lambda x: np.percentile(x.dropna(), 95) if not x.dropna().empty else np.nan,
59
+ 'p99': lambda x: np.percentile(x.dropna(), 99) if not x.dropna().empty else np.nan,
60
+ 'max': lambda x: np.max(x.dropna()) if not x.dropna().empty else np.nan,
61
+ 'min': lambda x: np.min(x.dropna()) if not x.dropna().empty else np.nan,
62
+ 'average': lambda x: np.mean(x.dropna()) if not x.dropna().empty else np.nan
63
  }
64
 
65
  summary_df = df.groupby(pd.Grouper(key='Timestamp', freq=period_length)).agg(
 
113
 
114
  for dp in chunk:
115
  if dp is None:
116
+ dp_symbol = '⚫️'
117
  elif check_condition(dp, threshold, alarm_condition):
118
+ dp_symbol = '🔴'
119
  else:
120
+ dp_symbol = '🟢'
121
+ data_point_repr += dp_symbol
122
 
123
  if len(chunk) < evaluation_range:
124
+ data_point_repr += '⚫️' * (evaluation_range - len(chunk))
125
 
126
+ if data_point_repr.count('⚫️') > (evaluation_range - datapoints_to_alarm):
127
+ num_dp_that_must_be_filled = datapoints_to_alarm - sum([data_point_repr.count('🟢'), data_point_repr.count('🔴')])
128
 
129
  data_table_dict["DataPoints"].append(data_point_repr)
130
  data_table_dict["# of data points that must be filled"].append(num_dp_that_must_be_filled)
131
 
132
  if num_dp_that_must_be_filled > 0:
133
+ data_table_dict["MISSING"].append("INSUFFICIENT_DATA" if data_point_repr.count('⚫️') == evaluation_range else "Retain current state")
134
  data_table_dict["IGNORE"].append("Retain current state")
135
  data_table_dict["BREACHING"].append("ALARM")
136
  data_table_dict["NOT BREACHING"].append("OK")
137
  else:
138
  data_table_dict["MISSING"].append("OK")
139
  data_table_dict["IGNORE"].append("Retain current state")
140
+ data_table_dict["BREACHING"].append("ALARM" if '🔴' * datapoints_to_alarm in data_point_repr else "OK")
141
+ data_table_dict["NOT BREACHING"].append("ALARM" if '🟢' * datapoints_to_alarm not in data_point_repr else "OK")
142
 
143
  return pd.DataFrame(data_table_dict)