Commit: works

Files changed:
- streamlit_app.py (+20, -9)
- utils.py (+20, -17)
streamlit_app.py (CHANGED)

@@ -4,6 +4,7 @@ import matplotlib.pyplot as plt
 from datetime import datetime, time, date
 from typing import List, Dict, Any, Tuple
 from utils import generate_random_data, calculate_percentile, evaluate_alarm_state, aggregate_data
+from textwrap import dedent
 
 # Constants
 HARD_CODED_DATE = date(2024, 7, 26)

@@ -84,9 +85,11 @@ def summary_by_period_form() -> None:
     period_length_input = st.selectbox("Period Length", ['1min', '5min', '15min'], key='period_length_input', help="Select the period length for aggregating the summary data.")
     if not st.session_state.df.empty:
         st.session_state.summary_by_period_df = aggregate_data(st.session_state.df, period_length_input)
+    else:
+        st.warning("No data available to aggregate.")
 
 def alarm_state_form() -> None:
-    threshold_input = st.…
+    threshold_input = st.slider("Threshold (ms)", min_value=50, max_value=300, value=150, key='threshold_input', help="Specify the threshold value for evaluating the alarm state.")
     datapoints_to_alarm_input = st.number_input("Datapoints to Alarm", min_value=1, value=3, key='datapoints_to_alarm_input', help="Specify the number of data points required to trigger an alarm.")
     evaluation_range_input = st.number_input("Evaluation Range", min_value=1, value=5, key='evaluation_range_input', help="Specify the range of data points to evaluate for alarm state.")
     aggregation_function_input = st.selectbox(

@@ -183,7 +186,7 @@ def display_key_tables() -> None:
     # Symbols
     st.write("#### Symbols")
     symbol_data = {
-        "Symbol": ["…
+        "Symbol": ["🔴", "⚫️", "🟢"],
         "Meaning": [
             "Breaching data point: This data point exceeds the threshold.",
             "Missing data point: This data point is missing or not reported.",

@@ -194,14 +197,22 @@ def display_key_tables() -> None:
     st.table(symbol_df)
 
     # Columns
-    st.write("…
+    st.write(dedent("""\
+        #### Columns: Strategies for handling missing data points [docs](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/AlarmThatSendsEmail.html#alarms-and-missing-data)
+
+        Sometimes, no metric events may have been reported during a given time period. In this case,
+        you must decide how you will treat missing data points. Ignore it? Or consider it a failure.
+
+        Here are the 4 supported strategies in AWS:
+    """))
+
     column_data = {
-        "…
-        "…
-        "…
-        "…
-        "…
-        "…
+        "Strategy": ["missing", "ignore", "breaching", "notBreaching"],
+        "Explanation": [
+            "If all data points in the alarm evaluation range are missing, the alarm transitions to INSUFFICIENT_DATA. Possible values: INSUFFICIENT_DATA, Retain current state, ALARM, OK.",
+            "The current alarm state is maintained. Possible values: Retain current state, ALARM, OK.",
+            "Missing data points are treated as \"bad\" and breaching the threshold. Possible values: ALARM, OK.",
+            "Missing data points are treated as \"good\" and within the threshold. Possible values: ALARM, OK."
+        ]
     }
     column_df = pd.DataFrame(column_data)
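Side note on the new `textwrap.dedent` import: `st.write` renders Markdown, and Markdown treats lines indented by four or more spaces as a code block, so a triple-quoted string written inside a function needs its common leading whitespace stripped before it renders as headings and paragraphs. A minimal standalone sketch of that behaviour (not code from this repo):

```python
from textwrap import dedent

# A triple-quoted string defined inside a function carries the function's
# indentation; Markdown would render those indented lines as a code block.
def make_markdown() -> str:
    return dedent("""\
        #### A heading

        Body text that should render as regular Markdown, not as code.
    """)

print(make_markdown())  # the common 8-space indentation has been stripped
```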
utils.py (CHANGED)

@@ -42,8 +42,7 @@ def calculate_percentile(
     freq: str,
     percentile: float
 ) -> pd.DataFrame:
-    percentile_df: pd.DataFrame = df.groupby(pd.Grouper(key='Timestamp', freq=freq))["ResponseTime(ms)"]
-        .quantile(percentile).reset_index(name=f"p{int(percentile * 100)}_ResponseTime(ms)")
+    percentile_df: pd.DataFrame = df.groupby(pd.Grouper(key='Timestamp', freq=freq))["ResponseTime(ms)"].quantile(percentile).reset_index(name=f"p{int(percentile * 100)}_ResponseTime(ms)")
     percentile_df.replace(to_replace=np.nan, value=None, inplace=True)
     return percentile_df
 

@@ -51,13 +50,16 @@ def aggregate_data(
     df: pd.DataFrame,
     period_length: str
 ) -> pd.DataFrame:
+    if df.empty:
+        return pd.DataFrame()  # Return an empty DataFrame if input is empty
+
     aggregation_funcs = {
-        'p50': lambda x: np.percentile(x.dropna(), 50),
-        'p95': lambda x: np.percentile(x.dropna(), 95),
-        'p99': lambda x: np.percentile(x.dropna(), 99),
-        'max': lambda x: np.max(x.dropna()),
-        'min': lambda x: np.min(x.dropna()),
-        'average': lambda x: np.mean(x.dropna())
+        'p50': lambda x: np.percentile(x.dropna(), 50) if not x.dropna().empty else np.nan,
+        'p95': lambda x: np.percentile(x.dropna(), 95) if not x.dropna().empty else np.nan,
+        'p99': lambda x: np.percentile(x.dropna(), 99) if not x.dropna().empty else np.nan,
+        'max': lambda x: np.max(x.dropna()) if not x.dropna().empty else np.nan,
+        'min': lambda x: np.min(x.dropna()) if not x.dropna().empty else np.nan,
+        'average': lambda x: np.mean(x.dropna()) if not x.dropna().empty else np.nan
     }
 
     summary_df = df.groupby(pd.Grouper(key='Timestamp', freq=period_length)).agg(

@@ -111,30 +113,31 @@ def evaluate_alarm_state(
 
         for dp in chunk:
             if dp is None:
-                …
+                dp_symbol = '⚫️'
             elif check_condition(dp, threshold, alarm_condition):
-                …
+                dp_symbol = '🔴'
             else:
-                …
+                dp_symbol = '🟢'
+            data_point_repr += dp_symbol
 
         if len(chunk) < evaluation_range:
-            data_point_repr += '…
+            data_point_repr += '⚫️' * (evaluation_range - len(chunk))
 
-        if data_point_repr.count('…
-            num_dp_that_must_be_filled = datapoints_to_alarm - sum([data_point_repr.count('…
+        if data_point_repr.count('⚫️') > (evaluation_range - datapoints_to_alarm):
+            num_dp_that_must_be_filled = datapoints_to_alarm - sum([data_point_repr.count('🟢'), data_point_repr.count('🔴')])
 
         data_table_dict["DataPoints"].append(data_point_repr)
         data_table_dict["# of data points that must be filled"].append(num_dp_that_must_be_filled)
 
         if num_dp_that_must_be_filled > 0:
-            data_table_dict["MISSING"].append("INSUFFICIENT_DATA" if data_point_repr.count('…
+            data_table_dict["MISSING"].append("INSUFFICIENT_DATA" if data_point_repr.count('⚫️') == evaluation_range else "Retain current state")
             data_table_dict["IGNORE"].append("Retain current state")
             data_table_dict["BREACHING"].append("ALARM")
             data_table_dict["NOT BREACHING"].append("OK")
         else:
             data_table_dict["MISSING"].append("OK")
             data_table_dict["IGNORE"].append("Retain current state")
-            data_table_dict["BREACHING"].append("ALARM" if '…
-            data_table_dict["NOT BREACHING"].append("ALARM" if '…
+            data_table_dict["BREACHING"].append("ALARM" if '🔴' * datapoints_to_alarm in data_point_repr else "OK")
+            data_table_dict["NOT BREACHING"].append("ALARM" if '🟢' * datapoints_to_alarm not in data_point_repr else "OK")
 
     return pd.DataFrame(data_table_dict)
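For readers following the `evaluate_alarm_state` hunk, here is a small self-contained sketch of the symbol-string evaluation it performs for a single chunk of data points. It is illustrative only: a plain `>` comparison stands in for the app's `check_condition` helper, and the function and variable names below are made up for the example.

```python
# Illustrative sketch (not repository code) of the per-chunk evaluation above.
# Assumes a simple ">" threshold check in place of check_condition.
def evaluate_chunk(chunk, threshold, datapoints_to_alarm, evaluation_range):
    data_point_repr = ""
    for dp in chunk:
        if dp is None:
            data_point_repr += "⚫️"   # missing data point
        elif dp > threshold:
            data_point_repr += "🔴"   # breaching data point
        else:
            data_point_repr += "🟢"   # non-breaching data point
    # Pad short chunks with "missing" symbols up to the evaluation range
    data_point_repr += "⚫️" * (evaluation_range - len(chunk))

    real_points = data_point_repr.count("🟢") + data_point_repr.count("🔴")
    num_dp_that_must_be_filled = max(0, datapoints_to_alarm - real_points)

    if num_dp_that_must_be_filled > 0:
        missing = ("INSUFFICIENT_DATA"
                   if data_point_repr.count("⚫️") == evaluation_range
                   else "Retain current state")
        breaching, not_breaching = "ALARM", "OK"
    else:
        missing = "OK"
        breaching = "ALARM" if "🔴" * datapoints_to_alarm in data_point_repr else "OK"
        not_breaching = "ALARM" if "🟢" * datapoints_to_alarm not in data_point_repr else "OK"

    return {
        "DataPoints": data_point_repr,
        "MISSING": missing,
        "IGNORE": "Retain current state",  # this strategy always retains the current state
        "BREACHING": breaching,
        "NOT BREACHING": not_breaching,
    }

# Example: threshold of 150 ms, 3 datapoints to alarm, evaluation range of 5
print(evaluate_chunk([200, None, 120, 180], 150, 3, 5))
# -> {'DataPoints': '🔴⚫️🟢🔴⚫️', 'MISSING': 'OK', 'IGNORE': 'Retain current state',
#     'BREACHING': 'OK', 'NOT BREACHING': 'ALARM'}
```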