Spaces:
Sleeping
Sleeping
plotting lines, but with matplotlib everywhere
Browse files- streamlit_app.py +24 -52
streamlit_app.py
CHANGED
@@ -3,7 +3,7 @@ import pandas as pd
|
|
3 |
import matplotlib.pyplot as plt
|
4 |
from datetime import datetime, time, date
|
5 |
from typing import List, Dict, Any, Tuple
|
6 |
-
from utils import generate_random_data,
|
7 |
from textwrap import dedent
|
8 |
|
9 |
# Constants
|
@@ -21,13 +21,15 @@ def main():
|
|
21 |
|
22 |
if not st.session_state.df.empty:
|
23 |
display_dataframe("Raw Event Data", st.session_state.df)
|
|
|
24 |
|
25 |
-
# Section 2 - Calculate
|
26 |
-
st.header("Section 2 - Calculate
|
27 |
-
|
28 |
|
29 |
-
if not st.session_state.
|
30 |
-
display_dataframe("Aggregated Summary Data", st.session_state.
|
|
|
31 |
|
32 |
# Section 3 - Summary Data Aggregated by Period
|
33 |
st.header("Section 3 - Summary Data Aggregated by Period")
|
@@ -35,13 +37,13 @@ def main():
|
|
35 |
|
36 |
if not st.session_state.summary_by_period_df.empty:
|
37 |
display_dataframe("Summary Data Aggregated by Period", st.session_state.summary_by_period_df)
|
|
|
38 |
|
39 |
# Section 4 - Evaluate Alarm State
|
40 |
st.header("Section 4 - Evaluate Alarm State")
|
41 |
alarm_state_form()
|
42 |
|
43 |
if not st.session_state.alarm_state_df.empty:
|
44 |
-
plot_time_series(st.session_state.summary_by_period_df, st.session_state.threshold_input, st.session_state.alarm_condition_input, st.session_state.evaluation_range_input)
|
45 |
display_alarm_state_evaluation(st.session_state.alarm_state_df)
|
46 |
|
47 |
display_key_tables()
|
@@ -49,8 +51,8 @@ def main():
|
|
49 |
def initialize_session_state() -> None:
|
50 |
if 'df' not in st.session_state:
|
51 |
st.session_state.df = pd.DataFrame()
|
52 |
-
if '
|
53 |
-
st.session_state.
|
54 |
if 'summary_by_period_df' not in st.session_state:
|
55 |
st.session_state.summary_by_period_df = pd.DataFrame()
|
56 |
if 'alarm_state_df' not in st.session_state:
|
@@ -75,29 +77,26 @@ def generate_data_form() -> None:
|
|
75 |
null_percentage=null_percentage_input
|
76 |
)
|
77 |
|
78 |
-
def
|
79 |
freq_input = st.selectbox("Period (bin)", ['1min', '5min', '15min'], key='freq_input', help="Select the frequency for aggregating the data.")
|
80 |
-
|
|
|
|
|
|
|
|
|
|
|
81 |
if not st.session_state.df.empty:
|
82 |
-
st.session_state.
|
83 |
|
84 |
def summary_by_period_form() -> None:
|
85 |
period_length_input = st.selectbox("Period Length", ['1min', '5min', '15min'], key='period_length_input', help="Select the period length for aggregating the summary data.")
|
86 |
if not st.session_state.df.empty:
|
87 |
st.session_state.summary_by_period_df = aggregate_data(st.session_state.df, period_length_input)
|
88 |
-
else:
|
89 |
-
st.warning("No data available to aggregate.")
|
90 |
|
91 |
def alarm_state_form() -> None:
|
92 |
threshold_input = st.slider("Threshold (ms)", min_value=50, max_value=300, value=150, key='threshold_input', help="Specify the threshold value for evaluating the alarm state.")
|
93 |
datapoints_to_alarm_input = st.number_input("Datapoints to Alarm", min_value=1, value=3, key='datapoints_to_alarm_input', help="Specify the number of data points required to trigger an alarm.")
|
94 |
evaluation_range_input = st.number_input("Evaluation Range", min_value=1, value=5, key='evaluation_range_input', help="Specify the range of data points to evaluate for alarm state.")
|
95 |
-
aggregation_function_input = st.selectbox(
|
96 |
-
"Aggregation Function",
|
97 |
-
['p50', 'p95', 'p99', 'max', 'min', 'average'],
|
98 |
-
key='aggregation_function_input',
|
99 |
-
help="Select the aggregation function for visualizing the data and computing alarms."
|
100 |
-
)
|
101 |
alarm_condition_input = st.selectbox(
|
102 |
"Alarm Condition",
|
103 |
['>', '>=', '<', '<='],
|
@@ -110,7 +109,7 @@ def alarm_state_form() -> None:
|
|
110 |
threshold=threshold_input,
|
111 |
datapoints_to_alarm=datapoints_to_alarm_input,
|
112 |
evaluation_range=evaluation_range_input,
|
113 |
-
aggregation_function=aggregation_function_input,
|
114 |
alarm_condition=alarm_condition_input
|
115 |
)
|
116 |
|
@@ -118,9 +117,9 @@ def display_dataframe(title: str, df: pd.DataFrame) -> None:
|
|
118 |
st.write(title)
|
119 |
st.dataframe(df)
|
120 |
|
121 |
-
def plot_time_series(df: pd.DataFrame,
|
122 |
timestamps = df['Timestamp']
|
123 |
-
response_times = df[
|
124 |
|
125 |
segments = []
|
126 |
current_segment = {'timestamps': [], 'values': []}
|
@@ -141,38 +140,12 @@ def plot_time_series(df: pd.DataFrame, threshold: int, alarm_condition: str, eva
|
|
141 |
|
142 |
color = 'tab:blue'
|
143 |
ax1.set_xlabel('Timestamp')
|
144 |
-
ax1.set_ylabel('
|
145 |
|
146 |
for segment in segments:
|
147 |
ax1.plot(segment['timestamps'], segment['values'], color=color, linewidth=0.5)
|
148 |
ax1.scatter(segment['timestamps'], segment['values'], color=color, s=10)
|
149 |
|
150 |
-
line_style = '--' if alarm_condition in ['<', '>'] else '-'
|
151 |
-
ax1.axhline(y=threshold, color='r', linestyle=line_style, linewidth=0.8, label='Threshold')
|
152 |
-
ax1.tick_params(axis='y', labelcolor=color)
|
153 |
-
|
154 |
-
if alarm_condition in ['<=', '<']:
|
155 |
-
ax1.fill_between(timestamps, 0, threshold, color='pink', alpha=0.3)
|
156 |
-
else:
|
157 |
-
ax1.fill_between(timestamps, threshold, response_times.max(), color='pink', alpha=0.3)
|
158 |
-
|
159 |
-
period_indices = range(len(df))
|
160 |
-
ax2 = ax1.twiny()
|
161 |
-
ax2.set_xticks(period_indices)
|
162 |
-
ax2.set_xticklabels(period_indices, fontsize=8)
|
163 |
-
ax2.set_xlabel('Time Periods', fontsize=8)
|
164 |
-
ax2.xaxis.set_tick_params(width=0.5)
|
165 |
-
|
166 |
-
for idx in period_indices:
|
167 |
-
if idx % evaluation_range == 0:
|
168 |
-
ax1.axvline(x=df['Timestamp'].iloc[idx], color='green', linestyle='-', alpha=0.3)
|
169 |
-
max_value = max(filter(lambda x: x is not None, df[st.session_state.aggregation_function_input]))
|
170 |
-
ax1.text(df['Timestamp'].iloc[idx], max_value * 0.95, f"[{idx // evaluation_range}]", rotation=90, verticalalignment='bottom', color='grey', alpha=0.7, fontsize=8)
|
171 |
-
else:
|
172 |
-
ax1.axvline(x=df['Timestamp'].iloc[idx], color='grey', linestyle='--', alpha=0.3)
|
173 |
-
|
174 |
-
ax1.annotate('Alarm threshold', xy=(0.98, threshold), xycoords=('axes fraction', 'data'), ha='right', va='bottom', fontsize=8, color='red', backgroundcolor='none')
|
175 |
-
|
176 |
fig.tight_layout()
|
177 |
st.pyplot(fig)
|
178 |
|
@@ -197,8 +170,7 @@ def display_key_tables() -> None:
|
|
197 |
st.table(symbol_df)
|
198 |
|
199 |
# Columns
|
200 |
-
st.write(dedent("""
|
201 |
-
#### Columns: Strategies for handling missing data points [docs](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/AlarmThatSendsEmail.html#alarms-and-missing-data)
|
202 |
|
203 |
Sometimes, no metric events may have been reported during a given time period. In this case,
|
204 |
you must decide how you will treat missing data points. Ignore it? Or consider it a failure.
|
|
|
3 |
import matplotlib.pyplot as plt
|
4 |
from datetime import datetime, time, date
|
5 |
from typing import List, Dict, Any, Tuple
|
6 |
+
from utils import generate_random_data, evaluate_alarm_state, aggregate_data
|
7 |
from textwrap import dedent
|
8 |
|
9 |
# Constants
|
|
|
21 |
|
22 |
if not st.session_state.df.empty:
|
23 |
display_dataframe("Raw Event Data", st.session_state.df)
|
24 |
+
plot_time_series(st.session_state.df, "ResponseTime(ms)")
|
25 |
|
26 |
+
# Section 2 - Calculate Aggregations
|
27 |
+
st.header("Section 2 - Calculate Aggregations")
|
28 |
+
aggregation_form()
|
29 |
|
30 |
+
if not st.session_state.aggregated_df.empty:
|
31 |
+
display_dataframe("Aggregated Summary Data", st.session_state.aggregated_df)
|
32 |
+
plot_time_series(st.session_state.aggregated_df, st.session_state.aggregation_function_input)
|
33 |
|
34 |
# Section 3 - Summary Data Aggregated by Period
|
35 |
st.header("Section 3 - Summary Data Aggregated by Period")
|
|
|
37 |
|
38 |
if not st.session_state.summary_by_period_df.empty:
|
39 |
display_dataframe("Summary Data Aggregated by Period", st.session_state.summary_by_period_df)
|
40 |
+
plot_time_series(st.session_state.summary_by_period_df, st.session_state.aggregation_function_input)
|
41 |
|
42 |
# Section 4 - Evaluate Alarm State
|
43 |
st.header("Section 4 - Evaluate Alarm State")
|
44 |
alarm_state_form()
|
45 |
|
46 |
if not st.session_state.alarm_state_df.empty:
|
|
|
47 |
display_alarm_state_evaluation(st.session_state.alarm_state_df)
|
48 |
|
49 |
display_key_tables()
|
|
|
51 |
def initialize_session_state() -> None:
|
52 |
if 'df' not in st.session_state:
|
53 |
st.session_state.df = pd.DataFrame()
|
54 |
+
if 'aggregated_df' not in st.session_state:
|
55 |
+
st.session_state.aggregated_df = pd.DataFrame()
|
56 |
if 'summary_by_period_df' not in st.session_state:
|
57 |
st.session_state.summary_by_period_df = pd.DataFrame()
|
58 |
if 'alarm_state_df' not in st.session_state:
|
|
|
77 |
null_percentage=null_percentage_input
|
78 |
)
|
79 |
|
80 |
+
def aggregation_form() -> None:
|
81 |
freq_input = st.selectbox("Period (bin)", ['1min', '5min', '15min'], key='freq_input', help="Select the frequency for aggregating the data.")
|
82 |
+
aggregation_function_input = st.selectbox(
|
83 |
+
"Aggregation Function",
|
84 |
+
['p50', 'p95', 'p99', 'max', 'min', 'average'],
|
85 |
+
key='aggregation_function_input',
|
86 |
+
help="Select the aggregation function for visualizing the data."
|
87 |
+
)
|
88 |
if not st.session_state.df.empty:
|
89 |
+
st.session_state.aggregated_df = aggregate_data(st.session_state.df, freq_input)
|
90 |
|
91 |
def summary_by_period_form() -> None:
|
92 |
period_length_input = st.selectbox("Period Length", ['1min', '5min', '15min'], key='period_length_input', help="Select the period length for aggregating the summary data.")
|
93 |
if not st.session_state.df.empty:
|
94 |
st.session_state.summary_by_period_df = aggregate_data(st.session_state.df, period_length_input)
|
|
|
|
|
95 |
|
96 |
def alarm_state_form() -> None:
|
97 |
threshold_input = st.slider("Threshold (ms)", min_value=50, max_value=300, value=150, key='threshold_input', help="Specify the threshold value for evaluating the alarm state.")
|
98 |
datapoints_to_alarm_input = st.number_input("Datapoints to Alarm", min_value=1, value=3, key='datapoints_to_alarm_input', help="Specify the number of data points required to trigger an alarm.")
|
99 |
evaluation_range_input = st.number_input("Evaluation Range", min_value=1, value=5, key='evaluation_range_input', help="Specify the range of data points to evaluate for alarm state.")
|
|
|
|
|
|
|
|
|
|
|
|
|
100 |
alarm_condition_input = st.selectbox(
|
101 |
"Alarm Condition",
|
102 |
['>', '>=', '<', '<='],
|
|
|
109 |
threshold=threshold_input,
|
110 |
datapoints_to_alarm=datapoints_to_alarm_input,
|
111 |
evaluation_range=evaluation_range_input,
|
112 |
+
aggregation_function=st.session_state.aggregation_function_input,
|
113 |
alarm_condition=alarm_condition_input
|
114 |
)
|
115 |
|
|
|
117 |
st.write(title)
|
118 |
st.dataframe(df)
|
119 |
|
120 |
+
def plot_time_series(df: pd.DataFrame, column: str) -> None:
|
121 |
timestamps = df['Timestamp']
|
122 |
+
response_times = df[column]
|
123 |
|
124 |
segments = []
|
125 |
current_segment = {'timestamps': [], 'values': []}
|
|
|
140 |
|
141 |
color = 'tab:blue'
|
142 |
ax1.set_xlabel('Timestamp')
|
143 |
+
ax1.set_ylabel(f'{column} (ms)', color=color)
|
144 |
|
145 |
for segment in segments:
|
146 |
ax1.plot(segment['timestamps'], segment['values'], color=color, linewidth=0.5)
|
147 |
ax1.scatter(segment['timestamps'], segment['values'], color=color, s=10)
|
148 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
149 |
fig.tight_layout()
|
150 |
st.pyplot(fig)
|
151 |
|
|
|
170 |
st.table(symbol_df)
|
171 |
|
172 |
# Columns
|
173 |
+
st.write(dedent(""" #### Columns: Strategies for handling missing data points [docs](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/AlarmThatSendsEmail.html#alarms-and-missing-data)
|
|
|
174 |
|
175 |
Sometimes, no metric events may have been reported during a given time period. In this case,
|
176 |
you must decide how you will treat missing data points. Ignore it? Or consider it a failure.
|