File size: 11,226 Bytes
755ac75
 
02f17d0
adc9f0c
bc28645
02f17d0
755ac75
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
adc9f0c
755ac75
64bdbaf
 
 
755ac75
64bdbaf
02f17d0
 
 
 
 
 
 
adc9f0c
755ac75
 
 
 
 
 
02f17d0
 
 
 
 
 
 
adc9f0c
755ac75
 
 
 
 
 
02f17d0
755ac75
 
 
 
 
 
 
64bdbaf
 
755ac75
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64bdbaf
755ac75
 
64bdbaf
755ac75
 
 
adc9f0c
 
755ac75
 
bc28645
755ac75
 
 
 
 
 
 
 
 
 
 
 
 
 
02f17d0
755ac75
 
 
 
 
 
 
02f17d0
755ac75
02f17d0
755ac75
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
02f17d0
755ac75
 
 
 
 
02f17d0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
755ac75
 
 
 
 
 
 
 
 
 
 
 
 
bc28645
755ac75
 
 
 
 
 
 
 
 
 
64bdbaf
bc28645
 
 
 
 
 
 
755ac75
bc28645
 
 
 
 
 
755ac75
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
adc9f0c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
import streamlit as st
import pandas as pd
from datetime import time, date
from utils import generate_random_data, evaluate_alarm_state, aggregate_data, re_aggregate_data
from textwrap import dedent
from matplotlib import pyplot as plt

# Constants
# Fixed calendar date handed to generate_random_data() as its `date` argument;
# the data-generation form only collects start/end times, never a date.
HARD_CODED_DATE = date(2024, 7, 26)

def main():
    """Render the whole page: four sequential sections followed by the key tables.

    Each section first draws its input widgets (which also refresh the matching
    DataFrame in ``st.session_state``) and then, if that DataFrame is non-empty,
    shows the resulting table and chart.
    """
    state = st.session_state
    chart_functions = ['p50', 'p95', 'p99', 'max', 'min', 'average']
    chart_help = "Select the aggregation function for visualizing the data."

    st.title("Streamlit App for Data Generation and Analysis")

    # Make sure every DataFrame slot exists before anything reads it.
    initialize_session_state()

    # Section 1 - Generate random data
    st.header("Section 1 - Generate Random Data")
    generate_data_form()

    raw_events = state.df
    if not raw_events.empty:
        display_dataframe("Raw Event Data", raw_events)
        st.line_chart(raw_events.set_index("Timestamp"))

    # Section 2 - Calculate Aggregations
    st.header("Section 2 - Calculate Aggregations")
    aggregation_form()

    storage_summary = state.aggregated_df
    if not storage_summary.empty:
        display_dataframe("Aggregated Summary Data (Storage)", storage_summary)
        storage_function = st.selectbox(
            "Aggregation Function (Storage)",
            chart_functions,
            key='aggregation_function_input__storage',
            help=chart_help,
        )
        st.line_chart(storage_summary.set_index("Timestamp")[storage_function])

    # Section 3 - Summary Data Aggregated by Period
    st.header("Section 3 - Summary Data Aggregated by Period")
    summary_by_period_form()

    alarm_summary = state.summary_by_period_df
    if not alarm_summary.empty:
        display_dataframe("Summary Data Aggregated by Period (for Alarm)", alarm_summary)
        # This selectbox's key is read later by alarm_state_form() and
        # plot_time_series(), so it has to be rendered before Section 4.
        alarm_function = st.selectbox(
            "Aggregation Function (Alarm)",
            chart_functions,
            key='aggregation_function_input__alarm',
            help=chart_help,
        )
        st.line_chart(alarm_summary.set_index("Timestamp")[alarm_function])

    # Section 4 - Evaluate Alarm State
    st.header("Section 4 - Evaluate Alarm State")
    alarm_state_form()

    if not state.alarm_state_df.empty:
        plot_time_series(
            state.summary_by_period_df,
            state.threshold_input,
            state.alarm_condition_input,
            state.evaluation_range_input,
        )
        display_alarm_state_evaluation(state.alarm_state_df)

    display_key_tables()

def initialize_session_state() -> None:
    """Seed ``st.session_state`` with an empty DataFrame for each pipeline stage.

    Keys that already exist are left untouched, so data survives reruns.
    """
    dataframe_keys = ('df', 'aggregated_df', 'summary_by_period_df', 'alarm_state_df')
    for slot in dataframe_keys:
        if slot not in st.session_state:
            st.session_state[slot] = pd.DataFrame()

def generate_data_form() -> None:
    """Draw the data-generation form and, on submit, store fresh random data.

    The generated DataFrame is written to ``st.session_state.df``; nothing
    happens until the "Generate Data" button is pressed.
    """
    with st.form(key='generate_data_form'):
        window_start = st.time_input("Start Time", time(12, 0), help="Select the start time for generating random data.")
        window_end = st.time_input("End Time", time(12, 30), help="Select the end time for generating random data.")
        point_count = st.slider("Count", min_value=1, max_value=200, value=60, help="Specify the number of data points to generate.")
        response_range = st.slider("Response Time Range (ms)", min_value=50, max_value=300, value=(100, 250), help="Select the range of response times in milliseconds.")
        null_fraction = st.slider("Null Percentage", min_value=0.0, max_value=1.0, value=0.5, help="Select the percentage of null values in the generated data.")
        was_submitted = st.form_submit_button(label='Generate Data')

        if not was_submitted:
            return

        generator_kwargs = {
            'date': HARD_CODED_DATE,
            'start_time': window_start,
            'end_time': window_end,
            'count': point_count,
            'response_time_range': response_range,
            'null_percentage': null_fraction,
        }
        st.session_state.df = generate_random_data(**generator_kwargs)

def aggregation_form() -> None:
    """Draw the bin-size selector and refresh ``st.session_state.aggregated_df``.

    The aggregation is skipped while no raw data has been generated.
    """
    bin_size = st.selectbox(
        "Period (bin)",
        ['1min', '5min', '15min'],
        key='freq_input',
        help="Select the frequency for aggregating the data.",
    )
    raw_events = st.session_state.df
    if raw_events.empty:
        return
    st.session_state.aggregated_df = aggregate_data(raw_events, bin_size)

def summary_by_period_form() -> None:
    """Draw the period-length selector and refresh ``st.session_state.summary_by_period_df``.

    The re-aggregation is skipped while the aggregated DataFrame is empty.
    """
    period_length = st.selectbox(
        "Period Length",
        ['1min', '5min', '15min'],
        key='period_length_input',
        help="Select the period length for aggregating the summary data.",
    )
    storage_summary = st.session_state.aggregated_df
    if storage_summary.empty:
        return
    st.session_state.summary_by_period_df = re_aggregate_data(storage_summary, period_length)

def alarm_state_form() -> None:
    """Draw the alarm-configuration widgets and refresh ``st.session_state.alarm_state_df``.

    The evaluation is skipped while the per-period summary is empty. When it
    runs, the aggregation function is read from the
    ``aggregation_function_input__alarm`` session key.
    """
    threshold_ms = st.slider("Threshold (ms)", min_value=50, max_value=300, value=150, key='threshold_input', help="Specify the threshold value for evaluating the alarm state.")
    breaches_needed = st.number_input("Datapoints to Alarm", min_value=1, value=3, key='datapoints_to_alarm_input', help="Specify the number of data points required to trigger an alarm.")
    window_size = st.number_input("Evaluation Range", min_value=1, value=5, key='evaluation_range_input', help="Specify the range of data points to evaluate for alarm state.")
    comparison = st.selectbox(
        "Alarm Condition",
        ['>', '>=', '<', '<='],
        key='alarm_condition_input',
        help="Select the condition for evaluating the alarm state.",
    )

    period_summary = st.session_state.summary_by_period_df
    if period_summary.empty:
        return

    st.session_state.alarm_state_df = evaluate_alarm_state(
        summary_df=period_summary,
        threshold=threshold_ms,
        datapoints_to_alarm=breaches_needed,
        evaluation_range=window_size,
        aggregation_function=st.session_state.aggregation_function_input__alarm,
        alarm_condition=comparison,
    )

def display_dataframe(title: str, df: pd.DataFrame) -> None:
    """Write *title* to the page, then render *df* as a table directly below it."""
    st.write(title)
    st.dataframe(data=df)

def _split_non_null_segments(timestamps, values):
    """Group consecutive non-null (timestamp, value) pairs into segments; nulls act as separators."""
    segments = []
    current = {'timestamps': [], 'values': []}
    for timestamp, value in zip(timestamps, values):
        if pd.isna(value):
            # A gap closes the running segment (if it holds anything).
            if current['timestamps']:
                segments.append(current)
                current = {'timestamps': [], 'values': []}
            continue
        current['timestamps'].append(timestamp)
        current['values'].append(value)
    if current['timestamps']:
        segments.append(current)
    return segments


def plot_time_series(df: pd.DataFrame, threshold: int, alarm_condition: str, evaluation_range: int) -> None:
    """Plot the selected alarm metric against the threshold and render it in Streamlit.

    The metric column is chosen by ``st.session_state.aggregation_function_input__alarm``.
    Missing values break the line into separate segments instead of being
    bridged. The breaching side of the threshold is shaded, and a vertical
    guide is drawn at every period (green and labelled at the start of each
    evaluation range, grey dashed otherwise).

    Args:
        df: Per-period summary with a ``Timestamp`` column and the metric column.
        threshold: Alarm threshold, drawn as a horizontal red line.
        alarm_condition: One of ``'>'``, ``'>='``, ``'<'``, ``'<='``.
        evaluation_range: Number of periods per evaluation range (must be >= 1).
    """
    metric_column = st.session_state.aggregation_function_input__alarm
    timestamps = df['Timestamp']
    response_times = df[metric_column]

    # Series.max() skips missing values; the previous `x is not None` filter
    # let NaN through. Computed once instead of once per evaluation range.
    max_value = response_times.max()
    if pd.isna(max_value):
        # No data at all: anchor labels/shading on the threshold instead of NaN.
        max_value = threshold

    segments = _split_non_null_segments(timestamps, response_times)

    fig, ax1 = plt.subplots()
    try:
        color = 'tab:blue'
        ax1.set_xlabel('Timestamp')
        ax1.set_ylabel('Response Time (ms)', color=color)

        for segment in segments:
            ax1.plot(segment['timestamps'], segment['values'], color=color, linewidth=0.5)
            ax1.scatter(segment['timestamps'], segment['values'], color=color, s=10)

        # Dashed line for strict comparisons, solid when the threshold itself breaches.
        line_style = '--' if alarm_condition in ['<', '>'] else '-'
        ax1.axhline(y=threshold, color='r', linestyle=line_style, linewidth=0.8, label='Threshold')
        ax1.tick_params(axis='y', labelcolor=color)

        if alarm_condition in ['<=', '<']:
            ax1.fill_between(timestamps, 0, threshold, color='pink', alpha=0.3)
        else:
            # Never let the upper bound drop below the threshold, otherwise the
            # "breaching" band would be painted underneath the threshold line.
            ax1.fill_between(timestamps, threshold, max(max_value, threshold), color='pink', alpha=0.3)

        # Secondary top axis labelled with period indices.
        # NOTE(review): this axis has its own x-limits, so its ticks may not line
        # up exactly with the timestamp guides on ax1 - confirm visually.
        period_indices = range(len(df))
        ax2 = ax1.twiny()
        ax2.set_xticks(period_indices)
        ax2.set_xticklabels(period_indices, fontsize=8)
        ax2.set_xlabel('Time Periods', fontsize=8)
        ax2.xaxis.set_tick_params(width=0.5)

        for idx, timestamp in enumerate(timestamps):
            if idx % evaluation_range == 0:
                # Start of an evaluation range: solid green guide plus its ordinal.
                ax1.axvline(x=timestamp, color='green', linestyle='-', alpha=0.3)
                ax1.text(timestamp, max_value * 0.95, f"[{idx // evaluation_range}]", rotation=90, verticalalignment='bottom', color='grey', alpha=0.7, fontsize=8)
            else:
                ax1.axvline(x=timestamp, color='grey', linestyle='--', alpha=0.3)

        ax1.annotate('Alarm threshold', xy=(0.98, threshold), xycoords=('axes fraction', 'data'), ha='right', va='bottom', fontsize=8, color='red', backgroundcolor='none')

        fig.tight_layout()
        st.pyplot(fig)
    finally:
        # pyplot keeps every figure alive until closed; without this each
        # Streamlit rerun would leak one more figure.
        plt.close(fig)

def display_alarm_state_evaluation(df: pd.DataFrame) -> None:
    """Show the alarm-state evaluation table under its "Alarm State Evaluation" caption."""
    # Same caption-then-table layout as every other table on the page.
    display_dataframe("Alarm State Evaluation", df)

def display_key_tables() -> None:
    """Render the legend at the bottom of the page.

    Three static tables are shown: the data-point symbols, the strategies for
    handling missing data points, and the possible alarm states.
    """
    st.write("### Key")

    # Symbols
    st.write("#### Symbols")
    symbol_rows = [
        ("🔴", "Breaching data point: This data point exceeds the threshold."),
        ("⚫️", "Missing data point: This data point is missing or not reported."),
        ("🟢", "Non-breaching data point: This data point is within the threshold."),
    ]
    st.table(pd.DataFrame(symbol_rows, columns=["Symbol", "Meaning"]))

    # Columns
    strategies_intro = dedent("""    #### Columns: Strategies for handling missing data points [docs](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/AlarmThatSendsEmail.html#alarms-and-missing-data)
             
    Sometimes, no metric events may have been reported during a given time period. In this case,
    you must decide how you will treat missing data points. Ignore it? Or consider it a failure.
             
    Here are the 4 supported strategies in AWS:
    """)
    st.write(strategies_intro)

    strategy_rows = [
        ("missing", "If all data points in the alarm evaluation range are missing, the alarm transitions to INSUFFICIENT_DATA. Possible values: INSUFFICIENT_DATA, Retain current state, ALARM, OK."),
        ("ignore", "The current alarm state is maintained. Possible values: Retain current state, ALARM, OK."),
        ("breaching", "Missing data points are treated as \"bad\" and breaching the threshold. Possible values: ALARM, OK."),
        ("notBreaching", "Missing data points are treated as \"good\" and within the threshold. Possible values: ALARM, OK."),
    ]
    st.table(pd.DataFrame(strategy_rows, columns=["Strategy", "Explanation"]))

    # States
    st.write("#### States")
    state_rows = [
        ("ALARM", "Alarm state is triggered."),
        ("OK", "Everything is within the threshold."),
        ("Retain current state", "The current alarm state is maintained."),
        ("INSUFFICIENT_DATA", "Not enough data to make a determination."),
    ]
    st.table(pd.DataFrame(state_rows, columns=["State", "Description"]))

# Build the page only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()