Spaces:

phitoduck
/

cloudwatch-simulator

Sleeping

App Files Community

phitoduck committed on Jul 27, 2024

Commit

64bdbaf

1 Parent(s): bc28645

plotting lines, but with matplotlib everywhere

Browse files

Files changed (1) hide show

streamlit_app.py +24 -52

streamlit_app.py CHANGED Viewed

@@ -3,7 +3,7 @@ import pandas as pd
 import matplotlib.pyplot as plt
 from datetime import datetime, time, date
 from typing import List, Dict, Any, Tuple
-from utils import generate_random_data, calculate_percentile, evaluate_alarm_state, aggregate_data
 from textwrap import dedent
 # Constants
@@ -21,13 +21,15 @@ def main():
     if not st.session_state.df.empty:
         display_dataframe("Raw Event Data", st.session_state.df)
-    # Section 2 - Calculate Percentile
-    st.header("Section 2 - Calculate Percentile")
-    percentile_form()
-    if not st.session_state.percentile_df.empty:
-        display_dataframe("Aggregated Summary Data", st.session_state.percentile_df)
     # Section 3 - Summary Data Aggregated by Period
     st.header("Section 3 - Summary Data Aggregated by Period")
@@ -35,13 +37,13 @@ def main():
     if not st.session_state.summary_by_period_df.empty:
         display_dataframe("Summary Data Aggregated by Period", st.session_state.summary_by_period_df)
     # Section 4 - Evaluate Alarm State
     st.header("Section 4 - Evaluate Alarm State")
     alarm_state_form()
     if not st.session_state.alarm_state_df.empty:
-        plot_time_series(st.session_state.summary_by_period_df, st.session_state.threshold_input, st.session_state.alarm_condition_input, st.session_state.evaluation_range_input)
         display_alarm_state_evaluation(st.session_state.alarm_state_df)
     display_key_tables()
@@ -49,8 +51,8 @@ def main():
 def initialize_session_state() -> None:
     if 'df' not in st.session_state:
         st.session_state.df = pd.DataFrame()
-    if 'percentile_df' not in st.session_state:
-        st.session_state.percentile_df = pd.DataFrame()
     if 'summary_by_period_df' not in st.session_state:
         st.session_state.summary_by_period_df = pd.DataFrame()
     if 'alarm_state_df' not in st.session_state:
@@ -75,29 +77,26 @@ def generate_data_form() -> None:
                 null_percentage=null_percentage_input
             )
-def percentile_form() -> None:
     freq_input = st.selectbox("Period (bin)", ['1min', '5min', '15min'], key='freq_input', help="Select the frequency for aggregating the data.")
-    percentile_input = st.slider("Percentile", min_value=0.0, max_value=1.0, value=0.95, key='percentile_input', help="Select the percentile for calculating the aggregated summary data.")
     if not st.session_state.df.empty:
-        st.session_state.percentile_df = calculate_percentile(st.session_state.df, freq_input, percentile_input)
 def summary_by_period_form() -> None:
     period_length_input = st.selectbox("Period Length", ['1min', '5min', '15min'], key='period_length_input', help="Select the period length for aggregating the summary data.")
     if not st.session_state.df.empty:
         st.session_state.summary_by_period_df = aggregate_data(st.session_state.df, period_length_input)
-    else:
-        st.warning("No data available to aggregate.")
 def alarm_state_form() -> None:
     threshold_input = st.slider("Threshold (ms)", min_value=50, max_value=300, value=150, key='threshold_input', help="Specify the threshold value for evaluating the alarm state.")
     datapoints_to_alarm_input = st.number_input("Datapoints to Alarm", min_value=1, value=3, key='datapoints_to_alarm_input', help="Specify the number of data points required to trigger an alarm.")
     evaluation_range_input = st.number_input("Evaluation Range", min_value=1, value=5, key='evaluation_range_input', help="Specify the range of data points to evaluate for alarm state.")
-    aggregation_function_input = st.selectbox(
-        "Aggregation Function",
-        ['p50', 'p95', 'p99', 'max', 'min', 'average'],
-        key='aggregation_function_input',
-        help="Select the aggregation function for visualizing the data and computing alarms."
-    )
     alarm_condition_input = st.selectbox(
         "Alarm Condition",
         ['>', '>=', '<', '<='],
@@ -110,7 +109,7 @@ def alarm_state_form() -> None:
             threshold=threshold_input,
             datapoints_to_alarm=datapoints_to_alarm_input,
             evaluation_range=evaluation_range_input,
-            aggregation_function=aggregation_function_input,
             alarm_condition=alarm_condition_input
         )
@@ -118,9 +117,9 @@ def display_dataframe(title: str, df: pd.DataFrame) -> None:
     st.write(title)
     st.dataframe(df)
-def plot_time_series(df: pd.DataFrame, threshold: int, alarm_condition: str, evaluation_range: int) -> None:
     timestamps = df['Timestamp']
-    response_times = df[st.session_state.aggregation_function_input]
     segments = []
     current_segment = {'timestamps': [], 'values': []}
@@ -141,38 +140,12 @@ def plot_time_series(df: pd.DataFrame, threshold: int, alarm_condition: str, eva
     color = 'tab:blue'
     ax1.set_xlabel('Timestamp')
-    ax1.set_ylabel('Response Time (ms)', color=color)
     for segment in segments:
         ax1.plot(segment['timestamps'], segment['values'], color=color, linewidth=0.5)
         ax1.scatter(segment['timestamps'], segment['values'], color=color, s=10)
-    line_style = '--' if alarm_condition in ['<', '>'] else '-'
-    ax1.axhline(y=threshold, color='r', linestyle=line_style, linewidth=0.8, label='Threshold')
-    ax1.tick_params(axis='y', labelcolor=color)
-    if alarm_condition in ['<=', '<']:
-        ax1.fill_between(timestamps, 0, threshold, color='pink', alpha=0.3)
-    else:
-        ax1.fill_between(timestamps, threshold, response_times.max(), color='pink', alpha=0.3)
-    period_indices = range(len(df))
-    ax2 = ax1.twiny()
-    ax2.set_xticks(period_indices)
-    ax2.set_xticklabels(period_indices, fontsize=8)
-    ax2.set_xlabel('Time Periods', fontsize=8)
-    ax2.xaxis.set_tick_params(width=0.5)
-    for idx in period_indices:
-        if idx % evaluation_range == 0:
-            ax1.axvline(x=df['Timestamp'].iloc[idx], color='green', linestyle='-', alpha=0.3)
-            max_value = max(filter(lambda x: x is not None, df[st.session_state.aggregation_function_input]))
-            ax1.text(df['Timestamp'].iloc[idx], max_value * 0.95, f"[{idx // evaluation_range}]", rotation=90, verticalalignment='bottom', color='grey', alpha=0.7, fontsize=8)
-        else:
-            ax1.axvline(x=df['Timestamp'].iloc[idx], color='grey', linestyle='--', alpha=0.3)
-    ax1.annotate('Alarm threshold', xy=(0.98, threshold), xycoords=('axes fraction', 'data'), ha='right', va='bottom', fontsize=8, color='red', backgroundcolor='none')
     fig.tight_layout()
     st.pyplot(fig)
@@ -197,8 +170,7 @@ def display_key_tables() -> None:
     st.table(symbol_df)
     # Columns
-    st.write(dedent("""\
-    #### Columns: Strategies for handling missing data points [docs](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/AlarmThatSendsEmail.html#alarms-and-missing-data)
     Sometimes, no metric events may have been reported during a given time period. In this case,
     you must decide how you will treat missing data points. Ignore it? Or consider it a failure.

 import matplotlib.pyplot as plt
 from datetime import datetime, time, date
 from typing import List, Dict, Any, Tuple
+from utils import generate_random_data, evaluate_alarm_state, aggregate_data
 from textwrap import dedent
 # Constants
     if not st.session_state.df.empty:
         display_dataframe("Raw Event Data", st.session_state.df)
+        plot_time_series(st.session_state.df, "ResponseTime(ms)")
+    # Section 2 - Calculate Aggregations
+    st.header("Section 2 - Calculate Aggregations")
+    aggregation_form()
+    if not st.session_state.aggregated_df.empty:
+        display_dataframe("Aggregated Summary Data", st.session_state.aggregated_df)
+        plot_time_series(st.session_state.aggregated_df, st.session_state.aggregation_function_input)
     # Section 3 - Summary Data Aggregated by Period
     st.header("Section 3 - Summary Data Aggregated by Period")
     if not st.session_state.summary_by_period_df.empty:
         display_dataframe("Summary Data Aggregated by Period", st.session_state.summary_by_period_df)
+        plot_time_series(st.session_state.summary_by_period_df, st.session_state.aggregation_function_input)
     # Section 4 - Evaluate Alarm State
     st.header("Section 4 - Evaluate Alarm State")
     alarm_state_form()
     if not st.session_state.alarm_state_df.empty:
         display_alarm_state_evaluation(st.session_state.alarm_state_df)
     display_key_tables()
 def initialize_session_state() -> None:
     if 'df' not in st.session_state:
         st.session_state.df = pd.DataFrame()
+    if 'aggregated_df' not in st.session_state:
+        st.session_state.aggregated_df = pd.DataFrame()
     if 'summary_by_period_df' not in st.session_state:
         st.session_state.summary_by_period_df = pd.DataFrame()
     if 'alarm_state_df' not in st.session_state:
                 null_percentage=null_percentage_input
             )
+def aggregation_form() -> None:
     freq_input = st.selectbox("Period (bin)", ['1min', '5min', '15min'], key='freq_input', help="Select the frequency for aggregating the data.")
+    aggregation_function_input = st.selectbox(
+        "Aggregation Function",
+        ['p50', 'p95', 'p99', 'max', 'min', 'average'],
+        key='aggregation_function_input',
+        help="Select the aggregation function for visualizing the data."
+    )
     if not st.session_state.df.empty:
+        st.session_state.aggregated_df = aggregate_data(st.session_state.df, freq_input)
 def summary_by_period_form() -> None:
     period_length_input = st.selectbox("Period Length", ['1min', '5min', '15min'], key='period_length_input', help="Select the period length for aggregating the summary data.")
     if not st.session_state.df.empty:
         st.session_state.summary_by_period_df = aggregate_data(st.session_state.df, period_length_input)
 def alarm_state_form() -> None:
     threshold_input = st.slider("Threshold (ms)", min_value=50, max_value=300, value=150, key='threshold_input', help="Specify the threshold value for evaluating the alarm state.")
     datapoints_to_alarm_input = st.number_input("Datapoints to Alarm", min_value=1, value=3, key='datapoints_to_alarm_input', help="Specify the number of data points required to trigger an alarm.")
     evaluation_range_input = st.number_input("Evaluation Range", min_value=1, value=5, key='evaluation_range_input', help="Specify the range of data points to evaluate for alarm state.")
     alarm_condition_input = st.selectbox(
         "Alarm Condition",
         ['>', '>=', '<', '<='],
             threshold=threshold_input,
             datapoints_to_alarm=datapoints_to_alarm_input,
             evaluation_range=evaluation_range_input,
+            aggregation_function=st.session_state.aggregation_function_input,
             alarm_condition=alarm_condition_input
         )
     st.write(title)
     st.dataframe(df)
+def plot_time_series(df: pd.DataFrame, column: str) -> None:
     timestamps = df['Timestamp']
+    response_times = df[column]
     segments = []
     current_segment = {'timestamps': [], 'values': []}
     color = 'tab:blue'
     ax1.set_xlabel('Timestamp')
+    ax1.set_ylabel(f'{column} (ms)', color=color)
     for segment in segments:
         ax1.plot(segment['timestamps'], segment['values'], color=color, linewidth=0.5)
         ax1.scatter(segment['timestamps'], segment['values'], color=color, s=10)
     fig.tight_layout()
     st.pyplot(fig)
     st.table(symbol_df)
     # Columns
+    st.write(dedent("""    #### Columns: Strategies for handling missing data points [docs](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/AlarmThatSendsEmail.html#alarms-and-missing-data)
     Sometimes, no metric events may have been reported during a given time period. In this case,
     you must decide how you will treat missing data points. Ignore it? Or consider it a failure.