"""Gradio app: anomaly detection on a timeseries CSV with a Keras autoencoder.

The model (keras-io/timeseries-anomaly-detection) reconstructs sliding
windows of the normalized series; windows whose mean absolute reconstruction
error exceeds a pre-computed threshold are flagged as anomalous.
"""

import json

import gradio as gr
import numpy as np
import pandas as pd
from huggingface_hub import from_pretrained_keras
from matplotlib import pyplot as plt

# Normalization stats and anomaly threshold computed at training time.
# Expected keys: "mean", "std", "threshold".
# (Original opened the file without ever closing it — use a context manager.)
with open("scaler.json") as f:
    scaler = json.load(f)

# Length of the sliding windows the autoencoder was trained on.
TIME_STEPS = 288


def create_sequences(values, time_steps=TIME_STEPS):
    """Split an array into overlapping windows of length `time_steps`.

    Returns a stacked array of shape (len(values) - time_steps + 1, time_steps, ...).
    """
    windows = [values[i : i + time_steps] for i in range(len(values) - time_steps + 1)]
    return np.stack(windows)


def normalize_data(data):
    """Standardize `data` with the training-set mean/std from scaler.json."""
    return (data - scaler["mean"]) / scaler["std"]


def plot_test_data(df_test_value):
    """Plot the normalized input series and return the matplotlib Figure."""
    fig, ax = plt.subplots(figsize=(12, 6))
    df_test_value.plot(legend=False, ax=ax)
    ax.set_xlabel("Time")
    ax.set_ylabel("Value")
    ax.set_title("Input Test Data")
    return fig


def get_anomalies(df_test_value):
    """Run the autoencoder over sliding windows and flag high-error windows.

    Returns a boolean array with one entry per window: True where the
    window's mean absolute reconstruction error exceeds the threshold.
    """
    x_test = create_sequences(df_test_value.values)
    # NOTE(review): the model is fetched/loaded on every request; hoist to
    # module level if cold-start latency matters.
    model = from_pretrained_keras("keras-io/timeseries-anomaly-detection")
    x_test_pred = model.predict(x_test)
    test_mae_loss = np.mean(np.abs(x_test_pred - x_test), axis=1).reshape(-1)
    return test_mae_loss > scaler["threshold"]


def plot_anomalies(df_test_value, data, anomalies):
    """Highlight anomalous points of `data` in red and return the Figure.

    Data point i is treated as anomalous when all windows covering samples
    (i - TIME_STEPS + 1) .. i were flagged as anomalies.
    """
    anomalous_data_indices = []
    for data_idx in range(TIME_STEPS - 1, len(df_test_value) - TIME_STEPS + 1):
        if np.all(anomalies[data_idx - TIME_STEPS + 1 : data_idx]):
            anomalous_data_indices.append(data_idx)
    df_subset = data.iloc[anomalous_data_indices]
    fig, ax = plt.subplots(figsize=(12, 6))
    data.plot(legend=False, ax=ax)
    df_subset.plot(legend=False, ax=ax, color="r")
    ax.set_xlabel("Time")
    ax.set_ylabel("Value")
    ax.set_title("Anomalous Data Points")
    return fig


def clean_data(df):
    """Normalize the input frame to two columns: 'timestamp' and 'value'.

    Accepts either 'Date' + 'Hour' columns or a ready-made 'timestamp'
    column.

    Raises:
        ValueError: if neither column layout is present.
    """
    # Work on a copy: the original renamed in place on what can be a slice,
    # triggering SettingWithCopyWarning and mutating the caller's frame.
    df = df.copy()
    if "Date" in df.columns and "Hour" in df.columns:
        df["timestamp"] = pd.to_datetime(
            df["Date"] + " " + df["Hour"].astype(str) + ":00:00"
        )
        df = df[["timestamp", "Hourly_Labor_Hours_Total"]]
        df = df.rename(columns={"Hourly_Labor_Hours_Total": "value"})
    elif "timestamp" in df.columns:
        if "Hourly_Labor_Hours_Total" in df.columns:
            df = df.rename(columns={"Hourly_Labor_Hours_Total": "value"})
        df = df[["timestamp", "value"]]
    else:
        raise ValueError("Input data must have either 'Date' and 'Hour' columns, or a 'timestamp' column.")
    return df


def master(file):
    """Gradio handler: read the uploaded CSV and return the anomaly plot."""
    data = pd.read_csv(file.name)
    # Bug fix: clean FIRST. The original converted data['timestamp'] before
    # clean_data, which raised KeyError for CSVs that only provide
    # 'Date'/'Hour' columns (a layout clean_data explicitly supports).
    data = clean_data(data)
    data["timestamp"] = pd.to_datetime(data["timestamp"])
    data.set_index("timestamp", inplace=True)
    df_test_value = normalize_data(data)
    # (The original also built an input-data figure here and discarded it;
    # plot_test_data is kept available but no longer called needlessly.)
    anomalies = get_anomalies(df_test_value)
    return plot_anomalies(df_test_value, data, anomalies)


iface = gr.Interface(
    fn=master,
    # gr.inputs / gr.outputs are deprecated and removed in current Gradio;
    # use top-level components. master returns a matplotlib Figure, so the
    # correct output component is Plot, not Image.
    inputs=gr.File(label="CSV File"),
    outputs=gr.Plot(),
    # NOTE(review): "labor-hourly-short-csv" looks like a missing ".csv"
    # extension — verify the example file name exists as written.
    examples=["art_daily_jumpsup.csv", "labor-hourly-short-csv"],
    title="Timeseries Anomaly Detection Using an Autoencoder",
    description="Anomaly detection of timeseries data.",
)

if __name__ == "__main__":
    iface.launch()