"""Gradio app: anomaly detection on a timeseries CSV with a Keras autoencoder.

The model (keras-io/timeseries-anomaly-detection) reconstructs sliding
windows of the normalized series; windows whose mean absolute reconstruction
error exceeds a pre-computed threshold are flagged as anomalous.
"""

import json

import gradio as gr
import numpy as np
import pandas as pd
from huggingface_hub import from_pretrained_keras
from matplotlib import pyplot as plt

# Normalization stats and anomaly threshold computed at training time.
# Expected keys: "mean", "std", "threshold".
# (Original opened the file without ever closing it — use a context manager.)
with open("scaler.json") as f:
    scaler = json.load(f)

# Length of the sliding windows the autoencoder was trained on.
TIME_STEPS = 288


def create_sequences(values, time_steps=TIME_STEPS):
    """Split an array into overlapping windows of length `time_steps`.

    Returns a stacked array of shape (len(values) - time_steps + 1, time_steps, ...).
    """
    windows = [values[i : i + time_steps] for i in range(len(values) - time_steps + 1)]
    return np.stack(windows)


def normalize_data(data):
    """Standardize `data` with the training-set mean/std from scaler.json."""
    return (data - scaler["mean"]) / scaler["std"]


def plot_test_data(df_test_value):
    """Plot the normalized input series and return the matplotlib Figure."""
    fig, ax = plt.subplots(figsize=(12, 6))
    df_test_value.plot(legend=False, ax=ax)
    ax.set_xlabel("Time")
    ax.set_ylabel("Value")
    ax.set_title("Input Test Data")
    return fig


def get_anomalies(df_test_value):
    """Run the autoencoder over sliding windows and flag high-error windows.

    Returns a boolean array with one entry per window: True where the
    window's mean absolute reconstruction error exceeds the threshold.
    """
    x_test = create_sequences(df_test_value.values)
    # NOTE(review): the model is fetched/loaded on every request; hoist to
    # module level if cold-start latency matters.
    model = from_pretrained_keras("keras-io/timeseries-anomaly-detection")
    x_test_pred = model.predict(x_test)
    test_mae_loss = np.mean(np.abs(x_test_pred - x_test), axis=1).reshape(-1)
    return test_mae_loss > scaler["threshold"]


def plot_anomalies(df_test_value, data, anomalies):
    """Highlight anomalous points of `data` in red and return the Figure.

    Data point i is treated as anomalous when all windows covering samples
    (i - TIME_STEPS + 1) .. i were flagged as anomalies.
    """
    anomalous_data_indices = []
    for data_idx in range(TIME_STEPS - 1, len(df_test_value) - TIME_STEPS + 1):
        if np.all(anomalies[data_idx - TIME_STEPS + 1 : data_idx]):
            anomalous_data_indices.append(data_idx)
    df_subset = data.iloc[anomalous_data_indices]
    fig, ax = plt.subplots(figsize=(12, 6))
    data.plot(legend=False, ax=ax)
    df_subset.plot(legend=False, ax=ax, color="r")
    ax.set_xlabel("Time")
    ax.set_ylabel("Value")
    ax.set_title("Anomalous Data Points")
    return fig


def clean_data(df):
    """Normalize the input frame to two columns: 'timestamp' and 'value'.

    Accepts either 'Date' + 'Hour' columns or a ready-made 'timestamp'
    column.

    Raises:
        ValueError: if neither column layout is present.
    """
    # Work on a copy: the original renamed in place on what can be a slice,
    # triggering SettingWithCopyWarning and mutating the caller's frame.
    df = df.copy()
    if "Date" in df.columns and "Hour" in df.columns:
        df["timestamp"] = pd.to_datetime(
            df["Date"] + " " + df["Hour"].astype(str) + ":00:00"
        )
        df = df[["timestamp", "Hourly_Labor_Hours_Total"]]
        df = df.rename(columns={"Hourly_Labor_Hours_Total": "value"})
    elif "timestamp" in df.columns:
        if "Hourly_Labor_Hours_Total" in df.columns:
            df = df.rename(columns={"Hourly_Labor_Hours_Total": "value"})
        df = df[["timestamp", "value"]]
    else:
        raise ValueError("Input data must have either 'Date' and 'Hour' columns, or a 'timestamp' column.")
    return df


def master(file):
    """Gradio handler: read the uploaded CSV and return the anomaly plot."""
    data = pd.read_csv(file.name)
    # Bug fix: clean FIRST. The original converted data['timestamp'] before
    # clean_data, which raised KeyError for CSVs that only provide
    # 'Date'/'Hour' columns (a layout clean_data explicitly supports).
    data = clean_data(data)
    data["timestamp"] = pd.to_datetime(data["timestamp"])
    data.set_index("timestamp", inplace=True)
    df_test_value = normalize_data(data)
    # (The original also built an input-data figure here and discarded it;
    # plot_test_data is kept available but no longer called needlessly.)
    anomalies = get_anomalies(df_test_value)
    return plot_anomalies(df_test_value, data, anomalies)


iface = gr.Interface(
    fn=master,
    # gr.inputs / gr.outputs are deprecated and removed in current Gradio;
    # use top-level components. master returns a matplotlib Figure, so the
    # correct output component is Plot, not Image.
    inputs=gr.File(label="CSV File"),
    outputs=gr.Plot(),
    # NOTE(review): "labor-hourly-short-csv" looks like a missing ".csv"
    # extension — verify the example file name exists as written.
    examples=["art_daily_jumpsup.csv", "labor-hourly-short-csv"],
    title="Timeseries Anomaly Detection Using an Autoencoder",
    description="Anomaly detection of timeseries data.",
)

if __name__ == "__main__":
    iface.launch()