azrai99 committed on
Commit e5e340b · verified · 1 Parent(s): a4bae1a

Update app.py

Files changed (1):
  app.py +213 -269

app.py CHANGED
@@ -1,276 +1,220 @@
- from time import time
-
- import numpy as np
- import pandas as pd
- import plotly.express as px
- import plotly.graph_objects as go
  import streamlit as st
- from neuralforecast.losses.pytorch import MAE, RMSE, MAPE, SMAPE, MASE
- from st_aggrid import AgGrid
-
- from src.nf import MODELS, forecast_pretrained_model
- from src.model_descriptions import model_cards
-
- DATASETS = {
-     "Electricity (Ercot COAST)": "https://raw.githubusercontent.com/Nixtla/transfer-learning-time-series/main/datasets/ercot_COAST.csv",
-     "Web Traffic (Peyton Manning)": "https://raw.githubusercontent.com/Nixtla/transfer-learning-time-series/main/datasets/peyton_manning.csv",
-     "Demand (AirPassengers)": "https://raw.githubusercontent.com/Nixtla/transfer-learning-time-series/main/datasets/air_passengers.csv",
-     "Finance (Exchange USD-EUR)": "https://raw.githubusercontent.com/Nixtla/transfer-learning-time-series/main/datasets/usdeur.csv",
  }


- @st.cache_data
- def convert_df(df):
-     # IMPORTANT: Cache the conversion to prevent computation on every rerun
-     return df.to_csv(index=False).encode("utf-8")
-
-
- def plot(df, uid, df_forecast, model):
-     figs = []
-     figs += [
-         go.Scatter(
-             x=df["ds"],
-             y=df["y"],
-             mode="lines",
-             marker=dict(color="#236796"),
-             legendrank=1,
-             name=uid,
-         ),
-     ]
-     if df_forecast is not None:
-         ds_f = df_forecast["ds"].to_list()
-         lo = df_forecast["forecast_lo_90"].to_list()
-         hi = df_forecast["forecast_hi_90"].to_list()
-         figs += [
-             go.Scatter(
-                 x=ds_f + ds_f[::-1],
-                 y=hi + lo[::-1],
-                 fill="toself",
-                 fillcolor="#E7C4C0",
-                 mode="lines",
-                 line=dict(color="#E7C4C0"),
-                 name="Prediction Intervals (90%)",
-                 legendrank=5,
-                 opacity=0.5,
-                 hoverinfo="skip",
-             ),
-             go.Scatter(
-                 x=ds_f,
-                 y=df_forecast["forecast"],
-                 mode="lines",
-                 legendrank=4,
-                 marker=dict(color="#E7C4C0"),
-                 name=f"Forecast {uid}",
-             ),
-         ]
-     fig = go.Figure(figs)
-     fig.update_layout(
-         {"plot_bgcolor": "rgba(0, 0, 0, 0)", "paper_bgcolor": "rgba(0, 0, 0, 0)"}
-     )
-     fig.update_layout(
-         title=f"Forecasts for {uid} using Transfer Learning (from {model})",
-         legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
-         margin=dict(l=20, b=20),
-         xaxis=dict(rangeslider=dict(visible=True)),
-     )
-     initial_range = [df.tail(200)["ds"].iloc[0], ds_f[-1]]
-     fig["layout"]["xaxis"].update(range=initial_range)
-     return fig
-
-
- def st_transfer_learning():
-     st.set_page_config(
-         page_title="Time Series Forecasting",
-         page_icon="🔮",
-         layout="wide",
-         initial_sidebar_state="expanded",
-     )
-
-     st.title(
-         "Transfer Learning: Revolutionizing Time Series"
-     )
-     st.write(
-         "<style>div.block-container{padding-top:2rem;}</style>", unsafe_allow_html=True
-     )
-
-     intro = """
-     The success of startups like Open AI and Stability highlights the potential for transfer learning (TL) techniques to have a similar impact on the field of time series forecasting.
-     TL can achieve lightning-fast predictions with a fraction of the computational cost by pre-training a flexible model on a large dataset and then using it on another dataset with little to no additional training.
-     In this live demo, you can use pre-trained models by Nixtla (trained on the M4 dataset) to predict your own datasets. You can also see how the models perform on unseen example datasets.
-     """
-     st.write(intro)
-
-     required_cols = ["ds", "y"]
-
-     with st.sidebar.expander("Dataset", expanded=False):
-         data_selection = st.selectbox("Select example dataset", DATASETS.keys())
-         data_url = DATASETS[data_selection]
-         url_json = st.text_input("Data (you can pass your own url here)", data_url)
-         st.write(
-             "You can also upload a CSV file like [this one](https://github.com/Nixtla/transfer-learning-time-series/blob/main/datasets/air_passengers.csv)."
-         )
-
-         uploaded_file = st.file_uploader("Upload CSV")
-         with st.form("Data"):
-
-             if uploaded_file is not None:
-                 df = pd.read_csv(uploaded_file)
-                 cols = df.columns
-                 timestamp_col = st.selectbox("Timestamp column", options=cols)
-                 value_col = st.selectbox("Value column", options=cols)
-             else:
-                 timestamp_col = st.text_input("Timestamp column", value="timestamp")
-                 value_col = st.text_input("Value column", value="value")
-             st.write("You must press Submit each time you want to forecast.")
-             submitted = st.form_submit_button("Submit")
-             if submitted:
-                 if uploaded_file is None:
-                     st.write("Please provide a dataframe.")
-                     if url_json.endswith("json"):
-                         df = pd.read_json(url_json)
-                     else:
-                         df = pd.read_csv(url_json)
-                     df = df.rename(
-                         columns=dict(zip([timestamp_col, value_col], required_cols))
-                     )
-                 else:
-                     df = df.rename(
-                         columns=dict(zip([timestamp_col, value_col], required_cols))
-                     )
-             else:
-                 if url_json.endswith("json"):
-                     df = pd.read_json(url_json)
-                 else:
-                     df = pd.read_csv(url_json)
-                 cols = df.columns
-                 if "unique_id" in cols:
-                     cols = cols[-2:]
-                 df = df.rename(columns=dict(zip(cols, required_cols)))
-
-     if "unique_id" not in df:
-         df.insert(0, "unique_id", "ts_0")
-
-     df["ds"] = pd.to_datetime(df["ds"])
-     df = df.sort_values(["unique_id", "ds"])
-
-     with st.sidebar:
-         st.write("Define the pretrained model you want to use to forecast your data")
-         model_name = st.selectbox("Select your model", tuple(MODELS.keys()))
-         model_file = MODELS[model_name]["model"]
-         st.write("Choose how many steps you want to forecast")
-         fh = st.number_input("Forecast horizon", value=18)
-         st.write(
-             "Choose for how many steps the pretrained model will be updated using your data (use 0 for fast computation)"
-         )
-         max_steps = st.number_input("N-shot inference", value=0)
-
-     # tabs
-     tab_fcst, tab_cv, tab_docs = st.tabs(
-         [
-             "📈 Forecast",
-             "🔎 Cross Validation",
-             "📚 Documentation",
-         ]
-     )
-
-     uids = df["unique_id"].unique()
-     fcst_cols = ["forecast_lo_90", "forecast", "forecast_hi_90"]
-
-     with tab_fcst:
-         uid = uids[0]
-         col1, col2 = st.columns([2, 4])
-         with col1:
-             tab_insample, tab_forecast = st.tabs(
-                 ["Modify input data", "Modify forecasts"]
-             )
-             with tab_insample:
-                 df_grid = df.query("unique_id == @uid").drop(columns="unique_id")
-                 grid_table = AgGrid(
-                     df_grid,
-                     editable=True,
-                     theme="alpine",
-                     fit_columns_on_grid_load=True,
-                     height=360,
-                 )
-                 df.loc[df["unique_id"] == uid, "y"] = (
-                     grid_table["data"].sort_values("ds")["y"].values
-                 )
-             # forecast code
-             init = time()
-             df_forecast = forecast_pretrained_model(df, model_file, fh, max_steps)
-             end = time()
-             df_forecast = df_forecast.rename(
-                 columns=dict(zip(["y_5", "y_50", "y_95"], fcst_cols))
-             )
-             with tab_forecast:
-                 df_fcst_grid = df_forecast.query("unique_id == @uid").filter(
-                     ["ds", "forecast"]
-                 )
-                 grid_fcst_table = AgGrid(
-                     df_fcst_grid,
-                     editable=True,
-                     theme="alpine",
-                     fit_columns_on_grid_load=True,
-                     height=360,
-                 )
-                 changes = (
-                     df_forecast.query("unique_id == @uid")["forecast"].values
-                     - grid_fcst_table["data"].sort_values("ds")["forecast"].values
-                 )
-                 for col in fcst_cols:
-                     df_forecast.loc[df_forecast["unique_id"] == uid, col] = (
-                         df_forecast.loc[df_forecast["unique_id"] == uid, col] - changes
-                     )
-         with col2:
-             if uploaded_file is not None:
-                 fct_name = value_col
-             else:
-                 fct_name=uid
-             st.plotly_chart(
-                 plot(
-                     df.query("unique_id == @uid"),
-                     fct_name,
-                     df_forecast.query("unique_id == @uid"),
-                     model_file,
-                 ),
-                 use_container_width=True,
-             )
-             st.write(f"Done in: {np.round(end-init, 2)} secs.")
-             st.write(f"Forecast for {fh} steps ahead.")
-             st.write("You can download the forecast for the entire dataframe here:")
-             csv = convert_df(
-                 df_forecast[["unique_id", "ds"] + fcst_cols].sort_values(
-                     ["unique_id", "ds"]
-                 )
-             )
-             st.download_button(
-                 label="Download CSV",
-                 data=csv,
-                 file_name="forecast.csv",
-                 mime="text/csv",
-             )
-             st.write(df_forecast[["unique_id", "ds"] + fcst_cols].tail(10))
-
-     with tab_cv:
-         st.write(
-             "To enable Cross Validation, use the advanced forecasting tool at our [site](https://nixtla.github.io/transfer-learning-time-series/)."
-         )
-         df_forecast_cv = None

-     with tab_docs:
-         st.write("Documentation (Work in progress)")
-         st.write(model_cards[model_name])

-     with st.sidebar.expander("Data info", expanded=False):
-         st.write(df.describe())
-         csv = convert_df(df)
-         st.download_button(
-             label="Download data as CSV",
-             data=csv,
-             file_name="dataset.csv",
-             mime="text/csv",
          )
-
-
- if __name__ == "__main__":
-     st_transfer_learning()
  import streamlit as st
+ import pandas as pd
+ import matplotlib.pyplot as plt
+ from neuralforecast.core import NeuralForecast
+ from neuralforecast.models import NHITS, TimesNet, LSTM, TFT
+ from neuralforecast.losses.pytorch import HuberMQLoss
+ from neuralforecast.utils import AirPassengersDF  # default dataset used when no file is uploaded
+ import time
+
+ # Paths to the saved pre-trained models, one per frequency
+ nhits_paths = {
+     'D': './M4/NHITS/daily',
+     'M': './M4/NHITS/monthly',
+     'H': './M4/NHITS/hourly',
+     'W': './M4/NHITS/weekly',
+     'Y': './M4/NHITS/yearly'
  }

+ timesnet_paths = {
+     'D': './M4/TimesNet/daily',
+     'M': './M4/TimesNet/monthly',
+     'H': './M4/TimesNet/hourly',
+     'W': './M4/TimesNet/weekly',
+     'Y': './M4/TimesNet/yearly'
+ }
+
+ lstm_paths = {
+     'D': './M4/LSTM/daily',
+     'M': './M4/LSTM/monthly',
+     'H': './M4/LSTM/hourly',
+     'W': './M4/LSTM/weekly',
+     'Y': './M4/LSTM/yearly'
+ }
+
+ tft_paths = {
+     'D': './M4/TFT/daily',
+     'M': './M4/TFT/monthly',
+     'H': './M4/TFT/hourly',
+     'W': './M4/TFT/weekly',
+     'Y': './M4/TFT/yearly'
+ }
+
+ @st.cache_resource
+ def load_model(path, freq):
+     nf = NeuralForecast.load(path=path)
+     return nf
+
+ nhits_models = {freq: load_model(path, freq) for freq, path in nhits_paths.items()}
+ timesnet_models = {freq: load_model(path, freq) for freq, path in timesnet_paths.items()}
+ lstm_models = {freq: load_model(path, freq) for freq, path in lstm_paths.items()}
+ tft_models = {freq: load_model(path, freq) for freq, path in tft_paths.items()}
+
+ def generate_forecast(model, df):
+     forecast_df = model.predict(df=df)
+     return forecast_df
+
+ def determine_frequency(df):
+     df['ds'] = pd.to_datetime(df['ds'])
+     df = df.set_index('ds')
+     freq = pd.infer_freq(df.index)
+     return freq
+
+ def plot_forecasts(forecast_df, train_df, title):
+     fig, ax = plt.subplots(1, 1, figsize=(20, 7))
+     plot_df = pd.concat([train_df, forecast_df]).set_index('ds')
+     historical_col = 'y'
+     forecast_col = next((col for col in plot_df.columns if 'median' in col), None)
+     lo_col = next((col for col in plot_df.columns if 'lo-90' in col), None)
+     hi_col = next((col for col in plot_df.columns if 'hi-90' in col), None)
+     if forecast_col is None:
+         raise KeyError("No forecast column found in the data.")
+     plot_df[[historical_col, forecast_col]].plot(ax=ax, linewidth=2, label=['Historical', 'Forecast'])
+     if lo_col and hi_col:
+         ax.fill_between(
+             plot_df.index,
+             plot_df[lo_col],
+             plot_df[hi_col],
+             color='blue',
+             alpha=0.3,
+             label='90% Confidence Interval'
          )
+     ax.set_title(title, fontsize=22)
+     ax.set_ylabel('Value', fontsize=20)
+     ax.set_xlabel('Timestamp [t]', fontsize=20)
+     ax.legend(prop={'size': 15})
+     ax.grid()
+     st.pyplot(fig)
+
+ def select_model_based_on_frequency(freq, nhits_models, timesnet_models, lstm_models, tft_models):
+     if freq == 'D':
+         return nhits_models['D'], timesnet_models['D'], lstm_models['D'], tft_models['D']
+     elif freq == 'M':
+         return nhits_models['M'], timesnet_models['M'], lstm_models['M'], tft_models['M']
+     elif freq == 'H':
+         return nhits_models['H'], timesnet_models['H'], lstm_models['H'], tft_models['H']
+     elif freq in ['W', 'W-SUN']:
+         return nhits_models['W'], timesnet_models['W'], lstm_models['W'], tft_models['W']
+     elif freq in ['Y', 'Y-DEC']:
+         return nhits_models['Y'], timesnet_models['Y'], lstm_models['Y'], tft_models['Y']
+     else:
+         raise ValueError(f"Unsupported frequency: {freq}")
+
+ def select_model(horizon, model_type, max_steps=200):
+     if model_type == 'NHITS':
+         return NHITS(input_size=5 * horizon,
+                      h=horizon,
+                      max_steps=max_steps,
+                      stack_types=3*['identity'],
+                      n_blocks=3*[1],
+                      mlp_units=[[256, 256] for _ in range(3)],
+                      n_pool_kernel_size=3*[1],
+                      batch_size=32,
+                      scaler_type='standard',
+                      n_freq_downsample=[12, 4, 1],
+                      loss=HuberMQLoss(level=[90]))
+     elif model_type == 'TimesNet':
+         return TimesNet(h=horizon,
+                         input_size=horizon * 5,
+                         hidden_size=16,
+                         conv_hidden_size=32,
+                         loss=HuberMQLoss(level=[90]),
+                         scaler_type='standard',
+                         learning_rate=1e-3,
+                         max_steps=max_steps,
+                         val_check_steps=200,
+                         valid_batch_size=64,
+                         windows_batch_size=128,
+                         inference_windows_batch_size=512)
+     elif model_type == 'LSTM':
+         return LSTM(h=horizon,
+                     input_size=horizon * 5,
+                     loss=HuberMQLoss(level=[90]),
+                     scaler_type='standard',
+                     encoder_n_layers=2,
+                     encoder_hidden_size=64,
+                     context_size=10,
+                     decoder_hidden_size=64,
+                     decoder_layers=2,
+                     max_steps=max_steps)
+     elif model_type == 'TFT':
+         return TFT(h=horizon,
+                    input_size=horizon,
+                    hidden_size=16,
+                    loss=HuberMQLoss(level=[90]),
+                    learning_rate=0.005,
+                    scaler_type='standard',
+                    windows_batch_size=128,
+                    max_steps=max_steps,
+                    val_check_steps=200,
+                    valid_batch_size=64,
+                    enable_progress_bar=True)
+     else:
+         raise ValueError(f"Unsupported model type: {model_type}")
+
+ def forecast_time_series(df, model_type, freq, horizon, max_steps=200):
+     start_time = time.time()  # Start timing
+     if freq:
+         df['ds'] = pd.date_range(start='1970-01-01', periods=len(df), freq=freq)
+     else:
+         freq = determine_frequency(df)
+         st.write(f"Determined frequency: {freq}")
+     df['ds'] = pd.to_datetime(df['ds'], errors='coerce')
+     df = df.dropna(subset=['ds'])
+     model = select_model(horizon, model_type, max_steps)
+     forecast_results = {}
+     st.write(f"Generating forecast using {model_type} model...")
+     # The model returned by select_model starts untrained, so fit it on the
+     # provided data before predicting.
+     nf = NeuralForecast(models=[model], freq=freq)
+     nf.fit(df=df)
+     forecast_results[model_type] = nf.predict()
+
+     for model_name, forecast_df in forecast_results.items():
+         plot_forecasts(forecast_df, df, f'{model_name} Forecast Comparison')
+
+     end_time = time.time()  # End timing
+     time_taken = end_time - start_time
+     st.success(f"Time taken for {model_type} forecast: {time_taken:.2f} seconds")
+
+ # Streamlit App
+ st.title("Dynamic and Automatic Time Series Forecasting")
+
+ # Upload dataset
+ uploaded_file = st.file_uploader("Upload your time series data (CSV)", type=["csv"])
+ if uploaded_file:
+     df = pd.read_csv(uploaded_file)
+ else:
+     st.warning("Using default data")
+     df = AirPassengersDF.copy()
+
+ # Model selection and forecasting
+ st.subheader("Transfer Learning Forecasting")
+ model_choice = st.selectbox("Select model", ["NHITS", "TimesNet", "LSTM", "TFT"])
+ horizon = st.slider("Forecast horizon", 1, 100, 10)
+
+ # Determine frequency of data
+ frequency = determine_frequency(df)
+ st.write(f"Detected frequency: {frequency}")
+
+ # Load pre-trained models
+ nhits_model, timesnet_model, lstm_model, tft_model = select_model_based_on_frequency(frequency, nhits_models, timesnet_models, lstm_models, tft_models)
+ forecast_results = {}
+
+ start_time = time.time()  # Start timing
+ if model_choice == "NHITS":
+     forecast_results['NHITS'] = generate_forecast(nhits_model, df)
+ elif model_choice == "TimesNet":
+     forecast_results['TimesNet'] = generate_forecast(timesnet_model, df)
+ elif model_choice == "LSTM":
+     forecast_results['LSTM'] = generate_forecast(lstm_model, df)
+ elif model_choice == "TFT":
+     forecast_results['TFT'] = generate_forecast(tft_model, df)
+
+ for model_name, forecast_df in forecast_results.items():
+     plot_forecasts(forecast_df, df, f'{model_name} Forecast')
+
+ end_time = time.time()  # End timing
+ time_taken = end_time - start_time
+ st.success(f"Time taken for {model_choice} forecast: {time_taken:.2f} seconds")
+
+ # Dynamic forecasting
+ st.subheader("Dynamic Forecasting")
+ dynamic_model_choice = st.selectbox("Select model for dynamic forecasting", ["NHITS", "TimesNet", "LSTM", "TFT"], key="dynamic_model_choice")
+ dynamic_horizon = st.slider("Forecast horizon for dynamic forecasting", 1, 100, 10, key="dynamic_horizon")
+ forecast_time_series(df, dynamic_model_choice, frequency, dynamic_horizon)
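
Note on input data: the rewritten app passes the uploaded CSV straight to the loaded NeuralForecast models, so the file should already be in the library's long format with unique_id, ds and y columns (the default AirPassengersDF already is). A minimal preparation sketch, assuming a raw export with hypothetical "timestamp" and "value" columns:

import pandas as pd

raw = pd.read_csv("raw_export.csv")           # hypothetical file with 'timestamp' and 'value' columns
prepared = pd.DataFrame({
    "unique_id": "ts_0",                      # single-series identifier, broadcast to every row
    "ds": pd.to_datetime(raw["timestamp"]),   # timestamps
    "y": raw["value"],                        # target values
})
prepared.to_csv("prepared.csv", index=False)  # upload this file in the app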