File size: 3,120 Bytes
eec9db3
 
010071f
 
3651f7b
010071f
eec9db3
 
 
 
 
 
 
 
1fadf44
eec9db3
 
5405a02
eec9db3
 
 
5405a02
eec9db3
 
 
 
 
 
 
 
 
 
 
5405a02
eec9db3
 
 
 
 
 
1fadf44
 
 
9db6dea
e9cc996
eec9db3
3dbe4ef
 
eec9db3
e9cc996
eec9db3
 
 
 
 
 
 
e9cc996
eec9db3
3dbe4ef
 
eec9db3
 
 
 
 
 
e9cc996
eec9db3
9db6dea
eec9db3
 
9db6dea
 
e9cc996
eec9db3
 
5405a02
eec9db3
e9cc996
eec9db3
a9cd5f0
eec9db3
 
 
 
 
5405a02
eec9db3
 
 
 
 
5405a02
3dbe4ef
 
e9cc996
 
eec9db3
 
 
 
 
 
 
3dbe4ef
eec9db3
 
 
e9cc996
eec9db3
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import os
import tempfile
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA
import plotly.graph_objects as go


def forecast_metric_tool(
    file_path: str,
    date_col: str,
    value_col: str,
    periods: int = 3,
    output_dir: str = "/tmp"
):
    """
    Forecast a time-series metric from a CSV or Excel file and plot the result.

    The file is loaded, `date_col` is parsed as dates and `value_col` as numbers,
    rows with a missing date or a non-numeric value are dropped, values sharing
    the same date are averaged, and an ARIMA(1,1,1) model is fitted to the
    resulting series. The next `periods` steps are forecast and a combined
    history+forecast plot is written as a PNG.

    Args:
      file_path: path to the input file; '.xls'/'.xlsx' are read as Excel,
        anything else as CSV.
      date_col: name of the column holding the timestamps.
      value_col: name of the column holding the metric.
      periods: number of future steps to forecast (must be >= 1).
      output_dir: directory for the PNG; created if it does not exist.

    Returns:
      forecast_df (pd.DataFrame): next-period predicted values, indexed by date.
      plot_path (str): full path to the saved PNG plot.

    Errors return a string starting with '❌' describing the problem.
    """
    # Validate the horizon before doing any I/O so a bad value fails fast
    # with the same '❌' convention as every other error.
    try:
        steps = int(periods)
    except (TypeError, ValueError):
        return f"❌ 'periods' must be a positive integer, got {periods!r}."
    if steps < 1:
        return f"❌ 'periods' must be a positive integer, got {periods!r}."

    # 0) Load data (CSV or Excel)
    ext = os.path.splitext(file_path)[1].lower()
    try:
        if ext in ('.xls', '.xlsx'):
            df = pd.read_excel(file_path)
        else:
            df = pd.read_csv(file_path)
    except Exception as e:
        return f"❌ Failed to load file: {e}"

    # 1) Validate columns
    for col in (date_col, value_col):
        if col not in df.columns:
            return f"❌ Column '{col}' not found."

    # 2) Parse dates and numeric
    try:
        df[date_col] = pd.to_datetime(df[date_col])
    except Exception:
        return f"❌ Could not parse '{date_col}' as dates."

    # Non-numeric values become NaN and are dropped together with missing dates.
    df[value_col] = pd.to_numeric(df[value_col], errors='coerce')
    df = df.dropna(subset=[date_col, value_col])
    if df.empty:
        return f"❌ No valid rows after dropping NaNs in '{date_col}'/'{value_col}'."

    # 3) Aggregate duplicates & index: one (mean) value per distinct date.
    df = (
        df[[date_col, value_col]]
        .groupby(date_col, as_index=True)
        .mean()
        .sort_index()
    )

    # pd.infer_freq raises ValueError on fewer than 3 dates; report that as a
    # normal error instead of letting the exception escape.
    if len(df) < 3:
        return (
            f"❌ Need at least 3 distinct dates in '{date_col}' to forecast; "
            f"found {len(df)}."
        )

    # 4) Infer frequency, falling back to daily when none can be inferred.
    freq = pd.infer_freq(df.index) or 'D'
    try:
        # Dates missing from the regular grid become NaN rows.
        df = df.asfreq(freq)
    except ValueError as e:
        return f"❌ Could not align data to frequency '{freq}': {e}"

    # 5) Fit ARIMA
    try:
        fit = ARIMA(df[value_col], order=(1, 1, 1)).fit()
    except Exception as e:
        return f"❌ ARIMA fitting failed: {e}"

    # 6) Forecast future
    try:
        forecast = fit.get_forecast(steps=steps).predicted_mean
    except Exception as e:
        return f"❌ Forecasting failed: {e}"
    forecast_df = forecast.to_frame(name='Forecast')

    # 7) Plot history + forecast
    fig = go.Figure()
    fig.add_trace(
        go.Scatter(
            x=df.index, y=df[value_col],
            mode='lines+markers', name=value_col
        )
    )
    fig.add_trace(
        go.Scatter(
            x=forecast.index, y=forecast,
            mode='lines+markers', name='Forecast'
        )
    )
    fig.update_layout(
        title=f"{value_col} Forecast",
        xaxis_title=date_col,
        yaxis_title=value_col,
        template='plotly_dark',
    )

    # 8) Save to temporary file. The file is created (and closed) first only to
    # reserve a unique name; delete=False keeps it on disk for write_image.
    try:
        os.makedirs(output_dir, exist_ok=True)
        with tempfile.NamedTemporaryFile(
            suffix='.png', prefix='forecast_', dir=output_dir, delete=False
        ) as tmp:
            plot_path = tmp.name
    except OSError as e:
        return f"❌ Could not create plot file in '{output_dir}': {e}"

    try:
        fig.write_image(plot_path)
    except Exception as e:
        # Do not leave an empty placeholder PNG behind when export fails.
        try:
            os.remove(plot_path)
        except OSError:
            pass
        return f"❌ Failed to save plot: {e}"

    return forecast_df, plot_path