File size: 2,160 Bytes
9db6dea
 
010071f
 
3651f7b
010071f
3651f7b
1fadf44
e9cc996
 
1fadf44
e9cc996
010071f
1fadf44
 
 
9db6dea
e9cc996
 
9db6dea
3dbe4ef
 
 
e9cc996
 
 
 
 
 
 
3dbe4ef
 
e9cc996
3dbe4ef
e9cc996
 
9db6dea
3dbe4ef
9db6dea
 
 
e9cc996
 
3dbe4ef
e9cc996
 
 
a9cd5f0
e9cc996
 
3dbe4ef
 
e9cc996
 
 
3dbe4ef
e9cc996
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# tools/forecaster.py

import pandas as pd
from statsmodels.tsa.arima.model import ARIMA
import plotly.graph_objects as go

def forecast_metric_tool(file_path: str, date_col: str, value_col: str, steps: int = 3):
    """
    Forecast the next ``steps`` periods of a numeric metric with ARIMA(1, 1, 1).

    The CSV at ``file_path`` is loaded, ``date_col`` is parsed as dates and
    ``value_col`` is coerced to numbers.  Rows where either value is missing
    are dropped, duplicate timestamps are averaged, and the series is placed
    on a regular frequency grid before the model is fitted.  A chart of the
    history plus the forecast is written to ``forecast_plot.png`` (a relative
    path; the original author notes that a monkey-patch elsewhere redirects
    it to /tmp -- that patch is not visible from this function).

    Args:
        file_path: Location of the CSV file, passed straight to ``pd.read_csv``.
        date_col: Name of the column holding the timestamps.
        value_col: Name of the column holding the metric to forecast.
        steps: Number of future periods to forecast.  Defaults to 3.

    Returns:
        The forecast table rendered as plain text, or a message starting
        with "❌" when the input cannot be used or the model fails to fit.

    Raises:
        Any exception ``pd.read_csv`` raises for an unreadable file; that
        call is intentionally left unguarded so I/O problems stay visible.
    """
    if not isinstance(steps, int) or steps < 1:
        return f"❌ 'steps' must be a positive integer, got {steps!r}."

    # 1) Load & parse dates
    df = pd.read_csv(file_path)

    # Report a missing column by name instead of letting it surface as a
    # misleading "could not parse" message (date column) or an uncaught
    # KeyError (value column).
    for col in (date_col, value_col):
        if col not in df.columns:
            return f"❌ Column '{col}' not found in the data."

    try:
        df[date_col] = pd.to_datetime(df[date_col])
    except Exception:
        # Kept broad on purpose: any parsing failure maps to the same message.
        return f"❌ Could not parse '{date_col}' as dates."

    # 2) Coerce metric to numeric & drop invalid rows
    df[value_col] = pd.to_numeric(df[value_col], errors="coerce")
    df = df.dropna(subset=[date_col, value_col])
    if df.empty:
        return f"❌ No valid data for '{value_col}'."

    # 3) Sort by date, set index, then collapse any duplicate timestamps
    df = df.sort_values(date_col).set_index(date_col)
    # Multiple rows sharing a timestamp are replaced by their mean.
    df = df[[value_col]].groupby(level=0).mean()

    # 4) Infer frequency (e.g. 'D', 'M', etc.) and reindex
    try:
        freq = pd.infer_freq(df.index)
    except ValueError:
        # infer_freq refuses to guess from fewer than 3 timestamps; treat
        # that the same as "no frequency found" rather than crashing.
        freq = None
    if freq is None:
        freq = "D"  # fallback to daily
    # NOTE(review): asfreq keeps only observations that land exactly on the
    # generated grid and fills the remaining slots with NaN -- confirm this
    # is acceptable for irregularly spaced data.
    df = df.asfreq(freq)

    # 5) Fit ARIMA
    try:
        model     = ARIMA(df[value_col], order=(1, 1, 1))
        model_fit = model.fit()
    except Exception as e:
        return f"❌ ARIMA fitting failed: {e}"

    # 6) Forecast with a proper DatetimeIndex
    fc_res   = model_fit.get_forecast(steps=steps)
    forecast = fc_res.predicted_mean  # pd.Series indexed by future dates

    # 7) Plot history + forecast
    fig = go.Figure()
    fig.add_scatter(x=df.index,       y=df[value_col], mode="lines", name=value_col)
    fig.add_scatter(x=forecast.index, y=forecast,     mode="lines+markers", name="Forecast")
    fig.update_layout(
        title=f"{value_col} Forecast",
        xaxis_title=date_col,
        yaxis_title=value_col,
        template="plotly_dark",
    )
    # Relative path; per the original comment a monkey-patch redirects it to /tmp.
    fig.write_image("forecast_plot.png")

    # 8) Return the forecast table as plain text
    return forecast.to_frame(name="Forecast").to_string()