File size: 2,422 Bytes
9db6dea
 
010071f
 
3651f7b
010071f
3651f7b
1fadf44
5405a02
 
 
1fadf44
5405a02
 
010071f
5405a02
 
 
 
 
 
 
 
1fadf44
 
 
9db6dea
e9cc996
5405a02
9db6dea
3dbe4ef
 
5405a02
e9cc996
5405a02
e9cc996
 
 
5405a02
3dbe4ef
 
e9cc996
3dbe4ef
e9cc996
5405a02
9db6dea
3dbe4ef
9db6dea
 
 
e9cc996
5405a02
3dbe4ef
5405a02
e9cc996
5405a02
a9cd5f0
5405a02
 
 
 
 
 
 
 
3dbe4ef
 
e9cc996
 
 
3dbe4ef
5405a02
e9cc996
5405a02
e9cc996
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# tools/forecaster.py

import pandas as pd
from statsmodels.tsa.arima.model import ARIMA
import plotly.graph_objects as go

def forecast_metric_tool(file_path: str, date_col: str, value_col: str) -> str:
    """Forecast the next 3 periods of a numeric metric with ARIMA(1, 1, 1).

    The CSV at *file_path* is loaded, *date_col* is parsed as datetimes and
    *value_col* is coerced to numeric; rows where either is missing are
    dropped. Duplicate timestamps are averaged, the series is re-indexed to
    a regular frequency (inferred, falling back to daily), and an ARIMA
    model is fitted. A Plotly chart of history plus forecast is written to
    ``forecast_plot.png``.

    Args:
        file_path: Path of the CSV file to read.
        date_col: Name of the column holding the observation dates.
        value_col: Name of the column holding the metric to forecast.

    Returns:
        The 3-step forecast rendered as a text table, or a message starting
        with "❌" describing why the forecast could not be produced.
    """
    # 0) Read full CSV. Unreadable/missing/empty files are reported the same
    #    way as every other failure in this function: as a "❌" string.
    #    (pandas' EmptyDataError/ParserError and UnicodeDecodeError are all
    #    ValueError subclasses; missing files are OSError.)
    try:
        df = pd.read_csv(file_path)
    except (OSError, ValueError) as exc:
        return f"❌ Could not read '{file_path}': {exc}"

    # 1) Check that both columns actually exist
    if date_col not in df.columns:
        return f"❌ Date column '{date_col}' not found in your data."
    if value_col not in df.columns:
        return f"❌ Metric column '{value_col}' not found in your data."

    # 2) Parse dates
    try:
        df[date_col] = pd.to_datetime(df[date_col])
    except Exception:
        return f"❌ Could not parse '{date_col}' as dates."

    # 3) Coerce metric to numeric & drop invalid rows
    df[value_col] = pd.to_numeric(df[value_col], errors="coerce")
    df = df.dropna(subset=[date_col, value_col])
    if df.empty:
        return f"❌ After coercion, no valid data remains for '{value_col}'."

    # 4) Sort & index by date, collapse duplicate timestamps to their mean
    df = df.sort_values(date_col).set_index(date_col)
    df = df[[value_col]].groupby(level=0).mean()

    # pd.infer_freq raises ValueError on fewer than 3 timestamps, so bail
    # out with a readable message instead of crashing.
    if len(df) < 3:
        return (
            f"❌ Need at least 3 dated observations to forecast '{value_col}'."
        )

    # 5) Infer a frequency and re-index
    try:
        freq = pd.infer_freq(df.index)
    except (TypeError, ValueError):
        # Index that pandas cannot analyse: treat like "no regular frequency".
        freq = None
    if freq is None:
        freq = "D"  # fallback to daily
    # asfreq inserts NaN rows for any periods missing from the data.
    df = df.asfreq(freq)

    # 6) Fit ARIMA (1,1,1)
    try:
        model = ARIMA(df[value_col], order=(1, 1, 1))
        model_fit = model.fit()
    except Exception as e:
        return f"❌ ARIMA fitting failed: {e}"

    # 7) Produce a proper date-indexed forecast
    fc_res = model_fit.get_forecast(steps=3)
    forecast = fc_res.predicted_mean

    # 8) Plot history + forecast
    fig = go.Figure()
    fig.add_scatter(
        x=df.index, y=df[value_col],
        mode="lines", name=value_col
    )
    fig.add_scatter(
        x=forecast.index, y=forecast,
        mode="lines+markers", name="Forecast"
    )
    fig.update_layout(
        title=f"{value_col} Forecast",
        xaxis_title=date_col,
        yaxis_title=value_col,
        template="plotly_dark",
    )
    # NOTE(review): the original comment says a write monkey-patch defined
    # elsewhere redirects this relative path to /tmp - not visible here.
    fig.write_image("forecast_plot.png")

    # 9) Return the forecast as a text table
    return forecast.to_frame(name="Forecast").to_string()