File size: 3,014 Bytes
92cca14
eec9db3
 
010071f
 
3651f7b
92cca14
010071f
eec9db3
 
 
 
 
 
 
92cca14
1fadf44
92cca14
 
5405a02
eec9db3
92cca14
 
eec9db3
92cca14
eec9db3
 
92cca14
 
 
5405a02
92cca14
 
 
 
eec9db3
92cca14
1fadf44
92cca14
1fadf44
9db6dea
eec9db3
3dbe4ef
 
92cca14
e9cc996
92cca14
eec9db3
 
 
 
 
 
e9cc996
92cca14
 
eec9db3
 
92cca14
eec9db3
e9cc996
92cca14
9db6dea
eec9db3
 
92cca14
 
e9cc996
92cca14
 
 
 
 
 
eec9db3
e9cc996
92cca14
 
 
 
 
 
5405a02
3dbe4ef
 
e9cc996
 
92cca14
eec9db3
 
92cca14
eec9db3
92cca14
eec9db3
 
92cca14
 
 
 
e9cc996
eec9db3
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
# tools/forecaster.py
import os
import tempfile
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA
import plotly.graph_objects as go
from typing import Tuple, Union


def forecast_metric_tool(
    file_path: str,
    date_col: str,
    value_col: str,
    periods: int = 3,
    output_dir: str = "/tmp"
) -> Union[Tuple[pd.DataFrame, str], str]:
    """
    Forecast a time-series metric from a CSV/Excel file using ARIMA(1,1,1).

    The file is loaded, `date_col` is parsed as datetimes and `value_col` as
    numbers (unparseable cells are dropped), duplicate dates are averaged,
    the series is put on a regular frequency, an ARIMA(1,1,1) model is fitted
    and the next `periods` steps are forecast. A history + forecast chart is
    written as a PNG into `output_dir`.

    Args:
        file_path: Path to the input file. '.xls'/'.xlsx' are read with
            `pd.read_excel`; every other extension is read with `pd.read_csv`.
        date_col: Name of the column holding the timestamps.
        value_col: Name of the column holding the metric to forecast.
        periods: Number of future steps to forecast.
        output_dir: Directory for the PNG; created if it does not exist.

    Returns:
      - (forecast_df, plot_path) on success, where forecast_df has a single
        'Forecast' column indexed by the forecast timestamps
      - error string starting with '❌' on failure
    """
    # Load data
    ext = os.path.splitext(file_path)[1].lower()
    try:
        df = pd.read_excel(file_path) if ext in ('.xls', '.xlsx') else pd.read_csv(file_path)
    except Exception as exc:
        return f"❌ Failed to load file: {exc}"

    # Validate columns
    missing = [c for c in (date_col, value_col) if c not in df.columns]
    if missing:
        return f"❌ Missing column(s): {', '.join(missing)}"

    # Parse and clean
    try:
        df[date_col] = pd.to_datetime(df[date_col], errors='coerce')
    except Exception:
        return f"❌ Could not parse '{date_col}' as dates."
    df[value_col] = pd.to_numeric(df[value_col], errors='coerce')
    df = df.dropna(subset=[date_col, value_col])
    if df.empty:
        return f"❌ No valid data after cleaning '{date_col}'/'{value_col}'"

    # Aggregate duplicates and sort; the resulting index is unique
    df = (
        df[[date_col, value_col]]
        .groupby(date_col, as_index=True)
        .mean()
        .sort_index()
    )

    # pd.infer_freq raises ValueError on fewer than 3 dates, so reject short
    # series explicitly instead of letting that exception escape.
    if len(df) < 3:
        return (
            f"❌ Need at least 3 distinct dates in '{date_col}' to forecast "
            f"(found {len(df)})"
        )

    # Infer frequency, falling back to daily when none can be determined
    try:
        freq = pd.infer_freq(df.index) or 'D'
    except (TypeError, ValueError):
        freq = 'D'
    try:
        # Dates absent from the data become NaN rows on the regular grid
        df = df.asfreq(freq)
    except Exception as exc:
        return f"❌ Could not align data to frequency '{freq}': {exc}"

    # Fit ARIMA
    try:
        model = ARIMA(df[value_col], order=(1, 1, 1))
        fit = model.fit()
    except Exception as exc:
        return f"❌ ARIMA fitting failed: {exc}"

    # Forecast
    try:
        pred = fit.get_forecast(steps=periods)
        forecast = pred.predicted_mean
    except Exception as exc:
        return f"❌ Forecast generation failed: {exc}"
    forecast_df = forecast.to_frame(name='Forecast')

    # Plot history + forecast
    fig = go.Figure(
        data=[
            go.Scatter(x=df.index, y=df[value_col], mode='lines', name='History'),
            go.Scatter(x=forecast.index, y=forecast, mode='lines+markers', name='Forecast')
        ]
    )
    fig.update_layout(
        title=f"{value_col} Forecast",
        xaxis_title=date_col,
        yaxis_title=value_col,
        template='plotly_dark'
    )

    # Save PNG. The temp file only reserves a unique name (delete=False keeps
    # it on disk after closing); the image is then written over it.
    plot_path = None
    try:
        os.makedirs(output_dir, exist_ok=True)
        with tempfile.NamedTemporaryFile(
            suffix='.png', prefix='forecast_', dir=output_dir, delete=False
        ) as tmp:
            plot_path = tmp.name
        fig.write_image(plot_path, scale=2)
    except Exception as exc:
        # Do not leave an empty placeholder PNG behind when rendering fails
        if plot_path is not None:
            try:
                os.remove(plot_path)
            except OSError:
                pass
        return f"❌ Plot saving failed: {exc}"

    return forecast_df, plot_path