Spaces:
Sleeping
Sleeping
# tools/forecaster.py
import os | |
import tempfile | |
import pandas as pd | |
from statsmodels.tsa.arima.model import ARIMA | |
import plotly.graph_objects as go | |
from typing import Tuple, Union | |
def forecast_metric_tool(
    file_path: str,
    date_col: str,
    value_col: str,
    periods: int = 3,
    output_dir: str = "/tmp"
) -> Union[Tuple[pd.DataFrame, str], str]:
    """
    Forecast a time-series metric from a CSV/Excel file with ARIMA(1,1,1).

    The file is loaded, `date_col` is parsed as datetimes and `value_col` as
    numbers, and rows where either fails to parse are dropped. Values that
    share a date are averaged, the series is sorted and re-indexed to its
    inferred frequency (daily when none can be inferred), and the next
    `periods` steps are forecast. History and forecast are plotted and
    written as a PNG into `output_dir`.

    Args:
        file_path: Path to the input file; '.xls'/'.xlsx' are read as Excel,
            every other extension as CSV.
        date_col: Name of the column holding the timestamps.
        value_col: Name of the column holding the metric to forecast.
        periods: Number of future steps to forecast.
        output_dir: Directory for the PNG; created if it does not exist.

    Returns:
        - (forecast_df, plot_path) on success, where forecast_df has a single
          'Forecast' column indexed by the forecast timestamps
        - error string starting with 'β' on failure
    """
    # Load data: the extension decides between the Excel and CSV readers.
    ext = os.path.splitext(file_path)[1].lower()
    try:
        if ext in ('.xls', '.xlsx'):
            df = pd.read_excel(file_path)
        else:
            df = pd.read_csv(file_path)
    except Exception as exc:
        return f"β Failed to load file: {exc}"

    # Validate columns
    missing = [c for c in (date_col, value_col) if c not in df.columns]
    if missing:
        return f"β Missing column(s): {', '.join(missing)}"

    # Parse and clean: unparseable cells become NaT/NaN and are dropped.
    try:
        df[date_col] = pd.to_datetime(df[date_col], errors='coerce')
    except Exception:
        return f"β Could not parse '{date_col}' as dates."
    df[value_col] = pd.to_numeric(df[value_col], errors='coerce')
    df = df.dropna(subset=[date_col, value_col])
    if df.empty:
        return f"β No valid data after cleaning '{date_col}'/'{value_col}'"

    # Aggregate duplicates and sort. Grouping by date leaves a unique index,
    # so no further de-duplication is needed before asfreq().
    df = (
        df[[date_col, value_col]]
        .groupby(date_col, as_index=True)
        .mean()
        .sort_index()
    )

    # pd.infer_freq raises ValueError on fewer than 3 dates; report that as a
    # normal error string instead of letting the exception escape.
    if len(df) < 3:
        return f"β Need at least 3 distinct dates to forecast, got {len(df)}"

    # Infer frequency (daily fallback) and regularise the index; dates missing
    # from the resulting grid become NaN rows.
    try:
        freq = pd.infer_freq(df.index) or 'D'
        df = df.asfreq(freq)
    except Exception as exc:
        return f"β Could not infer a regular frequency: {exc}"

    # Fit ARIMA
    try:
        model = ARIMA(df[value_col], order=(1, 1, 1))
        fit = model.fit()
    except Exception as exc:
        return f"β ARIMA fitting failed: {exc}"

    # Forecast
    try:
        pred = fit.get_forecast(steps=periods)
        forecast = pred.predicted_mean
    except Exception as exc:
        return f"β Forecast generation failed: {exc}"
    forecast_df = forecast.to_frame(name='Forecast')

    # Plot history + forecast
    fig = go.Figure(
        data=[
            go.Scatter(x=df.index, y=df[value_col], mode='lines', name='History'),
            go.Scatter(x=forecast.index, y=forecast, mode='lines+markers', name='Forecast')
        ]
    )
    fig.update_layout(
        title=f"{value_col} Forecast",
        xaxis_title=date_col,
        yaxis_title=value_col,
        template='plotly_dark'
    )

    # Reserve a unique PNG path. delete=False keeps the (empty) file after the
    # handle is closed so write_image can overwrite it.
    try:
        os.makedirs(output_dir, exist_ok=True)
        with tempfile.NamedTemporaryFile(
            suffix='.png', prefix='forecast_', dir=output_dir, delete=False
        ) as tmp:
            plot_path = tmp.name
    except OSError as exc:
        return f"β Plot saving failed: {exc}"

    # Save PNG
    try:
        fig.write_image(plot_path, scale=2)
    except Exception as exc:
        # Do not leave the empty placeholder behind when rendering fails.
        try:
            os.remove(plot_path)
        except OSError:
            pass  # best-effort cleanup; the render error is what matters
        return f"β Plot saving failed: {exc}"
    return forecast_df, plot_path