Spaces:
Sleeping
Sleeping
File size: 3,014 Bytes
92cca14 eec9db3 010071f 3651f7b 92cca14 010071f eec9db3 92cca14 1fadf44 92cca14 5405a02 eec9db3 92cca14 eec9db3 92cca14 eec9db3 92cca14 5405a02 92cca14 eec9db3 92cca14 1fadf44 92cca14 1fadf44 9db6dea eec9db3 3dbe4ef 92cca14 e9cc996 92cca14 eec9db3 e9cc996 92cca14 eec9db3 92cca14 eec9db3 e9cc996 92cca14 9db6dea eec9db3 92cca14 e9cc996 92cca14 eec9db3 e9cc996 92cca14 5405a02 3dbe4ef e9cc996 92cca14 eec9db3 92cca14 eec9db3 92cca14 eec9db3 92cca14 e9cc996 eec9db3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 |
# tools/forecaster.py
import os
import tempfile
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA
import plotly.graph_objects as go
from typing import Tuple, Union
def forecast_metric_tool(
    file_path: str,
    date_col: str,
    value_col: str,
    periods: int = 3,
    output_dir: str = "/tmp"
) -> Union[Tuple[pd.DataFrame, str], str]:
    """
    Forecast a time-series metric from a CSV/Excel file with ARIMA(1,1,1).

    The file is loaded, `date_col` is parsed as datetimes and `value_col`
    as numbers, rows where either fails to parse are dropped, values that
    share a date are averaged, and the series is put on a regular
    frequency before fitting. A history + forecast chart is written as a
    PNG inside `output_dir`.

    Args:
        file_path: Input file; '.xls'/'.xlsx' is read as Excel, anything
            else as CSV.
        date_col: Name of the column holding the dates.
        value_col: Name of the column holding the metric values.
        periods: Passed to the fitted model as the number of steps to
            forecast.
        output_dir: Directory for the PNG; created if it does not exist.

    Returns:
        - (forecast_df, plot_path) on success, where forecast_df has a
          single 'Forecast' column indexed by the forecast dates
        - error string starting with 'β' on failure
    """
    # infer_freq refuses to work with fewer than three dates.
    min_points = 3

    # Load data; the reader is picked from the file extension.
    ext = os.path.splitext(file_path)[1].lower()
    try:
        df = pd.read_excel(file_path) if ext in ('.xls', '.xlsx') else pd.read_csv(file_path)
    except Exception as exc:
        return f"β Failed to load file: {exc}"

    # Validate columns
    missing = [c for c in (date_col, value_col) if c not in df.columns]
    if missing:
        return f"β Missing column(s): {', '.join(missing)}"

    # Parse and clean: unparseable cells become NaT/NaN and are dropped.
    try:
        df[date_col] = pd.to_datetime(df[date_col], errors='coerce')
    except Exception:
        return f"β Could not parse '{date_col}' as dates."
    df[value_col] = pd.to_numeric(df[value_col], errors='coerce')
    df = df.dropna(subset=[date_col, value_col])
    if df.empty:
        return f"β No valid data after cleaning '{date_col}'/'{value_col}'"

    # Aggregate duplicates and sort; the index is unique from here on.
    df = (
        df[[date_col, value_col]]
        .groupby(date_col, as_index=True)
        .mean()
        .sort_index()
    )

    # Report short series as an error instead of letting infer_freq raise.
    if len(df) < min_points:
        return (
            f"β Need at least {min_points} distinct dates in '{date_col}' "
            f"to forecast, found {len(df)}"
        )

    # Infer frequency, falling back to daily when none can be detected.
    try:
        freq = pd.infer_freq(df.index)
    except (TypeError, ValueError):
        freq = None
    freq = freq or 'D'
    try:
        # Dates missing from the regular grid become NaN rows.
        df = df.asfreq(freq)
    except Exception as exc:
        return f"β Could not align data to frequency '{freq}': {exc}"

    # Fit ARIMA
    try:
        model = ARIMA(df[value_col], order=(1, 1, 1))
        fit = model.fit()
    except Exception as exc:
        return f"β ARIMA fitting failed: {exc}"

    # Forecast
    try:
        pred = fit.get_forecast(steps=periods)
        forecast = pred.predicted_mean
    except Exception as exc:
        return f"β Forecast generation failed: {exc}"
    forecast_df = forecast.to_frame(name='Forecast')

    # Plot history + forecast
    fig = go.Figure(
        data=[
            go.Scatter(x=df.index, y=df[value_col], mode='lines', name='History'),
            go.Scatter(x=forecast.index, y=forecast, mode='lines+markers', name='Forecast')
        ]
    )
    fig.update_layout(
        title=f"{value_col} Forecast",
        xaxis_title=date_col,
        yaxis_title=value_col,
        template='plotly_dark'
    )

    # Save PNG. The temp file only reserves a unique name (delete=False);
    # it is removed again if anything goes wrong so failures leave no
    # empty files behind.
    plot_path = None
    try:
        os.makedirs(output_dir, exist_ok=True)
        with tempfile.NamedTemporaryFile(
            suffix='.png', prefix='forecast_', dir=output_dir, delete=False
        ) as tmp:
            plot_path = tmp.name
        fig.write_image(plot_path, scale=2)
    except Exception as exc:
        if plot_path is not None:
            try:
                os.remove(plot_path)
            except OSError:
                pass
        return f"β Plot saving failed: {exc}"
    return forecast_df, plot_path
|