Spaces:
Sleeping
Sleeping
File size: 3,120 Bytes
eec9db3 010071f 3651f7b 010071f eec9db3 1fadf44 eec9db3 5405a02 eec9db3 5405a02 eec9db3 5405a02 eec9db3 1fadf44 9db6dea e9cc996 eec9db3 3dbe4ef eec9db3 e9cc996 eec9db3 e9cc996 eec9db3 3dbe4ef eec9db3 e9cc996 eec9db3 9db6dea eec9db3 9db6dea e9cc996 eec9db3 5405a02 eec9db3 e9cc996 eec9db3 a9cd5f0 eec9db3 5405a02 eec9db3 5405a02 3dbe4ef e9cc996 eec9db3 3dbe4ef eec9db3 e9cc996 eec9db3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 |
import os
import tempfile
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA
import plotly.graph_objects as go
def forecast_metric_tool(
    file_path: str,
    date_col: str,
    value_col: str,
    periods: int = 3,
    output_dir: str = "/tmp"
):
    """Forecast a time-series metric from a CSV/Excel file with ARIMA(1,1,1).

    The file is loaded, `date_col` is parsed as dates and `value_col` as
    numbers, rows with unparseable values are dropped, duplicate dates are
    averaged, and the series is placed on a regular frequency grid (inferred
    from the data, falling back to daily). An ARIMA(1,1,1) model is fitted,
    the next `periods` steps are forecast, and a combined history+forecast
    plot is written as a PNG into `output_dir`.

    Args:
        file_path: Path to the input file. Extensions '.xls' / '.xlsx' are
            read with `pd.read_excel`; anything else with `pd.read_csv`.
        date_col: Name of the column holding the dates.
        value_col: Name of the column holding the metric values.
        periods: Number of future steps to forecast; must be a positive
            integer.
        output_dir: Directory the PNG is written to (created if missing).

    Returns:
        On success, a tuple `(forecast_df, plot_path)`:
            forecast_df (pd.DataFrame): predicted values in a single
                'Forecast' column, indexed by date.
            plot_path (str): full path to the saved PNG plot.
        On any failure, a string starting with 'β' describing the problem.
    """
    # Validate the horizon first so a bad value is reported as an error
    # string instead of surfacing as an exception from the forecasting step.
    try:
        steps = int(periods)
    except (TypeError, ValueError, OverflowError):
        steps = 0
    if steps < 1 or steps != periods:
        return f"β 'periods' must be a positive integer, got {periods!r}."

    # 0) Load data (CSV or Excel)
    ext = os.path.splitext(file_path)[1].lower()
    try:
        if ext in ('.xls', '.xlsx'):
            df = pd.read_excel(file_path)
        else:
            df = pd.read_csv(file_path)
    except Exception as e:
        return f"β Failed to load file: {e}"

    # 1) Validate columns
    for col in (date_col, value_col):
        if col not in df.columns:
            return f"β Column '{col}' not found."

    # 2) Parse dates and numeric
    try:
        df[date_col] = pd.to_datetime(df[date_col])
    except Exception:
        return f"β Could not parse '{date_col}' as dates."
    df[value_col] = pd.to_numeric(df[value_col], errors='coerce')
    df = df.dropna(subset=[date_col, value_col])
    if df.empty:
        return f"β No valid rows after dropping NaNs in '{date_col}'/'{value_col}'."

    # 3) Aggregate duplicates & index: after this the index is unique and
    #    sorted, so no further de-duplication is needed before asfreq().
    df = (
        df[[date_col, value_col]]
        .groupby(date_col, as_index=True)
        .mean()
        .sort_index()
    )

    # pd.infer_freq raises ValueError for fewer than 3 dates; report that
    # as a regular error string rather than letting it escape.
    if len(df) < 3:
        return (
            f"β Need at least 3 distinct dates in '{date_col}' to forecast, "
            f"found {len(df)}."
        )

    # 4) Infer frequency, falling back to daily when none can be determined
    try:
        freq = pd.infer_freq(df.index)
    except (TypeError, ValueError):
        freq = None
    if freq is None:
        freq = 'D'  # fallback
    # NOTE: asfreq() reindexes onto a regular grid; dates missing from the
    # data become NaN rows, which are passed to the model as missing values.
    df = df.asfreq(freq)

    # 5) Fit ARIMA
    try:
        model = ARIMA(df[value_col], order=(1, 1, 1))
        fit = model.fit()
    except Exception as e:
        return f"β ARIMA fitting failed: {e}"

    # 6) Forecast future
    try:
        fc_res = fit.get_forecast(steps=steps)
        forecast = fc_res.predicted_mean
        forecast_df = forecast.to_frame(name='Forecast')
    except Exception as e:
        return f"β Forecasting failed: {e}"

    # 7) Plot history + forecast
    fig = go.Figure()
    fig.add_trace(
        go.Scatter(
            x=df.index, y=df[value_col],
            mode='lines+markers', name=value_col
        )
    )
    fig.add_trace(
        go.Scatter(
            x=forecast.index, y=forecast,
            mode='lines+markers', name='Forecast'
        )
    )
    fig.update_layout(
        title=f"{value_col} Forecast",
        xaxis_title=date_col,
        yaxis_title=value_col,
        template='plotly_dark',
    )

    # 8) Save to temporary file. The file is created only to reserve a unique
    #    name (delete=False); the image is written after the handle is closed.
    plot_path = None
    try:
        os.makedirs(output_dir, exist_ok=True)
        with tempfile.NamedTemporaryFile(
            suffix='.png', prefix='forecast_', dir=output_dir, delete=False
        ) as tmp:
            plot_path = tmp.name
        fig.write_image(plot_path)
    except Exception as e:
        # Best-effort cleanup so a failed export does not leave an empty PNG.
        if plot_path is not None and os.path.exists(plot_path):
            try:
                os.remove(plot_path)
            except OSError:
                pass  # cleanup failure must not mask the original error
        return f"β Failed to save plot: {e}"

    return forecast_df, plot_path
|