mgbam committed on
Commit
5405a02
·
verified ·
1 Parent(s): 092c2a9

Update tools/forecaster.py

Browse files
Files changed (1) hide show
  1. tools/forecaster.py +31 -16
tools/forecaster.py CHANGED
@@ -6,55 +6,70 @@ import plotly.graph_objects as go
6
 
7
  def forecast_metric_tool(file_path: str, date_col: str, value_col: str):
8
  """
9
- Forecast next 3 periods for any numeric metric, saving
10
- the PNG under /tmp and returning the forecast table as text.
 
11
  """
12
- # 1) Load & parse dates
 
13
  df = pd.read_csv(file_path)
 
 
 
 
 
 
 
 
14
  try:
15
  df[date_col] = pd.to_datetime(df[date_col])
16
  except Exception:
17
  return f"❌ Could not parse '{date_col}' as dates."
18
 
19
- # 2) Coerce metric to numeric & drop invalid rows
20
  df[value_col] = pd.to_numeric(df[value_col], errors="coerce")
21
  df = df.dropna(subset=[date_col, value_col])
22
  if df.empty:
23
- return f"❌ No valid data for '{value_col}'."
24
 
25
- # 3) Sort by date, set index, then collapse any duplicate timestamps
26
  df = df.sort_values(date_col).set_index(date_col)
27
- # If you have multiple rows for the same timestamp, take their mean
28
  df = df[[value_col]].groupby(level=0).mean()
29
 
30
- # 4) Infer frequency (e.g. 'D', 'M', etc.) and reindex
31
  freq = pd.infer_freq(df.index)
32
  if freq is None:
33
  freq = "D" # fallback to daily
34
  df = df.asfreq(freq)
35
 
36
- # 5) Fit ARIMA
37
  try:
38
  model = ARIMA(df[value_col], order=(1, 1, 1))
39
  model_fit = model.fit()
40
  except Exception as e:
41
  return f"❌ ARIMA fitting failed: {e}"
42
 
43
- # 6) Forecast with a proper DatetimeIndex
44
  fc_res = model_fit.get_forecast(steps=3)
45
- forecast = fc_res.predicted_mean # pd.Series indexed by future dates
46
 
47
- # 7) Plot history + forecast
48
  fig = go.Figure()
49
- fig.add_scatter(x=df.index, y=df[value_col], mode="lines", name=value_col)
50
- fig.add_scatter(x=forecast.index, y=forecast, mode="lines+markers", name="Forecast")
 
 
 
 
 
 
51
  fig.update_layout(
52
  title=f"{value_col} Forecast",
53
  xaxis_title=date_col,
54
  yaxis_title=value_col,
55
  template="plotly_dark",
56
  )
57
- fig.write_image("forecast_plot.png") # safely lands in /tmp via monkey-patch
58
 
59
- # 8) Return the forecast table as plain text
60
  return forecast.to_frame(name="Forecast").to_string()
 
6
 
7
  def forecast_metric_tool(file_path: str, date_col: str, value_col: str):
8
  """
9
+ Forecast the next 3 periods for any numeric metric.
10
+ - Saves a date‐indexed Plotly PNG under /tmp via the safe write monkey‐patch.
11
+ - Returns a text table of the forecast.
12
  """
13
+
14
+ # 0) Read full CSV
15
  df = pd.read_csv(file_path)
16
+
17
+ # 1) Check that both columns actually exist
18
+ if date_col not in df.columns:
19
+ return f"❌ Date column '{date_col}' not found in your data."
20
+ if value_col not in df.columns:
21
+ return f"❌ Metric column '{value_col}' not found in your data."
22
+
23
+ # 2) Parse dates
24
  try:
25
  df[date_col] = pd.to_datetime(df[date_col])
26
  except Exception:
27
  return f"❌ Could not parse '{date_col}' as dates."
28
 
29
+ # 3) Coerce metric to numeric & drop invalid rows
30
  df[value_col] = pd.to_numeric(df[value_col], errors="coerce")
31
  df = df.dropna(subset=[date_col, value_col])
32
  if df.empty:
33
+ return f"❌ After coercion, no valid data remains for '{value_col}'."
34
 
35
+ # 4) Sort & index by date, collapse duplicates
36
  df = df.sort_values(date_col).set_index(date_col)
 
37
  df = df[[value_col]].groupby(level=0).mean()
38
 
39
+ # 5) Infer a frequency and re‐index
40
  freq = pd.infer_freq(df.index)
41
  if freq is None:
42
  freq = "D" # fallback to daily
43
  df = df.asfreq(freq)
44
 
45
+ # 6) Fit ARIMA (1,1,1)
46
  try:
47
  model = ARIMA(df[value_col], order=(1, 1, 1))
48
  model_fit = model.fit()
49
  except Exception as e:
50
  return f"❌ ARIMA fitting failed: {e}"
51
 
52
+ # 7) Produce a proper date‐indexed forecast
53
  fc_res = model_fit.get_forecast(steps=3)
54
+ forecast = fc_res.predicted_mean
55
 
56
+ # 8) Plot history + forecast
57
  fig = go.Figure()
58
+ fig.add_scatter(
59
+ x=df.index, y=df[value_col],
60
+ mode="lines", name=value_col
61
+ )
62
+ fig.add_scatter(
63
+ x=forecast.index, y=forecast,
64
+ mode="lines+markers", name="Forecast"
65
+ )
66
  fig.update_layout(
67
  title=f"{value_col} Forecast",
68
  xaxis_title=date_col,
69
  yaxis_title=value_col,
70
  template="plotly_dark",
71
  )
72
+ fig.write_image("forecast_plot.png") # lands in /tmp via our monkeypatch
73
 
74
+ # 9) Return the forecast as a text table
75
  return forecast.to_frame(name="Forecast").to_string()