Spaces:
Runtime error
Runtime error
from sklearn.ensemble import RandomForestRegressor | |
import pandas as pd | |
import numpy as np | |
def create_lagged_features(data, n_lags=5):
    """
    Prepares the dataset with lagged features necessary for Random Forest regression.

    Parameters:
    - data: Pandas Series of historical closing prices. The Series may be
      unnamed; its name is never used to look up a column.
    - n_lags: The number of lagged observations to create as features.

    Returns:
    - A tuple (X, y). X is a DataFrame with columns 'lag_1' .. 'lag_{n_lags}',
      where 'lag_k' holds the value observed k rows earlier. y is the matching
      slice of the original series. Rows in which any lag or the target itself
      is NaN are dropped, so the first n_lags rows are always removed.
    """
    # Build the lag columns straight from the Series instead of going through
    # df[data.name]: that lookup raises KeyError for an unnamed Series and would
    # overwrite the target if the Series happened to be named like 'lag_1'.
    lagged = {f'lag_{lag}': data.shift(lag) for lag in range(1, n_lags + 1)}
    X = pd.DataFrame(lagged, index=data.index)

    # Keep only the rows where every feature and the target are present.
    # A positional (NumPy) mask avoids any label re-alignment on odd indexes.
    complete = X.notna().all(axis=1).to_numpy() & data.notna().to_numpy()
    return X[complete], data[complete]
def random_forest_forecast(data, forecast_horizon, n_lags=5):
    """
    Forecast future values using a Random Forest Regressor, with a dynamic forecast horizon.

    The model is trained on lagged features and then applied recursively: each
    prediction is fed back in as the most recent observation for the next step.

    Parameters:
    - data: Pandas Series of historical closing prices. Its last index label
      must support `+ pd.Timedelta(days=1)` (e.g. a DatetimeIndex), because the
      forecast dates are generated from it.
    - forecast_horizon: Integer specifying the number of days to forecast.
    - n_lags: Number of past observations to use for forecasting.

    Returns:
    - Pandas Series containing the forecasted values with a datetime index of
      consecutive calendar days starting the day after the last observation.

    Raises:
    - ValueError: if no training rows remain after building the lagged features
      (for example when `data` has no more than `n_lags` observations).
    """
    X, y = create_lagged_features(data, n_lags)
    if X.empty:
        raise ValueError(
            "Not enough observations to build lagged features: "
            f"got {len(data)} values for n_lags={n_lags}"
        )

    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X, y)

    # Window of the latest observations, most recent first, so position k-1
    # lines up with training column 'lag_k'. Cast to float so that writing a
    # fractional forecast into it is not truncated for integer-valued input;
    # copy so the caller's data is never aliased.
    window = data.tail(n_lags).to_numpy(dtype=float)[::-1].copy()

    forecasts = []
    for _ in range(forecast_horizon):
        # Predict on a frame with the training column names to keep the
        # feature order explicit and avoid scikit-learn's feature-name warning.
        model_input = pd.DataFrame(window.reshape(1, -1), columns=X.columns)
        forecast = model.predict(model_input)[0]
        forecasts.append(forecast)

        # Shift every value one lag older (the oldest wraps to slot 0) and
        # overwrite slot 0 with the new forecast, which becomes 'lag_1'.
        window = np.roll(window, 1)
        window[0] = forecast

    future_dates = pd.date_range(
        start=data.index[-1] + pd.Timedelta(days=1),
        periods=forecast_horizon,
    )
    # An explicit dtype keeps the result float64 even when the horizon is 0.
    return pd.Series(forecasts, index=future_dates, dtype=float)