import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor


def create_lagged_features(data, n_lags=5):
    """
    Build a supervised-learning dataset of lagged values from a series.

    Parameters:
    - data: Pandas Series of historical closing prices (named or unnamed).
    - n_lags: The number of lagged observations to create as features.

    Returns:
    - A tuple (X, y). X is a DataFrame with columns 'lag_1' ... 'lag_n',
      where 'lag_k' holds the value observed k steps before the row's
      target. y is the matching slice of the original series. Rows that
      contain NaN (the first n_lags rows produced by shifting, and any
      row touched by a NaN in the input) are dropped from both.
    """
    # An unnamed Series has name None, which cannot be used to look the column
    # up again after conversion to a DataFrame; use a placeholder label and
    # restore the original name on the returned target.
    target_col = data.name if data.name is not None else "target"
    frame = data.to_frame(name=target_col)

    for lag in range(1, n_lags + 1):
        frame[f'lag_{lag}'] = frame[target_col].shift(lag)

    # Shifting leaves NaN in the leading rows; they cannot be used for fitting.
    frame = frame.dropna()

    X = frame.drop(columns=[target_col])
    y = frame[target_col].rename(data.name)
    return X, y


def random_forest_forecast(data, forecast_horizon, n_lags=5):
    """
    Forecast future values recursively with a Random Forest Regressor.

    The model is trained to predict a value from its n_lags predecessors.
    Each forecast is then fed back in as the newest observation to
    produce the next step, until forecast_horizon values are generated.

    Parameters:
    - data: Pandas Series of historical closing prices. Its index must
      support `index[-1] + pd.Timedelta(days=1)` (e.g. a DatetimeIndex).
    - forecast_horizon: Integer specifying the number of days to forecast.
    - n_lags: Number of past observations to use for forecasting.

    Returns:
    - Pandas Series containing the forecasted values, indexed by
      consecutive daily dates starting the day after the last
      observation.

    Raises:
    - ValueError: if n_lags is smaller than 1 or data has too few
      observations to build at least one training row.
    """
    if n_lags < 1:
        raise ValueError("n_lags must be at least 1")
    if len(data) <= n_lags:
        raise ValueError(
            f"need more than n_lags={n_lags} observations, got {len(data)}"
        )

    X, y = create_lagged_features(data, n_lags)
    if X.empty:
        raise ValueError("no complete training rows after dropping NaN values")

    model = RandomForestRegressor(n_estimators=100, random_state=42)
    # Fit on plain arrays: prediction below uses plain arrays too, and mixing
    # a named DataFrame at fit time with an ndarray at predict time makes
    # scikit-learn emit a feature-name warning on every step.
    model.fit(X.to_numpy(), y.to_numpy())

    # Feature order is lag_1 (most recent) ... lag_n (oldest), so the window
    # is kept most-recent-first. Cast to float so that writing a forecast
    # into an integer-valued history does not truncate it; copy so the
    # caller's data is never aliased.
    window = data.to_numpy(dtype=float)[-n_lags:][::-1].copy()

    forecasts = []
    for _ in range(forecast_horizon):
        forecast = model.predict(window.reshape(1, -1))[0]
        forecasts.append(forecast)

        # Slide the window one step forward in time: every value becomes one
        # lag older (shift right, the oldest wraps to slot 0) and the new
        # forecast overwrites slot 0 as the most recent observation.
        window = np.roll(window, 1)
        window[0] = forecast

    future_dates = pd.date_range(
        start=data.index[-1] + pd.Timedelta(days=1),
        periods=forecast_horizon,
    )
    return pd.Series(forecasts, index=future_dates, dtype=float)