File size: 2,110 Bytes
4ba82ce
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
from sklearn.ensemble import RandomForestRegressor
import pandas as pd
import numpy as np

def create_lagged_features(data, n_lags=5):
    """
    Build a supervised-learning dataset of lagged values from a univariate series.

    Parameters:
    - data: Pandas Series of historical closing prices. It may be unnamed.
    - n_lags: The number of lagged observations to create as features.

    Returns:
    - A tuple (X, y). X is a DataFrame with columns 'lag_1' ... 'lag_<n_lags>',
      where 'lag_k' holds the value observed k rows earlier. y is the matching
      slice of `data`. Rows in which the target or any lag is NaN are dropped,
      so the first n_lags rows never appear in the result.
    """
    # Shift the Series itself instead of looking a column up by `data.name`:
    # an unnamed Series (name=None) has no column under that key, and a Series
    # that is itself called e.g. 'lag_1' would be overwritten by a feature.
    lagged = pd.DataFrame(
        {f'lag_{lag}': data.shift(lag) for lag in range(1, n_lags + 1)},
        index=data.index,
    )

    # Keep only rows where the target and every lag are present. Plain NumPy
    # masks are used so no index alignment is involved.
    complete = lagged.notna().all(axis=1).to_numpy() & data.notna().to_numpy()
    X = lagged.loc[complete]
    y = data.loc[complete]
    return X, y

def random_forest_forecast(data, forecast_horizon, n_lags=5):
    """
    Forecast future values using a Random Forest Regressor, with a dynamic forecast horizon.

    The model is fitted on lagged features of `data` and then applied
    recursively: each prediction is fed back in as the most recent
    observation for the next step.

    Parameters:
    - data: Pandas Series of historical closing prices. The last index label
      must support `+ pd.Timedelta(days=1)` (e.g. a DatetimeIndex).
    - forecast_horizon: Integer specifying the number of days to forecast.
    - n_lags: Number of past observations to use for forecasting.

    Returns:
    - Pandas Series containing the forecasted values with a datetime index
      that starts one day after the last label of `data` and advances daily.

    Raises:
    - ValueError: if n_lags is smaller than 1, or if `data` is too short to
      yield at least one training row.
    """
    if n_lags < 1:
        raise ValueError("n_lags must be at least 1")

    X, y = create_lagged_features(data, n_lags)
    if len(X) == 0:
        raise ValueError(
            "not enough observations to build lagged features: "
            f"need more than n_lags={n_lags} non-missing values"
        )

    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X, y)

    # Feature window ordered like the training columns (lag_1, lag_2, ...),
    # i.e. most recent observation first. A float copy is used so that
    # forecasts written into it are not truncated for integer input and the
    # caller's data is never touched.
    window = data.tail(n_lags).to_numpy(dtype=float)[::-1].copy()
    forecasts = []

    for _ in range(forecast_horizon):
        # Predict on a one-row frame carrying the training column names so
        # the features match what the model was fitted on.
        step_input = pd.DataFrame(window.reshape(1, -1), columns=X.columns)
        forecast = model.predict(step_input)[0]
        forecasts.append(forecast)

        # Age every observation by one lag (shift right) and place the new
        # forecast in the lag_1 slot; the oldest value wraps to index 0 and
        # is overwritten.
        window = np.roll(window, 1)
        window[0] = forecast

    future_dates = pd.date_range(
        start=data.index[-1] + pd.Timedelta(days=1),
        periods=forecast_horizon,
    )
    forecast_series = pd.Series(forecasts, index=future_dates)

    return forecast_series