netflypsb committed on
Commit
4ba82ce
·
verified ·
1 Parent(s): f852243

Create random_forest.py

Browse files
Files changed (1) hide show
  1. algo/random_forest.py +57 -0
algo/random_forest.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sklearn.ensemble import RandomForestRegressor
2
+ import pandas as pd
3
+ import numpy as np
4
+
5
def create_lagged_features(data, n_lags=5):
    """
    Build a supervised-learning dataset of lagged observations from a series.

    Parameters:
    - data: Pandas Series of historical closing prices. It may be named or
      unnamed; the name is not used to build the feature columns.
    - n_lags: The number of lagged observations to create as features.

    Returns:
    - A tuple (X, y). X is a DataFrame with columns 'lag_1' .. 'lag_<n_lags>',
      where 'lag_k' holds the value observed k rows earlier. y is the
      matching slice of the original series (same name, same index labels).
      Rows in which any lag or the target itself is NaN are dropped, so the
      first n_lags rows never appear in the output.
    """
    # Keep the lag columns in their own frame so the target's name (possibly
    # None, or even something like 'lag_1') can never clash with a feature.
    lagged = pd.DataFrame(index=data.index)
    for lag in range(1, n_lags + 1):
        lagged[f'lag_{lag}'] = data.shift(lag)

    # A row is usable only when every lag and the target are present. The
    # mask is positional (NumPy) so it does not depend on index alignment.
    complete = (
        lagged.notna().all(axis=1).to_numpy(dtype=bool)
        & data.notna().to_numpy(dtype=bool)
    )

    X = lagged.loc[complete]
    y = data.loc[complete]
    return X, y
23
+
24
def random_forest_forecast(data, forecast_horizon, n_lags=5):
    """
    Forecast future values using a Random Forest Regressor, with a dynamic forecast horizon.

    The model is fitted on lagged features of `data` and then applied
    recursively: each forecast is fed back in as the most recent observation
    for the next step.

    Parameters:
    - data: Pandas Series of historical closing prices. Its index must support
      `index[-1] + pd.Timedelta(days=1)` (i.e. be datetime-like), because the
      forecast index starts one calendar day after the last observation.
    - forecast_horizon: Integer specifying the number of days to forecast.
    - n_lags: Number of past observations to use for forecasting.

    Returns:
    - Pandas Series containing the forecasted values with a datetime index
      (daily frequency, `forecast_horizon` entries).

    Raises:
    - ValueError: if no complete training rows can be built from `data`
      (for example when it has `n_lags` or fewer usable observations).
    """
    X, y = create_lagged_features(data, n_lags)
    if len(X) == 0:
        raise ValueError(
            "Not enough observations to build lagged features: "
            f"got {len(data)} value(s) for n_lags={n_lags}."
        )

    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X, y)

    # window[i] is the value for feature column i of X (lag_1 first, i.e. the
    # most recent observation first). A float copy is used so forecasts are
    # not truncated for integer input and `data` is never aliased.
    window = data.tail(n_lags).to_numpy(dtype=float)[::-1].copy()
    forecasts = []

    for _ in range(forecast_horizon):
        # Predict with the same column labels used in fit so scikit-learn
        # sees consistent feature names.
        model_input = pd.DataFrame(window.reshape(1, -1), columns=X.columns)
        forecast = model.predict(model_input)[0]
        forecasts.append(forecast)

        # Slide the window forward in time: every value becomes one lag
        # older (shift right), the oldest wraps to slot 0 and is replaced by
        # the new forecast, which is now lag_1.
        window = np.roll(window, 1)
        window[0] = forecast

    future_dates = pd.date_range(
        start=data.index[-1] + pd.Timedelta(days=1),
        periods=forecast_horizon,
    )
    forecast_series = pd.Series(forecasts, index=future_dates)

    return forecast_series