PySR / pysr /feature_selection.py
pre-commit-ci[bot]
[pre-commit.ci] pre-commit autoupdate (#537)
75c23d4 unverified
raw
history blame
1.17 kB
"""Functions for doing feature selection during preprocessing."""
import numpy as np
def run_feature_selection(X, y, select_k_features, random_state=None):
    """
    Find the most important features.

    Fits a random forest regressor on ``(X, y)`` as a proxy model and uses
    its feature importances to keep at most ``select_k_features`` columns
    of ``X``, returning the indices of those columns.

    Parameters
    ----------
    X : array-like
        Feature matrix handed to the regressor's ``fit``.
    y : array-like
        Regression targets handed to the regressor's ``fit``.
    select_k_features : int
        Maximum number of features to select.
    random_state : optional
        Forwarded to ``RandomForestRegressor``; set it to make the
        selection reproducible.

    Returns
    -------
    numpy.ndarray
        Integer indices of the selected columns of ``X``.
    """
    # Imported inside the function so scikit-learn is only required when
    # feature selection is actually run.
    from sklearn.ensemble import RandomForestRegressor
    from sklearn.feature_selection import SelectFromModel

    # Small, shallow forest: used only to rank features, not to predict.
    clf = RandomForestRegressor(
        n_estimators=100, max_depth=3, random_state=random_state
    )
    clf.fit(X, y)

    # threshold=-inf disables the importance cutoff, so `max_features`
    # alone decides how many features are kept.
    selector = SelectFromModel(
        clf, threshold=-np.inf, max_features=select_k_features, prefit=True
    )
    return selector.get_support(indices=True)
# This function is kept only because the module tests still use it.
def _handle_feature_selection(
    X, select_k_features, y, variable_names, random_state=None
):
    """
    Optionally reduce ``X`` to its most important columns.

    Parameters
    ----------
    X : numpy.ndarray
        Feature matrix; indexed as ``X[:, selection]`` when selecting.
    select_k_features : int or None
        Number of features to keep. ``None`` disables feature selection.
    y : array-like
        Targets forwarded to ``run_feature_selection``.
    variable_names : sequence
        Names indexed by column position, used only for the printed report.
    random_state : optional
        Forwarded to ``run_feature_selection`` so the selection can be made
        reproducible. Defaults to ``None``, which matches the previous
        behaviour of this function.

    Returns
    -------
    tuple
        ``(X, selection)``. With selection disabled this is the untouched
        input and ``None``; otherwise the column-reduced array and the
        selected column indices.
    """
    # Guard clause: nothing to do when selection is disabled.
    if select_k_features is None:
        return X, None

    selection = run_feature_selection(
        X, y, select_k_features, random_state=random_state
    )
    print(f"Using features {[variable_names[i] for i in selection]}")
    return X[:, selection], selection