PySR / pysr /denoising.py
MilesCranmer's picture
fix: variety of typing information
fd4c500 unverified
raw
history blame
1.37 kB
"""Functions for denoising data during preprocessing."""
from typing import Optional, Tuple, cast
import numpy as np
from numpy import ndarray
def denoise(
X: ndarray,
y: ndarray,
Xresampled: Optional[ndarray] = None,
random_state: Optional[np.random.RandomState] = None,
) -> Tuple[ndarray, ndarray]:
"""Denoise the dataset using a Gaussian process."""
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel, WhiteKernel
gp_kernel = RBF(np.ones(X.shape[1])) + WhiteKernel(1e-1) + ConstantKernel()
gpr = GaussianProcessRegressor(
kernel=gp_kernel, n_restarts_optimizer=50, random_state=random_state
)
gpr.fit(X, y)
if Xresampled is not None:
return Xresampled, cast(ndarray, gpr.predict(Xresampled))
return X, cast(ndarray, gpr.predict(X))
def multi_denoise(
X: ndarray,
y: ndarray,
Xresampled: Optional[ndarray] = None,
random_state: Optional[np.random.RandomState] = None,
):
"""Perform `denoise` along each column of `y` independently."""
y = np.stack(
[
denoise(X, y[:, i], Xresampled=Xresampled, random_state=random_state)[1]
for i in range(y.shape[1])
],
axis=1,
)
if Xresampled is not None:
return Xresampled, y
return X, y