Spaces:
Running
Running
File size: 4,796 Bytes
2f38c9c bed9614 6a4fa2c 97e6589 05cf610 6a4fa2c 1adfa85 bed9614 7d4300a 2f38c9c 7d4300a 2f38c9c 7d4300a 10ff16a 2f38c9c 7d4300a 2f38c9c 7d4300a ddb4d52 6a4fa2c 7d4300a 6a4fa2c 7d4300a 6a4fa2c 7d4300a 6a4fa2c 7d4300a 6a4fa2c 7d4300a 6a4fa2c 2f38c9c 7d4300a 8cfda07 1adfa85 7d4300a 1adfa85 7d4300a 1adfa85 7d4300a 1adfa85 7d4300a 1adfa85 7d4300a 1adfa85 7d4300a a626763 51a6b05 97e6589 51a6b05 97e6589 51a6b05 7d4300a 97e6589 7d4300a c96b30c 7d4300a c96b30c 7d4300a 97e6589 7d4300a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 |
import unittest
import numpy as np
from pysr import pysr, get_hof, best, best_tex, best_callable, best_row
from pysr.sr import run_feature_selection, _handle_feature_selection
import sympy
from sympy import lambdify
import pandas as pd
class TestPipeline(unittest.TestCase):
    """End-to-end checks that run `pysr` on small synthetic regression problems."""

    def setUp(self):
        """Prepare the shared search settings and a seeded 100x5 Gaussian input matrix."""
        self.default_test_kwargs = {
            "niterations": 10,
            "populations": 4,
            "user_input": False,
            "annealing": True,
            "useFrequency": False,
        }
        # Seed the global NumPy RNG so every test sees the same inputs.
        np.random.seed(0)
        self.X = np.random.randn(100, 5)

    def test_linear_relation(self):
        """Target equals the first input column; the last result row must reach MSE <= 1e-4."""
        target = self.X[:, 0]
        equations = pysr(self.X, target, **self.default_test_kwargs)
        print(equations)
        final_row = equations.iloc[-1]
        self.assertLessEqual(final_row["MSE"], 1e-4)

    def test_multioutput_custom_operator(self):
        """Two squared-column targets, searched with a user-defined `sq` unary operator."""
        targets = self.X[:, [0, 1]] ** 2
        equations = pysr(
            self.X,
            targets,
            unary_operators=["sq(x) = x^2"],
            binary_operators=["plus"],
            extra_sympy_mappings={"sq": lambda x: x ** 2},
            **self.default_test_kwargs,
            procs=0,
        )
        print(equations)
        # One result table per output column; check the last row of each.
        for output_index in (0, 1):
            final_row = equations[output_index].iloc[-1]
            self.assertLessEqual(final_row["MSE"], 1e-4)

    def test_multioutput_weighted_with_callable(self):
        """Corrupt the zero-weight samples and check the best callables still match x**2."""
        targets = self.X[:, [0, 1]] ** 2
        # Threshold uniform draws at 0.5 to obtain a 0/1 weight for every target entry.
        draws = np.random.rand(*targets.shape)
        weights = np.where(draws < 0.5, 0.0, 1.0)
        # Double the target wherever the weight is 0 ...
        targets += (1 - weights) * targets
        # ... so pysr has to honour the weights to recover the right equation.
        equations = pysr(
            self.X,
            targets,
            weights=weights,
            unary_operators=["sq(x) = x^2"],
            binary_operators=["plus"],
            extra_sympy_mappings={"sq": lambda x: x ** 2},
            **self.default_test_kwargs,
            procs=0,
        )
        for column in (0, 1):
            predicted = best_callable()[column](self.X)
            np.testing.assert_almost_equal(
                predicted, self.X[:, column] ** 2, decimal=4
            )

    def test_empty_operators_single_input(self):
        """A single input column plus a constant offset, with no unary operators allowed."""
        single_column = np.random.randn(100, 1)
        target = single_column[:, 0] + 3.0
        equations = pysr(
            single_column,
            target,
            unary_operators=[],
            binary_operators=["plus"],
            **self.default_test_kwargs,
        )
        final_row = equations.iloc[-1]
        self.assertLessEqual(final_row["MSE"], 1e-4)
class TestBest(unittest.TestCase):
    """Checks the `best*` helpers against a small hand-written equation table."""

    def setUp(self):
        """Write a three-row equation table to disk and load it back through `get_hof`."""
        table = pd.DataFrame(
            {
                "Equation": ["1.0", "cos(x0)", "square(cos(x0))"],
                "MSE": [1.0, 0.1, 1e-5],
                "Complexity": [1, 2, 3],
            }
        )
        column_order = "Complexity MSE Equation".split(" ")
        # NOTE(review): the table is saved with a `.bkup` suffix while `get_hof`
        # receives the bare file name; presumably `get_hof` resolves the suffix
        # itself -- confirm against its implementation.
        table[column_order].to_csv("equation_file.csv.bkup", sep="|")
        # NOTE(review): the keyword below is spelled `variables_names`; verify
        # that this matches the parameter name `get_hof` actually expects.
        self.equations = get_hof(
            "equation_file.csv",
            n_features=2,
            variables_names="x0 x1".split(" "),
            extra_sympy_mappings={},
            output_jax_format=False,
            multioutput=False,
            nout=1,
        )

    def test_best(self):
        """`best` yields cos(x0)**2 both with an explicit table and with no argument."""
        expected = sympy.cos(sympy.Symbol("x0")) ** 2
        self.assertEqual(best(self.equations), expected)
        self.assertEqual(best(), expected)

    def test_best_tex(self):
        """`best_tex` yields the LaTeX form of cos(x0)**2, with and without a table."""
        expected = "\\cos^{2}{\\left(x_{0} \\right)}"
        self.assertEqual(best_tex(self.equations), expected)
        self.assertEqual(best_tex(), expected)

    def test_best_lambda(self):
        """`best_callable` evaluates to cos(x0)**2 on random inputs, with and without a table."""
        inputs = np.random.randn(10, 2)
        expected = np.cos(inputs[:, 0]) ** 2
        candidates = [best_callable(), best_callable(self.equations)]
        for candidate in candidates:
            np.testing.assert_almost_equal(candidate(inputs), expected, decimal=4)
class TestFeatureSelection(unittest.TestCase):
    """Checks that feature selection picks the two columns the target depends on."""

    def setUp(self):
        """Seed the global NumPy RNG so the generated data is reproducible."""
        np.random.seed(0)

    def test_feature_selection(self):
        """`run_feature_selection` with k=2 must return exactly columns 2 and 3."""
        X = np.random.randn(20000, 5)
        # The target only involves columns 2 and 3 of X.
        y = X[:, 2] ** 2 + X[:, 3] ** 2
        selected = run_feature_selection(X, y, select_k_features=2)
        self.assertEqual(sorted(selected), [2, 3])

    def test_feature_selection_handler(self):
        """`_handle_feature_selection` must return the matching columns, names and indices."""
        X = np.random.randn(20000, 5)
        # The target only involves columns 2 and 3 of X.
        y = X[:, 2] ** 2 + X[:, 3] ** 2
        var_names = [f"x{i}" for i in range(5)]
        selected_X, selected_var_names, selection = _handle_feature_selection(
            X,
            select_k_features=2,
            use_custom_variable_names=True,
            variable_names=var_names,
            y=y,
        )
        # Separate assertions so a failure reports which index is missing.
        self.assertIn(2, selection)
        self.assertIn(3, selection)
        self.assertEqual(set(selected_var_names), {"x2", "x3"})
        # Sort within each row so the comparison does not depend on the order
        # in which the selected columns are returned.
        np.testing.assert_array_equal(
            np.sort(selected_X, axis=1), np.sort(X[:, [2, 3]], axis=1)
        )
|