File size: 4,614 Bytes
2f38c9c
bed9614
6a4fa2c
97e6589
05cf610
6a4fa2c
1adfa85
bed9614
2f38c9c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10ff16a
2f38c9c
 
 
 
6a4fa2c
 
 
2f38c9c
 
 
ddb4d52
6a4fa2c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2f38c9c
 
 
 
 
 
8cfda07
2f38c9c
1adfa85
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
00a6f27
1adfa85
 
 
00a6f27
1adfa85
 
 
 
 
a626763
00a6f27
97e6589
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
import os
import unittest

import numpy as np
import pandas as pd
import sympy
from sympy import lambdify

from pysr import pysr, get_hof, best, best_tex, best_callable, best_row
from pysr.sr import run_feature_selection, _handle_feature_selection

class TestPipeline(unittest.TestCase):
    """End-to-end checks that run `pysr` on small synthetic datasets."""

    def setUp(self):
        """Create the seeded feature matrix and the shared search settings."""
        # Seed first so every test method starts from the same 100x5 matrix.
        np.random.seed(0)
        self.X = np.random.randn(100, 5)
        self.default_test_kwargs = {
            "niterations": 10,
            "populations": 4,
            "user_input": False,
            "annealing": True,
            "useFrequency": False,
        }

    def test_linear_relation(self):
        """The target is column 0 itself; the last row must reach MSE <= 1e-4."""
        target = self.X[:, 0]
        equations = pysr(self.X, target, **self.default_test_kwargs)
        print(equations)
        final_row = equations.iloc[-1]
        self.assertLessEqual(final_row["MSE"], 1e-4)

    def test_multioutput_custom_operator(self):
        """Two squared-feature targets, searched with a custom `sq` operator."""
        targets = self.X[:, [0, 1]] ** 2
        equations = pysr(
            self.X,
            targets,
            unary_operators=["sq(x) = x^2"],
            binary_operators=["plus"],
            extra_sympy_mappings={"sq": lambda x: x**2},
            **self.default_test_kwargs,
            procs=0,
        )
        print(equations)
        # The result is indexed per output column; check the last row of each.
        for output_index in (0, 1):
            final_row = equations[output_index].iloc[-1]
            self.assertLessEqual(final_row["MSE"], 1e-4)

    def test_multioutput_weighted_with_callable(self):
        """Weighted two-output search checked through `best_callable()`."""
        targets = self.X[:, [0, 1]] ** 2

        # Binary weights: 0.0 where the uniform draw is below 0.5, else 1.0.
        draws = np.random.rand(*targets.shape)
        weights = np.where(draws < 0.5, 0.0, 1.0)

        # Double the target wherever the weight is 0, so only a search that
        # honours the weights can recover the plain squared features.
        targets = targets + (1 - weights) * targets

        pysr(
            self.X,
            targets,
            weights=weights,
            unary_operators=["sq(x) = x^2"],
            binary_operators=["plus"],
            extra_sympy_mappings={"sq": lambda x: x**2},
            **self.default_test_kwargs,
            procs=0,
        )

        for output_index in (0, 1):
            predict = best_callable()[output_index]
            np.testing.assert_almost_equal(
                predict(self.X),
                self.X[:, output_index] ** 2,
            )

    def test_empty_operators_single_input(self):
        """Single-column input with no unary operators: y = x0 + 3."""
        features = np.random.randn(100, 1)
        target = features[:, 0] + 3.0
        equations = pysr(
            features,
            target,
            unary_operators=[],
            binary_operators=["plus"],
            **self.default_test_kwargs,
        )

        final_row = equations.iloc[-1]
        self.assertLessEqual(final_row["MSE"], 1e-4)

class TestBest(unittest.TestCase):
    """Checks for the `best*` helpers using a hand-written equation table."""

    def setUp(self):
        """Write a three-row equation table to disk and load it via `get_hof`.

        The file written here is deleted again after each test, so test runs
        do not leave artifacts behind in the working directory.
        """
        # Registered before the write so the file is removed even when a
        # later step of setUp raises.
        self.addCleanup(self._remove_if_present, 'equation_file.csv.bkup')

        equations = pd.DataFrame({
            'Equation': ['1.0', 'cos(x0)', 'square(cos(x0))'],
            'MSE': [1.0, 0.1, 1e-5],
            'Complexity': [1, 2, 3]
            })

        # Columns are written in the order Complexity, MSE, Equation,
        # separated by '|'.
        equations['Complexity MSE Equation'.split(' ')].to_csv(
                'equation_file.csv.bkup', sep='|')

        # NOTE(review): the table is written to '<name>.bkup' but loaded with
        # '<name>'; presumably get_hof appends the suffix itself - confirm.
        # NOTE(review): the keyword is spelled `variables_names` (plural);
        # confirm this matches get_hof's signature.
        self.equations = get_hof(
                'equation_file.csv', n_features=2,
                variables_names='x0 x1'.split(' '),
                extra_sympy_mappings={}, output_jax_format=False,
                multioutput=False, nout=1)

    @staticmethod
    def _remove_if_present(path):
        """Delete `path`; do nothing if it has already been removed."""
        try:
            os.remove(path)
        except FileNotFoundError:
            pass

    def test_best(self):
        """`best` yields cos(x0)**2 with the table passed and with it omitted."""
        self.assertEqual(best(self.equations), sympy.cos(sympy.Symbol('x0'))**2)
        self.assertEqual(best(), sympy.cos(sympy.Symbol('x0'))**2)

    def test_best_tex(self):
        """`best_tex` yields the LaTeX form of cos(x0)**2 in both call styles."""
        self.assertEqual(best_tex(self.equations), '\\cos^{2}{\\left(x_{0} \\right)}')
        self.assertEqual(best_tex(), '\\cos^{2}{\\left(x_{0} \\right)}')

    def test_best_lambda(self):
        """`best_callable` evaluates to cos(x0)**2 on random 10x2 input."""
        X = np.random.randn(10, 2)
        y = np.cos(X[:, 0])**2
        for f in [best_callable(), best_callable(self.equations)]:
            np.testing.assert_almost_equal(f(X), y)


class TestFeatureSelection(unittest.TestCase):
    """Checks that feature selection keeps the two columns the target uses."""

    def test_feature_selection(self):
        """`run_feature_selection` should return indices 2 and 3."""
        np.random.seed(0)
        X = np.random.randn(20001, 5)
        # The target depends only on columns 2 and 3.
        y = X[:, 2]**2 + X[:, 3]**2
        selected = run_feature_selection(X, y, select_k_features=2)
        self.assertEqual(sorted(selected), [2, 3])

    def test_feature_selection_handler(self):
        """`_handle_feature_selection` should keep columns and names x2, x3."""
        np.random.seed(0)
        X = np.random.randn(20000, 5)
        # The target depends only on columns 2 and 3.
        y = X[:, 2]**2 + X[:, 3]**2
        var_names = [f'x{i}' for i in range(5)]
        selected_X, selected_var_names = _handle_feature_selection(
                X, select_k_features=2,
                use_custom_variable_names=True,
                variable_names=var_names,
                y=y)
        self.assertEqual(set(selected_var_names), set('x2 x3'.split(' ')))
        # Sort within each row so the comparison does not depend on the
        # order in which the two selected columns are returned.
        np.testing.assert_array_equal(
                np.sort(selected_X, axis=1),
                np.sort(X[:, [2, 3]], axis=1)
            )