# NOTE: the lines that stood here were file-viewer residue, not module code:
# a "File size: 3,239 Bytes" banner, a gutter of short hashes
# (ac2928c, 7e72d81, c88fbe0 -- presumably revision ids), and a column of
# line numbers 1-90. They are summarized in this comment so the file parses
# as Python.
import numpy as np
import csv


class Problem:
    """
    Problem API to work with PySR.

    Should be able to call pysr(problem.X, problem.y, var_names=problem.var_names) and have it work

    X: the input data, stored exactly as passed in
    y: the target values, stored exactly as passed in
    variable_names: the names of the variables, or None when not given
    var_names: alias of variable_names, so the call shown above works
    """
    def __init__(self, X, y, variable_names=None):
        self.X = X
        self.y = y
        self.variable_names = variable_names

    @property
    def var_names(self):
        """Alias of variable_names (the spelling used in the class docstring)."""
        return self.variable_names

    @var_names.setter
    def var_names(self, names):
        # Writable so code that assigns `self.var_names = ...` keeps working
        # and stays in sync with variable_names.
        self.variable_names = names


class FeynmanProblem(Problem):
    """
    Stores the data for the problems from the 100 Feynman Equations on Physics.
    This is the benchmark used in the AI Feynman Paper
    """
    def __init__(self, row, gen=False, dp=500):
        """
        row: a row read as a dict from the FeynmanEquations dataset provided in the datasets folder of the repo
        gen: If true the problem will have dp X and y values randomly generated else they will be None
        dp: the number of data points (rows of X) to generate when gen is true

        Raises KeyError / ValueError when row lacks an expected column or a
        numeric column cannot be parsed, and whatever evaluating the formula
        raises when gen is true.
        """
        self.eq_id = row['Filename']
        self.form = row['Formula']
        self.n_vars = int(row['# variables'])
        names = [row[f'v{i + 1}_name'] for i in range(self.n_vars)]
        super().__init__(None, None, variable_names=names)
        # The per-variable bounds are parsed and stored, but the sampling
        # below uses a fixed range and does not consult them.
        self.low = [float(row[f'v{i + 1}_low']) for i in range(self.n_vars)]
        self.high = [float(row[f'v{i + 1}_high']) for i in range(self.n_vars)]
        self.dp = dp
        if gen:
            self.X = np.random.uniform(0.01, 25, size=(self.dp, self.n_vars))
            # Bind every variable name to its column of X.  The names come
            # from self.variable_names, the attribute the base class sets
            # (there is no self.var_names assignment in this class).
            namespace = {
                name: self.X[:, col]
                for col, name in enumerate(self.variable_names)
            }
            # Math names are added after the variables, so on a name clash
            # the function/constant wins.
            namespace.update({
                'exp': np.exp,
                'sqrt': np.sqrt,
                'pi': np.pi,
                'cos': np.cos,
                'sin': np.sin,
                'tan': np.tan,
                'tanh': np.tanh,
                'ln': np.log,
                'log': np.log,  # Quite sure the Feynman dataset has no base 10 logs
                'arcsin': np.arcsin,
            })
            # SECURITY: eval executes the formula text taken from the CSV row.
            # Only use this with a dataset file you trust.
            self.y = eval(self.form, namespace)

    def __str__(self):
        return f"Feynman Equation: {self.eq_id}|Form: {self.form}"

    def __repr__(self):
        return str(self)

    @staticmethod
    def mk_problems(first=100, gen=False, dp=500, data_dir="datasets/FeynmanEquations.csv"):
        """

        first: the first "first" equations from the dataset will be made into problems
        gen: forwarded to FeynmanProblem; if true X and y are generated for each problem
        dp: forwarded to FeynmanProblem; the number of data points to generate
        data_dir: the path pointing to the Feynman Equations csv
        returns: list of FeynmanProblems

        Rows with an empty 'Filename' are skipped and do not count towards
        "first".  A row that fails to build is reported on stdout, counted,
        and left out of the returned list.
        """
        ret = []
        # newline="" is what the csv module asks for when opening its input.
        with open(data_dir, newline="") as csvfile:
            attempted = 0
            for i, row in enumerate(csv.DictReader(csvfile)):
                # ">=" so that exactly "first" rows are attempted.
                if attempted >= first:
                    break
                if row['Filename'] == '':
                    continue
                try:
                    ret.append(FeynmanProblem(row, gen=gen, dp=dp))
                except Exception as e:
                    # Best effort: one bad row must not abort the whole load,
                    # but say why it failed instead of hiding the error.
                    print(f"FAILED ON ROW {i}: {type(e).__name__}: {e}")
                attempted += 1
        return ret


if __name__ == "__main__":
    # Script entry point: build problems from the default CSV path with
    # generated data and print the resulting list.
    print(FeynmanProblem.mk_problems(first=100, gen=True))