AutonLabTruth committed on
Commit
e30b74d
·
1 Parent(s): a924098

Removed LGGA from the recover branch entirely

Browse files
lgga/FeynmanProblem.py DELETED
@@ -1,62 +0,0 @@
1
- import numpy as np
2
- import pandas as pd
3
- import tempfile, os, pdb, csv, traceback,random, time
4
-
5
-
6
class FeynmanProblem:
    """One row of the Feynman equations table, optionally with sampled data.

    Attributes read from the row: ``eq_id`` (``Filename``), ``form``
    (``Formula``), ``n_vars`` (``# variables``), and per variable its name
    (``var_names``) and bounds (``low`` / ``high``).  ``X`` and ``Y`` stay
    ``None`` unless the problem is built with ``gen=True``.
    """

    def __init__(self, row, gen=False):
        """Build a problem from a CSV row.

        row: mapping with the keys ``Filename``, ``Formula``,
            ``# variables`` and ``v{k}_name`` / ``v{k}_low`` / ``v{k}_high``
            for k = 1..n_vars.
        gen: when true, draw ``dp`` random input rows into ``X`` and
            evaluate ``form`` on them into ``Y``.
        """
        self.eq_id = row['Filename']
        self.form = row['Formula']
        self.n_vars = int(row['# variables'])
        self.var_names = [row[f'v{i+1}_name'] for i in range(self.n_vars)]
        self.low = [float(row[f'v{i+1}_low']) for i in range(self.n_vars)]
        self.high = [float(row[f'v{i+1}_high']) for i in range(self.n_vars)]
        # Fixed sample count; the row's 'datapoints' column is not consulted.
        self.dp = 500
        self.X = None
        self.Y = None
        if gen:
            # NOTE: inputs are drawn from a fixed [0.01, 25) box, not from
            # the per-variable low/high bounds stored above.
            self.X = np.random.uniform(0.01, 25, size=(self.dp, self.n_vars))
            namespace = {
                name: self.X[:, col] for col, name in enumerate(self.var_names)
            }
            # Functions/constants are added after the variables, so they win
            # if a variable shares one of these names.
            namespace.update({
                'exp': np.exp,
                'sqrt': np.sqrt,
                'pi': np.pi,
                'cos': np.cos,
                'sin': np.sin,
                'tan': np.tan,
                'tanh': np.tanh,
                'ln': np.log,
                'arcsin': np.arcsin,
            })
            # SECURITY: eval() executes the formula text as Python. Only use
            # this with formula files you trust.
            self.Y = eval(self.form, namespace)

    def __str__(self):
        return f"Feynman Equation: {self.eq_id}|Form: {self.form}"

    def __repr__(self):
        return str(self)

    @staticmethod
    def mk_problems(first=100, gen=False, data_dir="datasets/FeynmanEquations.csv"):
        """Load problems from the equations CSV.

        first: maximum number of non-blank rows to attempt.  Rows that fail
            to parse still count towards this limit.
        gen: forwarded to ``FeynmanProblem`` to sample data for each problem.
        data_dir: path of the CSV file (despite the name, a file path).

        Returns the list of problems that were built successfully.  Rows
        with an empty ``Filename`` are skipped; rows that raise while being
        parsed are reported on stdout and skipped (best effort).
        """
        problems = []
        attempted = 0
        # newline='' is what the csv module asks for when opening its input.
        with open(data_dir, newline='') as csvfile:
            for i, row in enumerate(csv.DictReader(csvfile)):
                # '>=' so that at most `first` rows are attempted (the old
                # '>' comparison let one extra row through).
                if attempted >= first:
                    break
                if row['Filename'] == '':
                    continue
                try:
                    problems.append(FeynmanProblem(row, gen=gen))
                except Exception:
                    print(f"FAILED ON ROW {i}")
                attempted += 1
        return problems
58
-
59
-
60
if __name__ == "__main__":
    # Smoke run: load up to 100 problems with sampled data and dump them.
    print(FeynmanProblem.mk_problems(first=100, gen=True))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
lgga/Oracle.py DELETED
@@ -1,97 +0,0 @@
1
- import numpy as np
2
-
3
-
4
class Oracle:
    """Black-box wrapper around a target function.

    The function is supplied either as a callable ``f`` or as a formula
    string ``form`` that is evaluated with each name in ``variable_names``
    bound to the matching column of the input.
    """

    # Names a formula string may use in addition to its variables.
    oracle_d = {'exp': np.exp, 'sqrt': np.sqrt, 'pi': np.pi, 'cos': np.cos, 'sin': np.sin, 'tan': np.tan,
                'tanh': np.tanh, 'ln': np.log, 'arcsin': np.arcsin}

    def __init__(self, nvariables, f=None, form=None, variable_names=None, range_restriction=None, id=None):
        """
        nvariables: the number of variables the function takes in
        f: takes in an X of shape (n, nvariables) and returns f(X) of shape (n,)
        form: string definition of the function
        variable_names: variable names used in form
        range_restriction: dictionary of form {variable_index: (low, high)};
            None (the default) means no variable is restricted
        id: optional label, used by ``__str__`` when truthy

        Raises ValueError unless exactly one of ``f`` / ``form`` is given,
        or when ``form`` is given without ``variable_names``.
        """
        self.nvariables = nvariables
        if f is None and form is None:
            raise ValueError("f and form are both none in Oracle initialization. Specify at least one")
        if f is not None and form is not None:
            raise ValueError("f and form are both not none, pick only one")
        if form is not None and variable_names is None:
            raise ValueError("If form is provided then variable_names must also be provided")

        # Always define these attributes so __str__ (and callers that read
        # .id / .form) never hit AttributeError, whichever way the oracle
        # was constructed.
        self.form = form
        self.variable_names = variable_names
        self.func = f
        self.id = id
        self.use_func = form is None

        if not self.use_func:
            # Per-instance evaluation namespace; variable slots are filled
            # with real columns on every call to form_f.
            self.d = Oracle.oracle_d.copy()
            for var_name in variable_names:
                self.d[var_name] = None

        restrictions = {} if range_restriction is None else range_restriction
        # One entry per variable: the (low, high) pair, or None if unrestricted.
        self.ranges = [restrictions[i] if i in restrictions else None for i in range(nvariables)]

    def f(self, X):
        """
        Evaluate the oracle on X of shape (n, nvariables).

        Raises ValueError when ``invalid_input(X)`` is true.
        """
        if self.invalid_input(X):
            raise ValueError("Invalid input to Oracle")
        if self.use_func:
            return self.func(X)
        return self.form_f(X)

    def form_f(self, X):
        """
        Returns the function output using form
        """
        for i, var in enumerate(self.variable_names):
            self.d[var] = X[:, i]
        # SECURITY: eval() executes the formula text as Python. Only build
        # oracles from formula strings you trust.
        return eval(self.form, self.d)

    def invalid_input(self, X):
        """
        Returns True if any of the following are true, False otherwise
            X has more or less variables than nvariables
            X has a value in a restricted range variable outside said range
        """
        if X.shape[1] != self.nvariables:
            return True
        for i, r in enumerate(self.ranges):
            if r is None:
                continue
            low, high = r[0], r[1]
            column = X[:, i]
            if not np.all((low <= column) & (column <= high)):
                return True
        return False

    def __str__(self):
        if self.id:
            return str(self.id)
        elif self.form:
            return str(self.form)
        else:
            return "<Un named Oracle>"

    @staticmethod
    def from_problem(problem):
        """
        Static function to return an oracle when given an instance of class problem.

        Reads ``n_vars``, ``form``, ``var_names`` and ``eq_id`` from problem.
        """
        return Oracle(nvariables=problem.n_vars, f=None, form=problem.form, variable_names=problem.var_names,
                      id=problem.eq_id)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
lgga/Transformation.py DELETED
@@ -1,77 +0,0 @@
1
class Transformation:
    """Base transformation; on its own it leaves the data untouched.

    ``index`` is a numeric tag stored as given (subclasses in this file pass
    a fixed value each) and ``name`` is the human-readable label returned by
    ``str()``.
    """

    def __init__(self, index, name="Identity Transformation"):
        self.index = index
        self.name = name

    def transform(self, X):
        """
        Takes in data of shape (n, d) and returns the augmented data for
        this constraint. The base class hands X back unchanged.
        """
        return X

    def __str__(self):
        return str(self.name)

    def __repr__(self):
        return self.__str__()

    def get_params(self):
        """Parameters of the transformation; subclasses must override."""
        raise NotImplementedError()
20
-
21
-
22
class SymTransformation(Transformation):
    """Swaps two columns of the data (symmetry between two variables)."""

    def __init__(self, x1=0, x2=1):
        """
        x1, x2 = indices of the variables which are symmetric
        """
        label = f"Symmetry Between Variable {x1} and {x2}"
        super().__init__(1, name=label)
        self.x1 = x1
        self.x2 = x2

    def transform(self, X):
        """
        Return a copy of X with columns x1 and x2 exchanged; X itself is
        not modified.
        """
        swapped = X.copy()
        # Both right-hand sides read from the untouched input, so the order
        # of the two assignments does not matter.
        swapped[:, self.x1] = X[:, self.x2]
        swapped[:, self.x2] = X[:, self.x1]
        return swapped

    def get_params(self):
        """Return the two swapped column indices as a list."""
        return [self.x1, self.x2]
41
-
42
-
43
class ZeroTransformation(Transformation):
    """Sets the selected columns of the data to zero."""

    def __init__(self, inds=None):
        """
        inds is a list of indices to set to 0; defaults to [0].
        """
        # A fresh list per instance: the previous `inds=[0]` default was one
        # shared list object stored on every default-constructed instance.
        if inds is None:
            inds = [0]
        super().__init__(2, name=f"Zero Constraint for Variables {inds}")
        self.inds = inds

    def transform(self, X):
        """Return a copy of X with every column in ``inds`` set to 0."""
        temp = X.copy()
        for ind in self.inds:
            temp[:, ind] = 0
        return temp

    def get_params(self):
        """Return the zeroed column indices as a new list."""
        return list(self.inds)
59
-
60
-
61
class ValueTransformation(Transformation):
    """Forces the selected columns to share the first selected column's values."""

    def __init__(self, inds=None):
        """
        inds is list of indices to set to the same value as the first element in that list;
        defaults to [0].
        """
        # A fresh list per instance: the previous `inds=[0]` default was one
        # shared list object stored on every default-constructed instance.
        if inds is None:
            inds = [0]
        super().__init__(3, name=f"Value Constraint for Variables {inds}")
        self.inds = inds

    def transform(self, X):
        """Return a copy of X where columns ``inds[1:]`` equal column ``inds[0]``."""
        temp = X.copy()
        val = temp[:, self.inds[0]]
        for ind in self.inds[1:]:
            temp[:, ind] = val
        return temp

    def get_params(self):
        """Return the tied column indices as a new list."""
        return list(self.inds)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
lgga/Truth.py DELETED
@@ -1,39 +0,0 @@
1
- import numpy as np
2
-
3
-
4
class Truth:
    """A transformation paired with the linear model fitted for it.

    ``weights`` is the model's coefficients followed by its intercept; in
    ``predict`` they are applied to the columns of X, then to y, then as a
    constant term.
    """

    def __init__(self, transformation, model):
        """
        transformation: object providing ``transform``, ``index`` and ``get_params``
        model: fitted linear model exposing ``coef_`` and ``intercept_``
        """
        self.transformation = transformation
        self.weights = list(model.coef_) + [model.intercept_]

    def predict(self, X, y):
        """
        Return the linear combination of the columns of X, of y and of the
        constant 1 using ``weights``.

        NOTE(review): X is used exactly as passed in. The original code also
        computed ``self.transformation.transform(X)`` here but never used the
        result; presumably callers are expected to pass data that has already
        been through ``transform`` - confirm against the callers.
        """
        n_features = X.shape[1]
        res = np.zeros(shape=y.shape)
        for pos, weight in enumerate(self.weights):
            if pos < n_features:
                res = res + (X[:, pos] * weight)
            elif pos == n_features:
                res = res + (y * weight)
            else:
                # Only one weight (the intercept) may follow the y weight.
                assert pos == n_features + 1
                res = res + weight
        return res

    def transform(self, X):
        """Apply the wrapped transformation to X."""
        return self.transformation.transform(X)

    def __str__(self):
        return f"Auxiliary Truth: {self.transformation} with linear coefficients for X, y, 1 {self.weights}"

    def __repr__(self):
        return str(self)

    def julia_string(self):
        """
        Return an expression that sorta creates a julia instances of Truth with these parameters
        Specifically Truth(type, params, weights)
        Julia indexing starts at 1 not 0 so we need to add 1 to all parameter indices
        """
        index = self.transformation.index
        params = [param + 1 for param in self.transformation.get_params()]
        # Plain Python floats: NumPy >= 2 prints its scalars inside a list as
        # "np.float64(1.0)", which is not a valid Julia literal.
        weights = [float(weight) for weight in self.weights]
        return f"Truth({index}, {params}, {weights})"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
lgga/__init__.py DELETED
File without changes
lgga/constraint_discovery.py DELETED
@@ -1,193 +0,0 @@
1
import itertools
import traceback
import warnings
from time import time

import numpy as np
from sklearn.linear_model import LinearRegression

from Oracle import Oracle
from Transformation import *
from Truth import *
7
-
8
-
9
def gen_valid_points(oracle, npoints=20, default_min=0.5, default_max=30):
    """
    Draw a uniformly random dataset of shape (npoints, oracle.nvariables).

    Each column is sampled from that variable's entry in oracle.ranges, or
    from (default_min, default_max) when the entry is None.
    """
    bounds = [
        (default_min, default_max) if r is None else (r[0], r[1])
        for r in oracle.ranges
    ]
    lows = [low for low, _ in bounds]
    highs = [high for _, high in bounds]
    shape = (npoints, oracle.nvariables)
    return np.random.uniform(low=lows, high=highs, size=shape)
26
-
27
-
28
def discover(transformation, oracle, npoints=20, threshold=0.98, timeout=5):
    """
    Test whether `transformation` is a constraint of `oracle`.

    transformation: object with a transform(X) method (see Transformation)

    oracle: has a variable nvariables i.e number of inputs and a function f which performs f(X)
    X must be of shape (n, nvariables)

    npoints: number of data points to train the weak model with

    threshold: minimum accuracy of weak model to say that a constraint has been found

    timeout: while sampling or evaluating the oracle keeps raising, retry for at most this many seconds

    Returns (found, truth):
        (False, None) when no usable sample was obtained or the oracle could
        not be evaluated on the transformed sample;
        otherwise (score > threshold, Truth(transformation, model)).
    """
    start = time()
    points = None
    y_original = None
    sat = False
    while not sat and time() - start < timeout:
        try:
            points = gen_valid_points(oracle, npoints)
            y_original = oracle.f(points)
            if any(np.isnan(y_original)) or any(np.isinf(y_original)):
                # A non-finite output ends the search at once (no resample);
                # the warning below reports the failure.
                break
            sat = True
        except Exception:
            # Report the actual failure and retry until the timeout expires.
            # `except Exception` (not a bare except) keeps Ctrl-C working.
            traceback.print_exc()
    if not sat:
        warnings.warn(f"Could not find an input that worked for oracle - ({oracle})")
        return False, None

    X = transformation.transform(points)
    try:
        y = oracle.f(X)
        if any(np.isnan(y)) or any(np.isinf(y)):
            raise ValueError()
    except Exception:
        # If the oracle cannot evaluate this input because of an out of domain error
        return False, None

    model, score = weak_learner(X, y, y_original)
    return bool(score > threshold), Truth(transformation, model)
73
-
74
-
75
def weak_learner(X, y, y_original):
    """
    Fit a linear model that predicts y from the columns of X plus y_original.

    X: array of shape (n, d)
    y: targets of length n
    y_original: length-n values appended to X as one extra feature column

    Returns (model, score) where score is model.score(...) on the training
    data, computed after the coefficients have been rounded.
    """
    # Shape passed positionally: the `newshape=` keyword is deprecated in
    # recent NumPy releases.
    y_column = np.reshape(y_original, (len(y_original), 1))
    features = np.append(X, y_column, axis=1)

    model = LinearRegression()
    model.fit(features, y)
    # Force the model to be simple by rounding coefficients to 2 decimal points
    model.coef_ = np.round(model.coef_, 2)
    model.intercept_ = np.round(model.intercept_, 2)

    score = model.score(features, y)
    return model, score
92
-
93
-
94
def powerset(iterable):
    """
    Iterate over every subset of `iterable` as a tuple, smallest first.

    powerset([1,2,3]) --> () (1,) (2,) (3,) (1,2) (1,3) (2,3) (1,2,3)
    """
    items = list(iterable)
    by_size = [itertools.combinations(items, size) for size in range(len(items) + 1)]
    return itertools.chain(*by_size)
98
-
99
-
100
def multiprocess_task(transformation, oracle):
    """
    Run discover() for one transformation.

    Returns the Truth produced by discover when the constraint was found,
    otherwise None.
    """
    found, truth = discover(transformation, oracle)
    # Truthiness test instead of comparing against the True literal.
    return truth if found else None
109
-
110
-
111
def naive_procedure(oracle):
    """
    Exhaustively try candidate transformations against the oracle.

    Candidates, in order: a SymTransformation for every pair of variables,
    then for every non-empty subset of variables a ValueTransformation
    (subsets of two or more only) followed by a ZeroTransformation.

    Returns the list of Truth objects for the candidates that held.
    """
    variables = range(oracle.nvariables)

    candidates = [
        SymTransformation(first, second)
        for first, second in itertools.combinations(variables, r=2)
    ]
    for subset in powerset(variables):
        if len(subset) == 0:
            continue
        if len(subset) > 1:
            candidates.append(ValueTransformation(subset))
        candidates.append(ZeroTransformation(subset))

    # Evaluated sequentially, one candidate at a time.
    outcomes = [multiprocess_task(candidate, oracle) for candidate in candidates]
    return [truth for truth in outcomes if truth is not None]
139
-
140
-
141
def process_from_problems(problems):
    """
    Run naive_procedure on an oracle built from each problem.

    problems: iterable of objects exposing n_vars, form, var_names and eq_id.

    Returns a dict of three parallel lists, one entry per problem:
    "id", "form" and "Number of Constraints".
    """
    ids = []
    forms = []
    ns = []
    for problem in problems:
        oracle = Oracle(problem.n_vars, form=problem.form,
                        variable_names=problem.var_names, id=problem.eq_id)
        ids.append(oracle.id)
        forms.append(oracle.form)
        # Append per problem; the previous code rebound `ns` to the latest
        # count, so only the last problem's number survived.
        ns.append(len(naive_procedure(oracle)))
    return {"id": ids, "form": forms, "Number of Constraints": ns}
157
-
158
-
159
def process_from_form_and_names(form, variable_names):
    """
    Returns a julia string which declares an array called TRUTHS

    The array holds one entry per truth discovered for `form`; it is empty
    when either argument is None. Discovered truths are also printed.
    """
    if form is None or variable_names is None:
        return "TRUTHS = []"

    oracle = Oracle(nvariables=len(variable_names), form=form, variable_names=variable_names)
    truths = naive_procedure(oracle)

    print("Discovered the following Auxiliary Truths")
    for truth in truths:
        print(truth)

    # Every entry is followed by ", " (including the last one).
    entries = "".join(truth.julia_string() + ", " for truth in truths)
    return "TRUTHS = [" + entries + "]"
177
-
178
-
179
if __name__ == "__main__":
    from Transformation import SymTransformation
    from Oracle import Oracle
    from time import time

    # Demo: discover the constraints of a two-variable product and time it.
    variable_names = ["alpha", "beta"]
    form = "alpha * beta"
    nvariables = len(variable_names)
    oracle = Oracle(nvariables, form=form, variable_names=variable_names)

    started = time()
    finals = naive_procedure(oracle)
    elapsed = time() - started

    print(finals)
    print(elapsed)