AutonLabTruth commited on
Commit
b865169
1 Parent(s): a79a3fb

Added Truth Python Files

Browse files
lgga/FeynmanProblem.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pandas as pd
3
+ import tempfile, os, pdb, csv, traceback,random, time
4
+
5
+
6
+ class FeynmanProblem:
7
+ def __init__(self, row, gen=False):
8
+ self.eq_id = row['Filename']
9
+ self.form = row['Formula']
10
+ self.n_vars = int(row['# variables'])
11
+ self.var_names = [row[f'v{i+1}_name'] for i in range(self.n_vars)]
12
+ self.low = [float(row[f'v{i+1}_low']) for i in range(self.n_vars)]
13
+ self.high = [float(row[f'v{i+1}_high']) for i in range(self.n_vars)]
14
+ self.dp = 500#int(row[f'datapoints'])
15
+ self.X = None
16
+ self.Y = None
17
+ if gen:
18
+ self.X = np.random.uniform(0.01, 25, size=(self.dp, self.n_vars))
19
+ d = {}
20
+ for var in range(len(self.var_names)):
21
+ d[self.var_names[var]] = self.X[:, var]
22
+ d['exp'] = np.exp
23
+ d['sqrt'] = np.sqrt
24
+ d['pi'] = np.pi
25
+ d['cos'] = np.cos
26
+ d['sin'] = np.sin
27
+ d['tan'] = np.tan
28
+ d['tanh'] = np.tanh
29
+ d['ln'] = np.log
30
+ d['arcsin'] = np.arcsin
31
+ self.Y = eval(self.form,d)
32
+ return
33
+
34
+ def __str__(self):
35
+ return f"Feynman Equation: {self.eq_id}|Form: {self.form}"
36
+
37
+ def __repr__(self):
38
+ return str(self)
39
+
40
+ def mk_problems(first=100, gen=False, data_dir="datasets/FeynmanEquations.csv"):
41
+ ret = []
42
+ with open(data_dir) as csvfile:
43
+ ind = 0
44
+ reader = csv.DictReader(csvfile)
45
+ for i, row in enumerate(reader):
46
+ if ind > first:
47
+ break
48
+ if row['Filename'] == '': continue
49
+ try:
50
+ p = FeynmanProblem(row, gen=gen)
51
+ ret.append(p)
52
+ except Exception as e:
53
+ #traceback.print_exc()
54
+ #print(row)
55
+ print(f"FAILED ON ROW {i}")
56
+ ind += 1
57
+ return ret
58
+
59
+
60
+ if __name__ == "__main__":
61
+ ret = FeynmanProblem.mk_problems(first=100, gen=True)
62
+ print(ret)
lgga/Oracle.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+
3
+
4
+ class Oracle:
5
+ oracle_d = {'exp': np.exp, 'sqrt': np.sqrt, 'pi': np.pi, 'cos': np.cos, 'sin': np.sin, 'tan': np.tan,
6
+ 'tanh': np.tanh, 'ln': np.log, 'arcsin': np.arcsin}
7
+
8
+ def __init__(self, nvariables, f=None, form=None, variable_names=None, range_restriction={}, id=None):
9
+ """
10
+ nvariables: is the number of variables the function takes in
11
+ f: takes in an X of shape (n, nvariables) and returns f(X) of shape (n,)
12
+ form: String Def of the function
13
+ variable_names: variable names used in form
14
+ Range_restrictions: Dictionary of form {variable_index: (low, high)}
15
+ """
16
+ self.nvariables = nvariables
17
+ if f is None and form is None:
18
+ raise ValueError("f and form are both none in Oracle initialization. Specify at least one")
19
+ if f is not None and form is not None:
20
+ raise ValueError("f and form are both not none, pick only one")
21
+ if form is not None and variable_names is None:
22
+ raise ValueError("If form is provided then variable_names must also be provided")
23
+ if form is not None:
24
+ self.form = form
25
+ self.variable_names = variable_names
26
+ self.use_func = False
27
+ self.d = Oracle.oracle_d.copy()
28
+ for var_name in variable_names:
29
+ self.d[var_name] = None
30
+ else:
31
+ # f is not None
32
+ self.func = f
33
+ self.use_func = True
34
+
35
+ self.ranges = []
36
+ for i in range(nvariables):
37
+ if i in range_restriction:
38
+ self.ranges.append(range_restriction[i])
39
+ else:
40
+ self.ranges.append(None)
41
+
42
+ if id is not None:
43
+ self.id = id
44
+ return
45
+
46
+ def f(self, X):
47
+ """
48
+ X is of shape (n, nvariables)
49
+ """
50
+ if self.invalid_input(X):
51
+ raise ValueError("Invalid input to Oracle")
52
+ if self.use_func:
53
+ return self.func(X)
54
+ else:
55
+ return self.form_f(X)
56
+
57
+ def form_f(self, X):
58
+ """
59
+ Returns the function output using form
60
+ """
61
+ for i, var in enumerate(self.variable_names):
62
+ self.d[var] = X[:, i]
63
+ return eval(self.form, self.d)
64
+
65
+ def invalid_input(self, X):
66
+ """
67
+ Returns true if any of the following are true
68
+ X has more or less variables than nvariables
69
+ X has a value in a restricted range variable outside said range
70
+ """
71
+ if X.shape[1] != self.nvariables:
72
+ return True
73
+ for i, r in enumerate(self.ranges):
74
+ if r is None:
75
+ continue
76
+ else:
77
+ low = r[0]
78
+ high = r[1]
79
+ low_check = all(low <= X[:, i])
80
+ high_check = all(X[:, i] <= high)
81
+ if not low_check or not high_check:
82
+ return True
83
+
84
+ def __str__(self):
85
+ if self.id:
86
+ return str(self.id)
87
+ elif self.form:
88
+ return str(self.form)
89
+ else:
90
+ return "<Un named Oracle>"
91
+
92
+ def from_problem(problem):
93
+ """
94
+ Static function to return an oracle when given an instance of class problem.
95
+ """
96
+ return Oracle(nvariables=problem.n_vars, f=None, form=problem.form, variable_names=problem.var_names,
97
+ id=problem.eq_id)
lgga/Transformation.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ class Transformation:
2
+ def __init__(self, index, name="Identity Transformation"):
3
+ self.name = name
4
+ self.index = index
5
+
6
+ def transform(self, X):
7
+ """
8
+ Takes in a data point of shape (n, d) and returns an augmented data point based on the constraint
9
+ """
10
+ return X
11
+
12
+ def __str__(self):
13
+ return str(self.name)
14
+
15
+ def __repr__(self):
16
+ return str(self)
17
+
18
+ def get_params(self):
19
+ raise NotImplementedError
20
+
21
+
22
+ class SymTransformation(Transformation):
23
+ def __init__(self, x1=0, x2=1):
24
+ """
25
+ x1, x2 = indices of the variables which are symmetric
26
+ """
27
+ super().__init__(1, name=f"Symmetry Between Variable {x1} and {x2}")
28
+ self.x1 = x1
29
+ self.x2 = x2
30
+
31
+ def transform(self, X):
32
+ """
33
+ """
34
+ temp = X.copy()
35
+ temp[:, self.x2] = X[:, self.x1].copy()
36
+ temp[:, self.x1] = X[:, self.x2].copy()
37
+ return temp
38
+
39
+ def get_params(self):
40
+ return [self.x1, self.x2]
41
+
42
+
43
+ class ZeroTransformation(Transformation):
44
+ def __init__(self, inds=[0]):
45
+ """
46
+ inds is a list of indices to set to 0
47
+ """
48
+ super().__init__(2, name=f"Zero Constraint for Variables {inds}")
49
+ self.inds = inds
50
+
51
+ def transform(self, X):
52
+ temp = X.copy()
53
+ for ind in self.inds:
54
+ temp[:, ind] = 0
55
+ return temp
56
+
57
+ def get_params(self):
58
+ return list(self.inds)
59
+
60
+
61
+ class ValueTransformation(Transformation):
62
+ def __init__(self, inds=[0]):
63
+ """
64
+ inds is list of indices to set to the same value as the first element in that list
65
+ """
66
+ super().__init__(3, name=f"Value Constraint for Variables {inds}")
67
+ self.inds = inds
68
+
69
+ def transform(self, X):
70
+ temp = X.copy()
71
+ val = temp[:, self.inds[0]]
72
+ for ind in self.inds[1:]:
73
+ temp[:, ind] = val
74
+ return temp
75
+
76
+ def get_params(self):
77
+ return list(self.inds)
lgga/Truth.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+
3
+
4
+ class Truth:
5
+ def __init__(self, transformation, model):
6
+ self.transformation = transformation
7
+ self.weights = list(model.coef_) + [model.intercept_]
8
+
9
+ def predict(self, X, y):
10
+ transformed = self.transformation.transform(X)
11
+ res = np.zeros(shape=y.shape)
12
+ for w in range(len(self.weights)):
13
+ if w < X.shape[1]:
14
+ res = res + (X[:, w] * self.weights[w])
15
+ elif w == X.shape[1]:
16
+ res = res + (y * self.weights[w])
17
+ else:
18
+ assert w == X.shape[1] + 1
19
+ res = res + self.weights[w]
20
+ return res
21
+
22
+ def transform(self, X):
23
+ return self.transformation.transform(X)
24
+
25
+ def __str__(self):
26
+ return f"Auxiliary Truth: {self.transformation} with linear coefficients for X, y, 1 {self.weights}"
27
+
28
+ def __repr__(self):
29
+ return str(self)
30
+
31
+ def julia_string(self):
32
+ """
33
+ Return an expression that sorta creates a julia instances of Truth with these parameters
34
+ Specifically Truth(type, params, weights)
35
+ Julia indexing starts at 1 not 0 so we need to add 1 to all parameter indices
36
+ """
37
+ index = self.transformation.index
38
+ params = self.transformation.get_params()
39
+ return f"Truth({index}, {[param + 1 for param in params]}, {self.weights})"
lgga/__init__.py ADDED
File without changes
lgga/constraint_discovery.py ADDED
@@ -0,0 +1,193 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sklearn.linear_model import LinearRegression
2
+ from Transformation import *
3
+ from Truth import *
4
+ import itertools
5
+ import warnings
6
+ import traceback
7
+
8
+
9
+ def gen_valid_points(oracle, npoints=20, default_min=0.5, default_max=30):
10
+ """
11
+ Generates valid dataset (npoints, dim)
12
+ """
13
+ dim = oracle.nvariables
14
+ # print(f"Dim {dim}, {oracle.nvariables}")
15
+ # print(f"Oracle has {oracle} {oracle.variable_names}")
16
+ mins = []
17
+ maxes = []
18
+ for r in oracle.ranges:
19
+ if r is None:
20
+ mins.append(default_min)
21
+ maxes.append(default_max)
22
+ else:
23
+ mins.append(r[0])
24
+ maxes.append(r[1])
25
+ return np.random.uniform(low=mins, high=maxes, size=(npoints, dim))
26
+
27
+
28
+ def discover(transformation, oracle, npoints=20, threshold=0.98, timeout=5):
29
+ """
30
+ Constraint is a class child of the Class parent Constraint
31
+
32
+ Oracle is a class which has a variable nvariables i.e number of inputs and a function f which performs f(X)
33
+ f(X) must be of shape (n, nvariables)
34
+
35
+ npoints: number of data points to train the weak model with
36
+
37
+ threshold: minimum accuracy of weak model to say that a constraint has been found
38
+
39
+ timeout: If the random generator cannot find a valid input in timeout seconds we quit
40
+ """
41
+ # Get random 10 points from some range
42
+ start = time()
43
+ sat = False
44
+ while not sat and time() - start < timeout:
45
+ try:
46
+ points = gen_valid_points(oracle, npoints)
47
+ y_original = oracle.f(points)
48
+ if any(np.isnan(y_original)) or any(np.isinf(y_original)):
49
+ print(points, points.shape, oracle)
50
+ print(y_original)
51
+ break
52
+ raise ValueError()
53
+ sat = True
54
+ except:
55
+ traceback.print_stack()
56
+ if not sat:
57
+ warnings.warn(f"Could not find an input that worked for oracle - ({oracle})")
58
+ return False, None
59
+ # print(points)
60
+ X = transformation.transform(points)
61
+ try:
62
+ y = oracle.f(X)
63
+ if any(np.isnan(y)) or any(np.isinf(y)):
64
+ raise ValueError()
65
+ except:
66
+ # If the oracle cannot evaluate this input because of an out of domain error
67
+ return False, None
68
+ model, score = weak_learner(X, y, y_original)
69
+ if score > threshold:
70
+ return True, Truth(transformation, model)
71
+ else:
72
+ return False, Truth(transformation, model)
73
+
74
+
75
+ def weak_learner(X, y, y_original):
76
+ """
77
+ Takes in X, y and returns a weak learner that tries to fit the training data and its associated R^2 score as well as the model itself
78
+ """
79
+
80
+ y_original = np.reshape(y_original, newshape=(len(y_original), 1))
81
+ # print(X.shape, y_original.shape)
82
+ new_X = np.append(X, y_original, axis=1)
83
+
84
+ model = LinearRegression()
85
+ model.fit(new_X, y)
86
+ # Force the model to be simple by rounding coefficients to 2 decimal points
87
+ model.coef_ = np.round(model.coef_, 2)
88
+ model.intercept_ = np.round(model.intercept_, 2)
89
+
90
+ score = model.score(new_X, y)
91
+ return model, score
92
+
93
+
94
+ def powerset(iterable):
95
+ "powerset([1,2,3]) --> () (1,) (2,) (3,) (1,2) (1,3) (2,3) (1,2,3)"
96
+ s = list(iterable)
97
+ return itertools.chain.from_iterable(itertools.combinations(s, r) for r in range(len(s) + 1))
98
+
99
+
100
+ def multiprocess_task(transformation, oracle):
101
+ """
102
+ Takes in a constraint and oracle and returns (constraint, model) if the value from discover is true else returns None
103
+ """
104
+ value, truth = discover(transformation, oracle)
105
+ if value == True:
106
+ return truth
107
+ else:
108
+ return None
109
+
110
+
111
+ def naive_procedure(oracle):
112
+ """
113
+ Takes in an oracle and gives out an exhaustive list of form [(constraint, model)] for all true constraints
114
+ """
115
+ nvariables = oracle.nvariables
116
+ var_list = range(nvariables)
117
+ pairs = itertools.combinations(var_list, r=2)
118
+ sets = [x for x in powerset(var_list) if len(x) > 0]
119
+ final = []
120
+ transformations = []
121
+ for pair in pairs:
122
+ transformations.append(SymTransformation(pair[0], pair[1]))
123
+ pass
124
+ for smallset in sets:
125
+ if len(smallset) > 1:
126
+ transformations.append(ValueTransformation(smallset))
127
+ transformations.append(ZeroTransformation(smallset))
128
+
129
+ pass
130
+ # with concurrent.futures.ProcessPoolExecutor() as executor:
131
+ # args = [(constraint, oracle) for constraint in constraints]
132
+ # results = executor.map(lambda x: multiprocess_task(*x), args)
133
+
134
+ temp = [multiprocess_task(transformation, oracle) for transformation in transformations]
135
+ for t in temp:
136
+ if t is not None:
137
+ final.append(t)
138
+ return final
139
+
140
+
141
+ def process_from_problems(problems):
142
+ ids = []
143
+ forms = []
144
+ ns = []
145
+ for problem in problems:
146
+ nvariables = problem.n_vars
147
+ form = problem.form
148
+ variable_names = problem.var_names
149
+ id = problem.eq_id
150
+
151
+ oracle = Oracle(nvariables, form=form, variable_names=variable_names, id=id)
152
+ ids.append(oracle.id)
153
+ forms.append(oracle.form)
154
+ ns = len(naive_procedure(oracle))
155
+ d = {"id": ids, "form": forms, "Number of Constraints": ns}
156
+ return d
157
+
158
+
159
+ def process_from_form_and_names(form, variable_names):
160
+ """
161
+ Returns a julia string which declares an array called TRUTHS
162
+ """
163
+ if form is None or variable_names is None:
164
+ return "TRUTHS = []"
165
+ nvars = len(variable_names)
166
+ oracle = Oracle(nvariables=nvars, form=form, variable_names=variable_names)
167
+ truths = naive_procedure(oracle)
168
+ print("Discovered the following Auxiliary Truths")
169
+ for truth in truths:
170
+ print(truth)
171
+ julia_string = "TRUTHS = ["
172
+ for truth in truths:
173
+ addition = truth.julia_string()
174
+ julia_string = julia_string + addition + ", "
175
+ julia_string = julia_string + "]"
176
+ return julia_string
177
+
178
+
179
+ if __name__ == "__main__":
180
+ from Transformation import SymTransformation
181
+ from Oracle import Oracle
182
+ from time import time
183
+
184
+ variable_names = ["alpha", "beta"]
185
+ form = "alpha * beta"
186
+ nvariables = len(variable_names)
187
+ # range_restriction={2: (1, 20)}
188
+ oracle = Oracle(nvariables, form=form, variable_names=variable_names)
189
+ now = time()
190
+ finals = naive_procedure(oracle)
191
+ end = time()
192
+ print(finals)
193
+ print(end - now)