# PySR / lgga / constraint_discovery.py
# Retrieved from a web file viewer: uploaded by AutonLabTruth,
# commit b865169 ("Added Truth Python Files"), 6.3 kB.
from sklearn.linear_model import LinearRegression
from Transformation import *
from Truth import *
import itertools
import warnings
import traceback
def gen_valid_points(oracle, npoints=20, default_min=0.5, default_max=30):
    """
    Sample a random dataset of shape (npoints, oracle.nvariables).

    Column i is drawn uniformly between the bounds stored in
    ``oracle.ranges[i]``; an entry of ``None`` falls back to
    ``(default_min, default_max)``.
    """
    n_columns = oracle.nvariables
    lower_bounds = []
    upper_bounds = []
    for bounds in oracle.ranges:
        if bounds is None:
            low, high = default_min, default_max
        else:
            low, high = bounds[0], bounds[1]
        lower_bounds.append(low)
        upper_bounds.append(high)
    # The per-variable bounds broadcast across the rows of the sample.
    sample_shape = (npoints, n_columns)
    return np.random.uniform(low=lower_bounds, high=upper_bounds, size=sample_shape)
def discover(transformation, oracle, npoints=20, threshold=0.98, timeout=5):
    """
    Check whether `transformation` reveals an auxiliary truth about `oracle`.

    transformation: object exposing transform(points), applied to the sampled inputs
    oracle: object exposing nvariables, ranges and a function f; f is called
        with an array of shape (npoints, nvariables)
    npoints: number of data points to train the weak model with
    threshold: the weak model's score must exceed this for a truth to be reported
    timeout: seconds to keep drawing random inputs until the oracle produces
        finite outputs for one of them

    Returns a pair (found, truth):
        (False, None) if no valid input was found within `timeout`, or if the
            oracle cannot evaluate the transformed input;
        (score > threshold, Truth(transformation, model)) otherwise.
    """
    # `time` is only imported under the `__main__` guard in this file, so
    # bind it locally rather than depend on a global that may be missing.
    from time import time

    start = time()
    sat = False
    points = None
    y_original = None
    while time() - start < timeout:
        try:
            points = gen_valid_points(oracle, npoints)
            y_original = oracle.f(points)
            # Non-finite outputs (NaN/inf) mean this draw is unusable; fall
            # through and draw again until the timeout expires.
            if np.all(np.isfinite(y_original)):
                sat = True
                break
        except Exception:
            # Report the actual failure and retry with a fresh draw.
            traceback.print_exc()
    if not sat:
        warnings.warn(f"Could not find an input that worked for oracle - ({oracle})")
        return False, None

    X = transformation.transform(points)
    try:
        y = oracle.f(X)
    except Exception:
        # If the oracle cannot evaluate this input because of an out of domain error
        return False, None
    try:
        transformed_ok = bool(np.all(np.isfinite(y)))
    except Exception:
        transformed_ok = False
    if not transformed_ok:
        return False, None

    model, score = weak_learner(X, y, y_original)
    return bool(score > threshold), Truth(transformation, model)
def weak_learner(X, y, y_original):
    """
    Fit a linear model that predicts y from the columns of X plus y_original.

    X: 2-D array of transformed inputs
    y: oracle outputs for X (the regression target)
    y_original: oracle outputs for the untransformed inputs; appended to X
        as one extra feature column

    Returns (model, score): the fitted LinearRegression with rounded
    coefficients, and its score (R^2) on the same training data computed
    after the rounding.
    """
    # Shape is passed positionally: the `newshape` keyword is deprecated in
    # recent NumPy releases, while the positional form works on all versions.
    y_column = np.reshape(y_original, (len(y_original), 1))
    features = np.append(X, y_column, axis=1)
    model = LinearRegression()
    model.fit(features, y)
    # Force the model to be simple by rounding coefficients to 2 decimal points
    model.coef_ = np.round(model.coef_, 2)
    model.intercept_ = np.round(model.intercept_, 2)
    score = model.score(features, y)
    return model, score
def powerset(iterable):
    """
    Lazily yield every subset of `iterable` as a tuple, smallest first.

    powerset([1,2,3]) --> () (1,) (2,) (3,) (1,2) (1,3) (2,3) (1,2,3)
    """
    items = list(iterable)
    subset_sizes = range(len(items) + 1)
    subsets_by_size = (itertools.combinations(items, size) for size in subset_sizes)
    return itertools.chain.from_iterable(subsets_by_size)
def multiprocess_task(transformation, oracle):
    """
    Run discover() for one transformation against the oracle.

    Returns the truth object produced by discover() when it reports success,
    otherwise None.
    """
    found, truth = discover(transformation, oracle)
    # Truthiness test instead of `== True`; discover() reports a boolean flag.
    return truth if found else None
def naive_procedure(oracle):
    """
    Exhaustively try candidate transformations on the oracle's variables.

    Candidates are a SymTransformation for every pair of variable indices,
    a ValueTransformation for every subset with more than one index, and a
    ZeroTransformation for every non-empty subset. Returns the list of
    non-None results of multiprocess_task over those candidates, in order.
    """
    variable_indices = range(oracle.nvariables)
    candidates = [
        SymTransformation(first, second)
        for first, second in itertools.combinations(variable_indices, r=2)
    ]
    for subset in powerset(variable_indices):
        if len(subset) == 0:
            continue
        if len(subset) > 1:
            candidates.append(ValueTransformation(subset))
        # NOTE(review): the original indentation was lost; ZeroTransformation
        # is taken to apply to every non-empty subset (including singletons),
        # which is what the `len > 0` filter suggests — confirm.
        candidates.append(ZeroTransformation(subset))
    # Candidates are evaluated sequentially; a process-pool variant was left
    # disabled in the original code.
    outcomes = [multiprocess_task(candidate, oracle) for candidate in candidates]
    return [truth for truth in outcomes if truth is not None]
def process_from_problems(problems):
    """
    Count the discovered truths for each problem.

    Each problem must expose n_vars, form, var_names and eq_id; an Oracle is
    built from them and passed to naive_procedure().

    Returns a dict of three parallel lists, one entry per problem:
    "id", "form" and "Number of Constraints".
    """
    # NOTE(review): `Oracle` is only visibly imported under the `__main__`
    # guard; presumably a star import supplies it otherwise — confirm.
    ids = []
    forms = []
    ns = []
    for problem in problems:
        oracle = Oracle(
            problem.n_vars,
            form=problem.form,
            variable_names=problem.var_names,
            id=problem.eq_id,
        )
        ids.append(oracle.id)
        forms.append(oracle.form)
        # Append rather than assign: assigning replaced the list with the
        # last problem's count, leaving it out of step with ids/forms.
        ns.append(len(naive_procedure(oracle)))
    return {"id": ids, "form": forms, "Number of Constraints": ns}
def process_from_form_and_names(form, variable_names):
    """
    Build a Julia snippet declaring an array named TRUTHS.

    With a missing form or missing variable names the array is empty.
    Otherwise an Oracle is built for the form, naive_procedure() is run on
    it, the discovered truths are printed, and each truth's julia_string()
    becomes one array entry (every entry is followed by ", ").
    """
    if form is None or variable_names is None:
        return "TRUTHS = []"
    oracle = Oracle(nvariables=len(variable_names), form=form, variable_names=variable_names)
    truths = naive_procedure(oracle)
    print("Discovered the following Auxiliary Truths")
    for truth in truths:
        print(truth)
    entries = "".join(truth.julia_string() + ", " for truth in truths)
    return "TRUTHS = [" + entries + "]"
if __name__ == "__main__":
    from Transformation import SymTransformation
    from Oracle import Oracle
    from time import time

    # Smoke run: discover truths for a two-variable product and time it.
    variable_names = ["alpha", "beta"]
    form = "alpha * beta"
    # range_restriction={2: (1, 20)}
    oracle = Oracle(len(variable_names), form=form, variable_names=variable_names)
    started = time()
    finals = naive_procedure(oracle)
    elapsed = time() - started
    print(finals)
    print(elapsed)