Spaces:

MilesCranmer
/

PySR

Sleeping

App Files Files Community

PySR / lgga /constraint_discovery.py

AutonLabTruth

Added Truth Python Files

b865169 almost 4 years ago

raw

history blame

6.3 kB

	from sklearn.linear_model import LinearRegression
	from Transformation import *
	from Truth import *
	import itertools
	import warnings
	import traceback


	def gen_valid_points(oracle, npoints=20, default_min=0.5, default_max=30):
	"""
	Generates valid dataset (npoints, dim)
	"""
	dim = oracle.nvariables
	# print(f"Dim {dim}, {oracle.nvariables}")
	# print(f"Oracle has {oracle} {oracle.variable_names}")
	mins = []
	maxes = []
	for r in oracle.ranges:
	if r is None:
	mins.append(default_min)
	maxes.append(default_max)
	else:
	mins.append(r[0])
	maxes.append(r[1])
	return np.random.uniform(low=mins, high=maxes, size=(npoints, dim))


	def discover(transformation, oracle, npoints=20, threshold=0.98, timeout=5):
	"""
	Constraint is a class child of the Class parent Constraint

	Oracle is a class which has a variable nvariables i.e number of inputs and a function f which performs f(X)
	f(X) must be of shape (n, nvariables)

	npoints: number of data points to train the weak model with

	threshold: minimum accuracy of weak model to say that a constraint has been found

	timeout: If the random generator cannot find a valid input in timeout seconds we quit
	"""
	# Get random 10 points from some range
	start = time()
	sat = False
	while not sat and time() - start < timeout:
	try:
	points = gen_valid_points(oracle, npoints)
	y_original = oracle.f(points)
	if any(np.isnan(y_original)) or any(np.isinf(y_original)):
	print(points, points.shape, oracle)
	print(y_original)
	break
	raise ValueError()
	sat = True
	except:
	traceback.print_stack()
	if not sat:
	warnings.warn(f"Could not find an input that worked for oracle - ({oracle})")
	return False, None
	# print(points)
	X = transformation.transform(points)
	try:
	y = oracle.f(X)
	if any(np.isnan(y)) or any(np.isinf(y)):
	raise ValueError()
	except:
	# If the oracle cannot evaluate this input because of an out of domain error
	return False, None
	model, score = weak_learner(X, y, y_original)
	if score > threshold:
	return True, Truth(transformation, model)
	else:
	return False, Truth(transformation, model)


	def weak_learner(X, y, y_original):
	"""
	Takes in X, y and returns a weak learner that tries to fit the training data and its associated R^2 score as well as the model itself
	"""

	y_original = np.reshape(y_original, newshape=(len(y_original), 1))
	# print(X.shape, y_original.shape)
	new_X = np.append(X, y_original, axis=1)

	model = LinearRegression()
	model.fit(new_X, y)
	# Force the model to be simple by rounding coefficients to 2 decimal points
	model.coef_ = np.round(model.coef_, 2)
	model.intercept_ = np.round(model.intercept_, 2)

	score = model.score(new_X, y)
	return model, score


	def powerset(iterable):
	"powerset([1,2,3]) --> () (1,) (2,) (3,) (1,2) (1,3) (2,3) (1,2,3)"
	s = list(iterable)
	return itertools.chain.from_iterable(itertools.combinations(s, r) for r in range(len(s) + 1))


	def multiprocess_task(transformation, oracle):
	"""
	Takes in a constraint and oracle and returns (constraint, model) if the value from discover is true else returns None
	"""
	value, truth = discover(transformation, oracle)
	if value == True:
	return truth
	else:
	return None


	def naive_procedure(oracle):
	"""
	Takes in an oracle and gives out an exhaustive list of form [(constraint, model)] for all true constraints
	"""
	nvariables = oracle.nvariables
	var_list = range(nvariables)
	pairs = itertools.combinations(var_list, r=2)
	sets = [x for x in powerset(var_list) if len(x) > 0]
	final = []
	transformations = []
	for pair in pairs:
	transformations.append(SymTransformation(pair[0], pair[1]))
	pass
	for smallset in sets:
	if len(smallset) > 1:
	transformations.append(ValueTransformation(smallset))
	transformations.append(ZeroTransformation(smallset))

	pass
	# with concurrent.futures.ProcessPoolExecutor() as executor:
	# args = [(constraint, oracle) for constraint in constraints]
	# results = executor.map(lambda x: multiprocess_task(*x), args)

	temp = [multiprocess_task(transformation, oracle) for transformation in transformations]
	for t in temp:
	if t is not None:
	final.append(t)
	return final


	def process_from_problems(problems):
	ids = []
	forms = []
	ns = []
	for problem in problems:
	nvariables = problem.n_vars
	form = problem.form
	variable_names = problem.var_names
	id = problem.eq_id

	oracle = Oracle(nvariables, form=form, variable_names=variable_names, id=id)
	ids.append(oracle.id)
	forms.append(oracle.form)
	ns = len(naive_procedure(oracle))
	d = {"id": ids, "form": forms, "Number of Constraints": ns}
	return d


	def process_from_form_and_names(form, variable_names):
	"""
	Returns a julia string which declares an array called TRUTHS
	"""
	if form is None or variable_names is None:
	return "TRUTHS = []"
	nvars = len(variable_names)
	oracle = Oracle(nvariables=nvars, form=form, variable_names=variable_names)
	truths = naive_procedure(oracle)
	print("Discovered the following Auxiliary Truths")
	for truth in truths:
	print(truth)
	julia_string = "TRUTHS = ["
	for truth in truths:
	addition = truth.julia_string()
	julia_string = julia_string + addition + ", "
	julia_string = julia_string + "]"
	return julia_string


	if __name__ == "__main__":
	from Transformation import SymTransformation
	from Oracle import Oracle
	from time import time

	variable_names = ["alpha", "beta"]
	form = "alpha * beta"
	nvariables = len(variable_names)
	# range_restriction={2: (1, 20)}
	oracle = Oracle(nvariables, form=form, variable_names=variable_names)
	now = time()
	finals = naive_procedure(oracle)
	end = time()
	print(finals)
	print(end - now)