Spaces:

AshanGimhana
/

Aging_MouthReplace

Paused

App Files Files Community

Aging_MouthReplace / dlibs /dlib /control /lspi.h

AshanGimhana

Upload folder using huggingface_hub

9375c9a verified 9 months ago

raw

history blame contribute delete

4.95 kB

	// Copyright (C) 2015 Davis E. King (davis@dlib.net)
	// License: Boost Software License See LICENSE.txt for the full license.
	#ifndef DLIB_LSPI_Hh_
	#define DLIB_LSPI_Hh_

	#include "lspi_abstract.h"
	#include "approximate_linear_models.h"

	namespace dlib
	{

	// ----------------------------------------------------------------------------------------

	// Trainer that fits the weight vector of a linear policy from a batch of
	// samples, each exposing .state, .action, .reward and .next_state.
	//
	// The feature_extractor type must provide, as used below:
	//   - nested types state_type and action_type
	//   - num_features()
	//   - get_features(state, action, out_vector)
	//   - find_best_action(state, weights)
	//   - a static boolean force_last_weight_to_1
	//
	// Tunable settings and their defaults (see init()):
	//   lambda = 0.01, discount = 0.8, epsilon = 0.01,
	//   max_iterations = 100, verbose = false.
	template <
	    typename feature_extractor
	    >
	class lspi
	{
	public:
	    typedef feature_extractor feature_extractor_type;
	    typedef typename feature_extractor::state_type state_type;
	    typedef typename feature_extractor::action_type action_type;

	    // Builds a trainer that uses a copy of fe_ and the default settings.
	    explicit lspi(
	        const feature_extractor& fe_
	    ) : fe(fe_)
	    {
	        init();
	    }

	    // Builds a trainer with a default constructed feature extractor and the
	    // default settings.
	    lspi(
	    )
	    {
	        init();
	    }

	    // Returns the discount factor applied to the next-state features in train().
	    double get_discount (
	    ) const { return discount; }

	    // Sets the discount factor.  Requires 0 < value <= 1 (checked with DLIB_ASSERT).
	    void set_discount (
	        double value
	    )
	    {
	        // make sure requires clause is not broken
	        DLIB_ASSERT(0 < value && value <= 1,
	            "\t void lspi::set_discount(value)"
	            << "\n\t invalid inputs were given to this function"
	            << "\n\t value: " << value
	        );
	        discount = value;
	    }

	    // Returns the feature extractor stored in this object.
	    const feature_extractor& get_feature_extractor (
	    ) const { return fe; }

	    // Makes train() print the iteration number and weight change to std::cout
	    // after every iteration.
	    void be_verbose (
	    )
	    {
	        verbose = true;
	    }

	    // Turns off the per-iteration printing done by train().
	    void be_quiet (
	    )
	    {
	        verbose = false;
	    }

	    // Sets the convergence threshold: train() stops once the length of the
	    // weight change is <= eps_.  Requires eps_ > 0 (checked with DLIB_ASSERT).
	    void set_epsilon (
	        double eps_
	    )
	    {
	        // make sure requires clause is not broken
	        DLIB_ASSERT(eps_ > 0,
	            "\t void lspi::set_epsilon(eps_)"
	            << "\n\t invalid inputs were given to this function"
	            << "\n\t eps_: " << eps_
	        );
	        eps = eps_;
	    }

	    // Returns the convergence threshold used by train().
	    double get_epsilon (
	    ) const
	    {
	        return eps;
	    }

	    // Sets the scale of the identity matrix that A is initialised to in every
	    // train() iteration.  Requires lambda_ >= 0 (checked with DLIB_ASSERT).
	    void set_lambda (
	        double lambda_
	    )
	    {
	        // make sure requires clause is not broken
	        DLIB_ASSERT(lambda_ >= 0,
	            "\t void lspi::set_lambda(lambda_)"
	            << "\n\t invalid inputs were given to this function"
	            << "\n\t lambda_: " << lambda_
	        );
	        lambda = lambda_;
	    }

	    // Returns the scale of the identity matrix A starts from in train().
	    double get_lambda (
	    ) const
	    {
	        return lambda;
	    }

	    // Sets the upper bound on the number of iterations train() will run.
	    void set_max_iterations (
	        unsigned long max_iter
	    ) { max_iterations = max_iter; }

	    // Returns the upper bound on the number of iterations train() will run.
	    // Declared const so it can be called on a const lspi, like the other getters.
	    unsigned long get_max_iterations (
	    ) const { return max_iterations; }

	    // Learns a weight vector from samples and returns it, together with a copy
	    // of the feature extractor, wrapped in a policy<feature_extractor>.
	    //
	    // samples must be indexable with operator[], report its length through
	    // size(), and hold elements with .state, .action, .reward and .next_state
	    // members.  Requires samples.size() > 0 (checked with DLIB_ASSERT).
	    //
	    // Each iteration rebuilds A and b from all samples using the current
	    // weights, solves for new weights, and measures how far they moved.  The
	    // loop always runs at least once and ends when the change is <= eps or
	    // max_iterations iterations have been performed.
	    template <typename vector_type>
	    policy<feature_extractor> train (
	        const vector_type& samples
	    ) const
	    {
	        // make sure requires clause is not broken
	        DLIB_ASSERT(samples.size() > 0,
	            "\t policy lspi::train(samples)"
	            << "\n\t invalid inputs were given to this function"
	        );

	        // Start from an all-zero weight vector.
	        matrix<double,0,1> w(fe.num_features());
	        w = 0;
	        matrix<double,0,1> prev_w, b, f1, f2;

	        matrix<double> A;

	        double change;
	        unsigned long iter = 0;
	        do
	        {
	            A = identity_matrix<double>(fe.num_features())*lambda;
	            // NOTE(review): b is still empty on the first pass, so this
	            // assignment has nothing to zero; the += below then relies on
	            // dlib's matrix operator+= assigning when the dimensions differ
	            // -- confirm against the dlib matrix documentation.
	            b = 0;
	            for (unsigned long i = 0; i < samples.size(); ++i)
	            {
	                // f1: features of the observed (state, action) pair.
	                fe.get_features(samples[i].state, samples[i].action, f1);
	                // f2: features of the next state paired with the action the
	                // current weights consider best there.
	                fe.get_features(samples[i].next_state,
	                                fe.find_best_action(samples[i].next_state,w),
	                                f2);
	                A += f1*trans(f1 - discount*f2);
	                b += f1*samples[i].reward;
	            }

	            prev_w = w;
	            if (feature_extractor::force_last_weight_to_1)
	                // Solve only for the leading weights: the last column of A is
	                // moved to the right-hand side and a 1 is appended to the result.
	                w = join_cols(pinv(colm(A,range(0,A.nc()-2)))*(b-colm(A,A.nc()-1)),mat(1.0));
	            else
	                w = pinv(A)*b;

	            change = length(w-prev_w);
	            ++iter;

	            if (verbose)
	                std::cout << "iteration: " << iter << "\tchange: " << change << std::endl;

	        } while(change > eps && iter < max_iterations);

	        return policy<feature_extractor>(w,fe);
	    }


	private:

	    // Resets every tunable setting to its default value.
	    void init()
	    {
	        lambda = 0.01;
	        discount = 0.8;
	        eps = 0.01;
	        verbose = false;
	        max_iterations = 100;
	    }

	    double lambda;                  // scale of the identity matrix A starts from
	    double discount;                // multiplier on next-state features
	    double eps;                     // stop once the weight change is <= eps
	    bool verbose;                   // print progress in train() when true
	    unsigned long max_iterations;   // cap on train() iterations
	    feature_extractor fe;
	};

	// ----------------------------------------------------------------------------------------

	}

	#endif // DLIB_LSPI_Hh_