Sam Chaudry

Upload folder using huggingface_hub

7885a28 verified about 1 month ago

69.1 kB

	/*
	Copyright (c) 2000-2009 Chih-Chung Chang and Chih-Jen Lin
	All rights reserved.

	Redistribution and use in source and binary forms, with or without
	modification, are permitted provided that the following conditions
	are met:

	1. Redistributions of source code must retain the above copyright
	notice, this list of conditions and the following disclaimer.

	2. Redistributions in binary form must reproduce the above copyright
	notice, this list of conditions and the following disclaimer in the
	documentation and/or other materials provided with the distribution.

	3. Neither name of copyright holders nor the names of its contributors
	may be used to endorse or promote products derived from this software
	without specific prior written permission.


	THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
	``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
	LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
	A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR
	CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
	EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
	PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
	PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
	LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
	NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
	SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	*/

	/*
	Modified 2010:

	- Support for dense data by Ming-Fang Weng

	- Return indices for support vectors, Fabian Pedregosa
	<[email protected]>

	- Fixes to avoid name collision, Fabian Pedregosa

	- Add support for instance weights, Fabian Pedregosa based on work
	by Ming-Wei Chang, Hsuan-Tien Lin, Ming-Hen Tsai, Chia-Hua Ho and
	Hsiang-Fu Yu,
	<https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/#weights_for_data_instances>.

	- Make labels sorted in svm_group_classes, Fabian Pedregosa.

	Modified 2020:

	- Improved random number generator by using a mersenne twister + tweaked
	lemire postprocessor. This fixed a convergence issue on windows targets.
	Sylvain Marie, Schneider Electric
	see <https://github.com/scikit-learn/scikit-learn/pull/13511#issuecomment-481729756>

	Modified 2021:

	- Exposed number of iterations run in optimization, Juan Martín Loyola.
	See <https://github.com/scikit-learn/scikit-learn/pull/21408/>
	*/

	#include <math.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <ctype.h>
	#include <float.h>
	#include <string.h>
	#include <stdarg.h>
	#include <climits>
	#include <random>
	#include "svm.h"
	#include "_svm_cython_blas_helpers.h"
	#include "../newrand/newrand.h"


	#ifndef _LIBSVM_CPP
	typedef float Qfloat;
	typedef signed char schar;
	#ifndef min
	template <class T> static inline T min(T x,T y) { return (x<y)?x:y; }
	#endif
	#ifndef max
	template <class T> static inline T max(T x,T y) { return (x>y)?x:y; }
	#endif
	template <class T> static inline void swap(T& x, T& y) { T t=x; x=y; y=t; }
	template <class S, class T> static inline void clone(T& dst, S src, int n)
	{
	dst = new T[n];
	memcpy((void )dst,(void )src,sizeof(T)*n);
	}
	static inline double powi(double base, int times)
	{
	double tmp = base, ret = 1.0;

	for(int t=times; t>0; t/=2)
	{
	if(t%2==1) ret*=tmp;
	tmp = tmp * tmp;
	}
	return ret;
	}
	#define INF HUGE_VAL
	#define TAU 1e-12
	#define Malloc(type,n) (type )malloc((n)sizeof(type))

	static void print_string_stdout(const char *s)
	{
	fputs(s,stdout);
	fflush(stdout);
	}
	static void (svm_print_string) (const char ) = &print_string_stdout;

	static void info(const char *fmt,...)
	{
	char buf[BUFSIZ];
	va_list ap;
	va_start(ap,fmt);
	vsprintf(buf,fmt,ap);
	va_end(ap);
	(*svm_print_string)(buf);
	}
	#endif
	#define _LIBSVM_CPP


	/* yeah, this is ugly. It helps us to have unique names for both sparse
	and dense versions of this library */
	#ifdef _DENSE_REP
	#ifdef PREFIX
	#undef PREFIX
	#endif
	#ifdef NAMESPACE
	#undef NAMESPACE
	#endif
	#define PREFIX(name) svm_##name
	#define NAMESPACE svm
	namespace svm {
	#else
	/* sparse representation */
	#ifdef PREFIX
	#undef PREFIX
	#endif
	#ifdef NAMESPACE
	#undef NAMESPACE
	#endif
	#define PREFIX(name) svm_csr_##name
	#define NAMESPACE svm_csr
	namespace svm_csr {
	#endif


	//
	// Kernel Cache
	//
	// l is the number of total data items
	// size is the cache size limit in bytes
	//
	class Cache
	{
	public:
	Cache(int l,long int size);
	~Cache();

	// request data [0,len)
	// return some position p where [p,len) need to be filled
	// (p >= len if nothing needs to be filled)
	int get_data(const int index, Qfloat **data, int len);
	void swap_index(int i, int j);
	private:
	int l;
	long int size;
	struct head_t
	{
	head_t prev, next; // a circular list
	Qfloat *data;
	int len; // data[0,len) is cached in this entry
	};

	head_t *head;
	head_t lru_head;
	void lru_delete(head_t *h);
	void lru_insert(head_t *h);
	};

	Cache::Cache(int l_,long int size_):l(l_),size(size_)
	{
	head = (head_t *)calloc(l,sizeof(head_t)); // initialized to 0
	size /= sizeof(Qfloat);
	size -= l * sizeof(head_t) / sizeof(Qfloat);
	size = max(size, 2 * (long int) l); // cache must be large enough for two columns
	lru_head.next = lru_head.prev = &lru_head;
	}

	Cache::~Cache()
	{
	for(head_t *h = lru_head.next; h != &lru_head; h=h->next)
	free(h->data);
	free(head);
	}

	void Cache::lru_delete(head_t *h)
	{
	// delete from current location
	h->prev->next = h->next;
	h->next->prev = h->prev;
	}

	void Cache::lru_insert(head_t *h)
	{
	// insert to last position
	h->next = &lru_head;
	h->prev = lru_head.prev;
	h->prev->next = h;
	h->next->prev = h;
	}

	int Cache::get_data(const int index, Qfloat **data, int len)
	{
	head_t *h = &head[index];
	if(h->len) lru_delete(h);
	int more = len - h->len;

	if(more > 0)
	{
	// free old space
	while(size < more)
	{
	head_t *old = lru_head.next;
	lru_delete(old);
	free(old->data);
	size += old->len;
	old->data = 0;
	old->len = 0;
	}

	// allocate new space
	h->data = (Qfloat )realloc(h->data,sizeof(Qfloat)len);
	size -= more;
	swap(h->len,len);
	}

	lru_insert(h);
	*data = h->data;
	return len;
	}

	void Cache::swap_index(int i, int j)
	{
	if(i==j) return;

	if(head[i].len) lru_delete(&head[i]);
	if(head[j].len) lru_delete(&head[j]);
	swap(head[i].data,head[j].data);
	swap(head[i].len,head[j].len);
	if(head[i].len) lru_insert(&head[i]);
	if(head[j].len) lru_insert(&head[j]);

	if(i>j) swap(i,j);
	for(head_t *h = lru_head.next; h!=&lru_head; h=h->next)
	{
	if(h->len > i)
	{
	if(h->len > j)
	swap(h->data[i],h->data[j]);
	else
	{
	// give up
	lru_delete(h);
	free(h->data);
	size += h->len;
	h->data = 0;
	h->len = 0;
	}
	}
	}
	}

	//
	// Kernel evaluation
	//
	// the static method k_function is for doing single kernel evaluation
	// the constructor of Kernel prepares to calculate the l*l kernel matrix
	// the member function get_Q is for getting one column from the Q Matrix
	//
	class QMatrix {
	public:
	virtual Qfloat *get_Q(int column, int len) const = 0;
	virtual double *get_QD() const = 0;
	virtual void swap_index(int i, int j) const = 0;
	virtual ~QMatrix() {}
	};

	class Kernel: public QMatrix {
	public:
	#ifdef _DENSE_REP
	Kernel(int l, PREFIX(node) * x, const svm_parameter& param, BlasFunctions *blas_functions);
	#else
	Kernel(int l, PREFIX(node) * const * x, const svm_parameter& param, BlasFunctions *blas_functions);
	#endif
	virtual ~Kernel();

	static double k_function(const PREFIX(node) x, const PREFIX(node) y,
	const svm_parameter& param, BlasFunctions *blas_functions);
	virtual Qfloat *get_Q(int column, int len) const = 0;
	virtual double *get_QD() const = 0;
	virtual void swap_index(int i, int j) const // no so const...
	{
	swap(x[i],x[j]);
	if(x_square) swap(x_square[i],x_square[j]);
	}
	protected:

	double (Kernel::*kernel_function)(int i, int j) const;

	private:
	#ifdef _DENSE_REP
	PREFIX(node) *x;
	#else
	const PREFIX(node) **x;
	#endif
	double *x_square;
	// scipy blas pointer
	BlasFunctions *m_blas;

	// svm_parameter
	const int kernel_type;
	const int degree;
	const double gamma;
	const double coef0;

	static double dot(const PREFIX(node) px, const PREFIX(node) py, BlasFunctions *blas_functions);
	#ifdef _DENSE_REP
	static double dot(const PREFIX(node) &px, const PREFIX(node) &py, BlasFunctions *blas_functions);
	#endif

	double kernel_linear(int i, int j) const
	{
	return dot(x[i],x[j],m_blas);
	}
	double kernel_poly(int i, int j) const
	{
	return powi(gamma*dot(x[i],x[j],m_blas)+coef0,degree);
	}
	double kernel_rbf(int i, int j) const
	{
	return exp(-gamma(x_square[i]+x_square[j]-2dot(x[i],x[j],m_blas)));
	}
	double kernel_sigmoid(int i, int j) const
	{
	return tanh(gamma*dot(x[i],x[j],m_blas)+coef0);
	}
	double kernel_precomputed(int i, int j) const
	{
	#ifdef _DENSE_REP
	return (x+i)->values[x[j].ind];
	#else
	return x[i][(int)(x[j][0].value)].value;
	#endif
	}
	};

	#ifdef _DENSE_REP
	Kernel::Kernel(int l, PREFIX(node) * x_, const svm_parameter& param, BlasFunctions *blas_functions)
	#else
	Kernel::Kernel(int l, PREFIX(node) * const * x_, const svm_parameter& param, BlasFunctions *blas_functions)
	#endif
	:kernel_type(param.kernel_type), degree(param.degree),
	gamma(param.gamma), coef0(param.coef0)
	{
	m_blas = blas_functions;
	switch(kernel_type)
	{
	case LINEAR:
	kernel_function = &Kernel::kernel_linear;
	break;
	case POLY:
	kernel_function = &Kernel::kernel_poly;
	break;
	case RBF:
	kernel_function = &Kernel::kernel_rbf;
	break;
	case SIGMOID:
	kernel_function = &Kernel::kernel_sigmoid;
	break;
	case PRECOMPUTED:
	kernel_function = &Kernel::kernel_precomputed;
	break;
	}

	clone(x,x_,l);

	if(kernel_type == RBF)
	{
	x_square = new double[l];
	for(int i=0;i<l;i++)
	x_square[i] = dot(x[i],x[i],blas_functions);
	}
	else
	x_square = 0;
	}

	Kernel::~Kernel()
	{
	delete[] x;
	delete[] x_square;
	}

	#ifdef _DENSE_REP
	double Kernel::dot(const PREFIX(node) px, const PREFIX(node) py, BlasFunctions *blas_functions)
	{
	double sum = 0;

	int dim = min(px->dim, py->dim);
	sum = blas_functions->dot(dim, px->values, 1, py->values, 1);
	return sum;
	}

	double Kernel::dot(const PREFIX(node) &px, const PREFIX(node) &py, BlasFunctions *blas_functions)
	{
	double sum = 0;

	int dim = min(px.dim, py.dim);
	sum = blas_functions->dot(dim, px.values, 1, py.values, 1);
	return sum;
	}
	#else
	double Kernel::dot(const PREFIX(node) px, const PREFIX(node) py, BlasFunctions *blas_functions)
	{
	double sum = 0;
	while(px->index != -1 && py->index != -1)
	{
	if(px->index == py->index)
	{
	sum += px->value * py->value;
	++px;
	++py;
	}
	else
	{
	if(px->index > py->index)
	++py;
	else
	++px;
	}
	}
	return sum;
	}
	#endif

	double Kernel::k_function(const PREFIX(node) x, const PREFIX(node) y,
	const svm_parameter& param, BlasFunctions *blas_functions)
	{
	switch(param.kernel_type)
	{
	case LINEAR:
	return dot(x,y,blas_functions);
	case POLY:
	return powi(param.gamma*dot(x,y,blas_functions)+param.coef0,param.degree);
	case RBF:
	{
	double sum = 0;
	#ifdef _DENSE_REP
	int dim = min(x->dim, y->dim), i;
	double* m_array = (double)malloc(sizeof(double)dim);
	for (i = 0; i < dim; i++)
	{
	m_array[i] = x->values[i] - y->values[i];
	}
	sum = blas_functions->dot(dim, m_array, 1, m_array, 1);
	free(m_array);
	for (; i < x->dim; i++)
	sum += x->values[i] * x->values[i];
	for (; i < y->dim; i++)
	sum += y->values[i] * y->values[i];
	#else
	while(x->index != -1 && y->index !=-1)
	{
	if(x->index == y->index)
	{
	double d = x->value - y->value;
	sum += d*d;
	++x;
	++y;
	}
	else
	{
	if(x->index > y->index)
	{
	sum += y->value * y->value;
	++y;
	}
	else
	{
	sum += x->value * x->value;
	++x;
	}
	}
	}

	while(x->index != -1)
	{
	sum += x->value * x->value;
	++x;
	}

	while(y->index != -1)
	{
	sum += y->value * y->value;
	++y;
	}
	#endif
	return exp(-param.gamma*sum);
	}
	case SIGMOID:
	return tanh(param.gamma*dot(x,y,blas_functions)+param.coef0);
	case PRECOMPUTED: //x: test (validation), y: SV
	{
	#ifdef _DENSE_REP
	return x->values[y->ind];
	#else
	return x[(int)(y->value)].value;
	#endif
	}
	default:
	return 0; // Unreachable
	}
	}
	// An SMO algorithm in Fan et al., JMLR 6(2005), p. 1889--1918
	// Solves:
	//
	// min 0.5(\alpha^T Q \alpha) + p^T \alpha
	//
	// y^T \alpha = \delta
	// y_i = +1 or -1
	// 0 <= alpha_i <= Cp for y_i = 1
	// 0 <= alpha_i <= Cn for y_i = -1
	//
	// Given:
	//
	// Q, p, y, Cp, Cn, and an initial feasible point \alpha
	// l is the size of vectors and matrices
	// eps is the stopping tolerance
	//
	// solution will be put in \alpha, objective value will be put in obj
	//

	class Solver {
	public:
	Solver() {};
	virtual ~Solver() {};

	struct SolutionInfo {
	double obj;
	double rho;
	double *upper_bound;
	double r; // for Solver_NU
	bool solve_timed_out;
	int n_iter;
	};

	void Solve(int l, const QMatrix& Q, const double p_, const schar y_,
	double alpha_, const double C_, double eps,
	SolutionInfo* si, int shrinking, int max_iter);
	protected:
	int active_size;
	schar *y;
	double *G; // gradient of objective function
	enum { LOWER_BOUND, UPPER_BOUND, FREE };
	char *alpha_status; // LOWER_BOUND, UPPER_BOUND, FREE
	double *alpha;
	const QMatrix *Q;
	const double *QD;
	double eps;
	double Cp,Cn;
	double *C;
	double *p;
	int *active_set;
	double *G_bar; // gradient, if we treat free variables as 0
	int l;
	bool unshrink; // XXX

	double get_C(int i)
	{
	return C[i];
	}
	void update_alpha_status(int i)
	{
	if(alpha[i] >= get_C(i))
	alpha_status[i] = UPPER_BOUND;
	else if(alpha[i] <= 0)
	alpha_status[i] = LOWER_BOUND;
	else alpha_status[i] = FREE;
	}
	bool is_upper_bound(int i) { return alpha_status[i] == UPPER_BOUND; }
	bool is_lower_bound(int i) { return alpha_status[i] == LOWER_BOUND; }
	bool is_free(int i) { return alpha_status[i] == FREE; }
	void swap_index(int i, int j);
	void reconstruct_gradient();
	virtual int select_working_set(int &i, int &j);
	virtual double calculate_rho();
	virtual void do_shrinking();
	private:
	bool be_shrunk(int i, double Gmax1, double Gmax2);
	};

	void Solver::swap_index(int i, int j)
	{
	Q->swap_index(i,j);
	swap(y[i],y[j]);
	swap(G[i],G[j]);
	swap(alpha_status[i],alpha_status[j]);
	swap(alpha[i],alpha[j]);
	swap(p[i],p[j]);
	swap(active_set[i],active_set[j]);
	swap(G_bar[i],G_bar[j]);
	swap(C[i], C[j]);
	}

	void Solver::reconstruct_gradient()
	{
	// reconstruct inactive elements of G from G_bar and free variables

	if(active_size == l) return;

	int i,j;
	int nr_free = 0;

	for(j=active_size;j<l;j++)
	G[j] = G_bar[j] + p[j];

	for(j=0;j<active_size;j++)
	if(is_free(j))
	nr_free++;

	if(2*nr_free < active_size)
	info("\nWarning: using -h 0 may be faster\n");

	if (nr_freel > 2active_size*(l-active_size))
	{
	for(i=active_size;i<l;i++)
	{
	const Qfloat *Q_i = Q->get_Q(i,active_size);
	for(j=0;j<active_size;j++)
	if(is_free(j))
	G[i] += alpha[j] * Q_i[j];
	}
	}
	else
	{
	for(i=0;i<active_size;i++)
	if(is_free(i))
	{
	const Qfloat *Q_i = Q->get_Q(i,l);
	double alpha_i = alpha[i];
	for(j=active_size;j<l;j++)
	G[j] += alpha_i * Q_i[j];
	}
	}
	}

	void Solver::Solve(int l, const QMatrix& Q, const double p_, const schar y_,
	double alpha_, const double C_, double eps,
	SolutionInfo* si, int shrinking, int max_iter)
	{
	this->l = l;
	this->Q = &Q;
	QD=Q.get_QD();
	clone(p, p_,l);
	clone(y, y_,l);
	clone(alpha,alpha_,l);
	clone(C, C_, l);
	this->eps = eps;
	unshrink = false;
	si->solve_timed_out = false;

	// initialize alpha_status
	{
	alpha_status = new char[l];
	for(int i=0;i<l;i++)
	update_alpha_status(i);
	}

	// initialize active set (for shrinking)
	{
	active_set = new int[l];
	for(int i=0;i<l;i++)
	active_set[i] = i;
	active_size = l;
	}

	// initialize gradient
	{
	G = new double[l];
	G_bar = new double[l];
	int i;
	for(i=0;i<l;i++)
	{
	G[i] = p[i];
	G_bar[i] = 0;
	}
	for(i=0;i<l;i++)
	if(!is_lower_bound(i))
	{
	const Qfloat *Q_i = Q.get_Q(i,l);
	double alpha_i = alpha[i];
	int j;
	for(j=0;j<l;j++)
	G[j] += alpha_i*Q_i[j];
	if(is_upper_bound(i))
	for(j=0;j<l;j++)
	G_bar[j] += get_C(i) * Q_i[j];
	}
	}

	// optimization step

	int iter = 0;
	int counter = min(l,1000)+1;

	while(1)
	{
	// set max_iter to -1 to disable the mechanism
	if ((max_iter != -1) && (iter >= max_iter)) {
	info("WARN: libsvm Solver reached max_iter");
	si->solve_timed_out = true;
	break;
	}

	// show progress and do shrinking

	if(--counter == 0)
	{
	counter = min(l,1000);
	if(shrinking) do_shrinking();
	info(".");
	}

	int i,j;
	if(select_working_set(i,j)!=0)
	{
	// reconstruct the whole gradient
	reconstruct_gradient();
	// reset active set size and check
	active_size = l;
	info("*");
	if(select_working_set(i,j)!=0)
	break;
	else
	counter = 1; // do shrinking next iteration
	}

	++iter;

	// update alpha[i] and alpha[j], handle bounds carefully

	const Qfloat *Q_i = Q.get_Q(i,active_size);
	const Qfloat *Q_j = Q.get_Q(j,active_size);

	double C_i = get_C(i);
	double C_j = get_C(j);

	double old_alpha_i = alpha[i];
	double old_alpha_j = alpha[j];

	if(y[i]!=y[j])
	{
	double quad_coef = QD[i]+QD[j]+2*Q_i[j];
	if (quad_coef <= 0)
	quad_coef = TAU;
	double delta = (-G[i]-G[j])/quad_coef;
	double diff = alpha[i] - alpha[j];
	alpha[i] += delta;
	alpha[j] += delta;

	if(diff > 0)
	{
	if(alpha[j] < 0)
	{
	alpha[j] = 0;
	alpha[i] = diff;
	}
	}
	else
	{
	if(alpha[i] < 0)
	{
	alpha[i] = 0;
	alpha[j] = -diff;
	}
	}
	if(diff > C_i - C_j)
	{
	if(alpha[i] > C_i)
	{
	alpha[i] = C_i;
	alpha[j] = C_i - diff;
	}
	}
	else
	{
	if(alpha[j] > C_j)
	{
	alpha[j] = C_j;
	alpha[i] = C_j + diff;
	}
	}
	}
	else
	{
	double quad_coef = QD[i]+QD[j]-2*Q_i[j];
	if (quad_coef <= 0)
	quad_coef = TAU;
	double delta = (G[i]-G[j])/quad_coef;
	double sum = alpha[i] + alpha[j];
	alpha[i] -= delta;
	alpha[j] += delta;

	if(sum > C_i)
	{
	if(alpha[i] > C_i)
	{
	alpha[i] = C_i;
	alpha[j] = sum - C_i;
	}
	}
	else
	{
	if(alpha[j] < 0)
	{
	alpha[j] = 0;
	alpha[i] = sum;
	}
	}
	if(sum > C_j)
	{
	if(alpha[j] > C_j)
	{
	alpha[j] = C_j;
	alpha[i] = sum - C_j;
	}
	}
	else
	{
	if(alpha[i] < 0)
	{
	alpha[i] = 0;
	alpha[j] = sum;
	}
	}
	}

	// update G

	double delta_alpha_i = alpha[i] - old_alpha_i;
	double delta_alpha_j = alpha[j] - old_alpha_j;

	for(int k=0;k<active_size;k++)
	{
	G[k] += Q_i[k]delta_alpha_i + Q_j[k]delta_alpha_j;
	}

	// update alpha_status and G_bar

	{
	bool ui = is_upper_bound(i);
	bool uj = is_upper_bound(j);
	update_alpha_status(i);
	update_alpha_status(j);
	int k;
	if(ui != is_upper_bound(i))
	{
	Q_i = Q.get_Q(i,l);
	if(ui)
	for(k=0;k<l;k++)
	G_bar[k] -= C_i * Q_i[k];
	else
	for(k=0;k<l;k++)
	G_bar[k] += C_i * Q_i[k];
	}

	if(uj != is_upper_bound(j))
	{
	Q_j = Q.get_Q(j,l);
	if(uj)
	for(k=0;k<l;k++)
	G_bar[k] -= C_j * Q_j[k];
	else
	for(k=0;k<l;k++)
	G_bar[k] += C_j * Q_j[k];
	}
	}
	}

	// calculate rho

	si->rho = calculate_rho();

	// calculate objective value
	{
	double v = 0;
	int i;
	for(i=0;i<l;i++)
	v += alpha[i] * (G[i] + p[i]);

	si->obj = v/2;
	}

	// put back the solution
	{
	for(int i=0;i<l;i++)
	alpha_[active_set[i]] = alpha[i];
	}

	// juggle everything back
	/*{
	for(int i=0;i<l;i++)
	while(active_set[i] != i)
	swap_index(i,active_set[i]);
	// or Q.swap_index(i,active_set[i]);
	}*/

	for(int i=0;i<l;i++)
	si->upper_bound[i] = C[i];

	// store number of iterations
	si->n_iter = iter;

	info("\noptimization finished, #iter = %d\n",iter);

	delete[] p;
	delete[] y;
	delete[] alpha;
	delete[] alpha_status;
	delete[] active_set;
	delete[] G;
	delete[] G_bar;
	delete[] C;
	}

	// return 1 if already optimal, return 0 otherwise
	int Solver::select_working_set(int &out_i, int &out_j)
	{
	// return i,j such that
	// i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
	// j: minimizes the decrease of obj value
	// (if quadratic coefficient <= 0, replace it with tau)
	// -y_jgrad(f)_j < -y_igrad(f)_i, j in I_low(\alpha)

	double Gmax = -INF;
	double Gmax2 = -INF;
	int Gmax_idx = -1;
	int Gmin_idx = -1;
	double obj_diff_min = INF;

	for(int t=0;t<active_size;t++)
	if(y[t]==+1)
	{
	if(!is_upper_bound(t))
	if(-G[t] >= Gmax)
	{
	Gmax = -G[t];
	Gmax_idx = t;
	}
	}
	else
	{
	if(!is_lower_bound(t))
	if(G[t] >= Gmax)
	{
	Gmax = G[t];
	Gmax_idx = t;
	}
	}

	int i = Gmax_idx;
	const Qfloat *Q_i = NULL;
	if(i != -1) // NULL Q_i not accessed: Gmax=-INF if i=-1
	Q_i = Q->get_Q(i,active_size);

	for(int j=0;j<active_size;j++)
	{
	if(y[j]==+1)
	{
	if (!is_lower_bound(j))
	{
	double grad_diff=Gmax+G[j];
	if (G[j] >= Gmax2)
	Gmax2 = G[j];
	if (grad_diff > 0)
	{
	double obj_diff;
	double quad_coef = QD[i]+QD[j]-2.0y[i]Q_i[j];
	if (quad_coef > 0)
	obj_diff = -(grad_diff*grad_diff)/quad_coef;
	else
	obj_diff = -(grad_diff*grad_diff)/TAU;

	if (obj_diff <= obj_diff_min)
	{
	Gmin_idx=j;
	obj_diff_min = obj_diff;
	}
	}
	}
	}
	else
	{
	if (!is_upper_bound(j))
	{
	double grad_diff= Gmax-G[j];
	if (-G[j] >= Gmax2)
	Gmax2 = -G[j];
	if (grad_diff > 0)
	{
	double obj_diff;
	double quad_coef = QD[i]+QD[j]+2.0y[i]Q_i[j];
	if (quad_coef > 0)
	obj_diff = -(grad_diff*grad_diff)/quad_coef;
	else
	obj_diff = -(grad_diff*grad_diff)/TAU;

	if (obj_diff <= obj_diff_min)
	{
	Gmin_idx=j;
	obj_diff_min = obj_diff;
	}
	}
	}
	}
	}

	if(Gmax+Gmax2 < eps \|\| Gmin_idx == -1)
	return 1;

	out_i = Gmax_idx;
	out_j = Gmin_idx;
	return 0;
	}

	bool Solver::be_shrunk(int i, double Gmax1, double Gmax2)
	{
	if(is_upper_bound(i))
	{
	if(y[i]==+1)
	return(-G[i] > Gmax1);
	else
	return(-G[i] > Gmax2);
	}
	else if(is_lower_bound(i))
	{
	if(y[i]==+1)
	return(G[i] > Gmax2);
	else
	return(G[i] > Gmax1);
	}
	else
	return(false);
	}

	void Solver::do_shrinking()
	{
	int i;
	double Gmax1 = -INF; // max { -y_i * grad(f)_i \| i in I_up(\alpha) }
	double Gmax2 = -INF; // max { y_i * grad(f)_i \| i in I_low(\alpha) }

	// find maximal violating pair first
	for(i=0;i<active_size;i++)
	{
	if(y[i]==+1)
	{
	if(!is_upper_bound(i))
	{
	if(-G[i] >= Gmax1)
	Gmax1 = -G[i];
	}
	if(!is_lower_bound(i))
	{
	if(G[i] >= Gmax2)
	Gmax2 = G[i];
	}
	}
	else
	{
	if(!is_upper_bound(i))
	{
	if(-G[i] >= Gmax2)
	Gmax2 = -G[i];
	}
	if(!is_lower_bound(i))
	{
	if(G[i] >= Gmax1)
	Gmax1 = G[i];
	}
	}
	}

	if(unshrink == false && Gmax1 + Gmax2 <= eps*10)
	{
	unshrink = true;
	reconstruct_gradient();
	active_size = l;
	info("*");
	}

	for(i=0;i<active_size;i++)
	if (be_shrunk(i, Gmax1, Gmax2))
	{
	active_size--;
	while (active_size > i)
	{
	if (!be_shrunk(active_size, Gmax1, Gmax2))
	{
	swap_index(i,active_size);
	break;
	}
	active_size--;
	}
	}
	}

	double Solver::calculate_rho()
	{
	double r;
	int nr_free = 0;
	double ub = INF, lb = -INF, sum_free = 0;
	for(int i=0;i<active_size;i++)
	{
	double yG = y[i]*G[i];

	if(is_upper_bound(i))
	{
	if(y[i]==-1)
	ub = min(ub,yG);
	else
	lb = max(lb,yG);
	}
	else if(is_lower_bound(i))
	{
	if(y[i]==+1)
	ub = min(ub,yG);
	else
	lb = max(lb,yG);
	}
	else
	{
	++nr_free;
	sum_free += yG;
	}
	}

	if(nr_free>0)
	r = sum_free/nr_free;
	else
	r = (ub+lb)/2;

	return r;
	}

	//
	// Solver for nu-svm classification and regression
	//
	// additional constraint: e^T \alpha = constant
	//
	class Solver_NU : public Solver
	{
	public:
	Solver_NU() {}
	void Solve(int l, const QMatrix& Q, const double p, const schar y,
	double alpha, const double C_, double eps,
	SolutionInfo* si, int shrinking, int max_iter)
	{
	this->si = si;
	Solver::Solve(l,Q,p,y,alpha,C_,eps,si,shrinking,max_iter);
	}
	private:
	SolutionInfo *si;
	int select_working_set(int &i, int &j);
	double calculate_rho();
	bool be_shrunk(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4);
	void do_shrinking();
	};

	// return 1 if already optimal, return 0 otherwise
	int Solver_NU::select_working_set(int &out_i, int &out_j)
	{
	// return i,j such that y_i = y_j and
	// i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
	// j: minimizes the decrease of obj value
	// (if quadratic coefficient <= 0, replace it with tau)
	// -y_jgrad(f)_j < -y_igrad(f)_i, j in I_low(\alpha)

	double Gmaxp = -INF;
	double Gmaxp2 = -INF;
	int Gmaxp_idx = -1;

	double Gmaxn = -INF;
	double Gmaxn2 = -INF;
	int Gmaxn_idx = -1;

	int Gmin_idx = -1;
	double obj_diff_min = INF;

	for(int t=0;t<active_size;t++)
	if(y[t]==+1)
	{
	if(!is_upper_bound(t))
	if(-G[t] >= Gmaxp)
	{
	Gmaxp = -G[t];
	Gmaxp_idx = t;
	}
	}
	else
	{
	if(!is_lower_bound(t))
	if(G[t] >= Gmaxn)
	{
	Gmaxn = G[t];
	Gmaxn_idx = t;
	}
	}

	int ip = Gmaxp_idx;
	int in = Gmaxn_idx;
	const Qfloat *Q_ip = NULL;
	const Qfloat *Q_in = NULL;
	if(ip != -1) // NULL Q_ip not accessed: Gmaxp=-INF if ip=-1
	Q_ip = Q->get_Q(ip,active_size);
	if(in != -1)
	Q_in = Q->get_Q(in,active_size);

	for(int j=0;j<active_size;j++)
	{
	if(y[j]==+1)
	{
	if (!is_lower_bound(j))
	{
	double grad_diff=Gmaxp+G[j];
	if (G[j] >= Gmaxp2)
	Gmaxp2 = G[j];
	if (grad_diff > 0)
	{
	double obj_diff;
	double quad_coef = QD[ip]+QD[j]-2*Q_ip[j];
	if (quad_coef > 0)
	obj_diff = -(grad_diff*grad_diff)/quad_coef;
	else
	obj_diff = -(grad_diff*grad_diff)/TAU;

	if (obj_diff <= obj_diff_min)
	{
	Gmin_idx=j;
	obj_diff_min = obj_diff;
	}
	}
	}
	}
	else
	{
	if (!is_upper_bound(j))
	{
	double grad_diff=Gmaxn-G[j];
	if (-G[j] >= Gmaxn2)
	Gmaxn2 = -G[j];
	if (grad_diff > 0)
	{
	double obj_diff;
	double quad_coef = QD[in]+QD[j]-2*Q_in[j];
	if (quad_coef > 0)
	obj_diff = -(grad_diff*grad_diff)/quad_coef;
	else
	obj_diff = -(grad_diff*grad_diff)/TAU;

	if (obj_diff <= obj_diff_min)
	{
	Gmin_idx=j;
	obj_diff_min = obj_diff;
	}
	}
	}
	}
	}

	if(max(Gmaxp+Gmaxp2,Gmaxn+Gmaxn2) < eps \|\| Gmin_idx == -1)
	return 1;

	if (y[Gmin_idx] == +1)
	out_i = Gmaxp_idx;
	else
	out_i = Gmaxn_idx;
	out_j = Gmin_idx;

	return 0;
	}

	bool Solver_NU::be_shrunk(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4)
	{
	if(is_upper_bound(i))
	{
	if(y[i]==+1)
	return(-G[i] > Gmax1);
	else
	return(-G[i] > Gmax4);
	}
	else if(is_lower_bound(i))
	{
	if(y[i]==+1)
	return(G[i] > Gmax2);
	else
	return(G[i] > Gmax3);
	}
	else
	return(false);
	}

	void Solver_NU::do_shrinking()
	{
	double Gmax1 = -INF; // max { -y_i * grad(f)_i \| y_i = +1, i in I_up(\alpha) }
	double Gmax2 = -INF; // max { y_i * grad(f)_i \| y_i = +1, i in I_low(\alpha) }
	double Gmax3 = -INF; // max { -y_i * grad(f)_i \| y_i = -1, i in I_up(\alpha) }
	double Gmax4 = -INF; // max { y_i * grad(f)_i \| y_i = -1, i in I_low(\alpha) }

	// find maximal violating pair first
	int i;
	for(i=0;i<active_size;i++)
	{
	if(!is_upper_bound(i))
	{
	if(y[i]==+1)
	{
	if(-G[i] > Gmax1) Gmax1 = -G[i];
	}
	else if(-G[i] > Gmax4) Gmax4 = -G[i];
	}
	if(!is_lower_bound(i))
	{
	if(y[i]==+1)
	{
	if(G[i] > Gmax2) Gmax2 = G[i];
	}
	else if(G[i] > Gmax3) Gmax3 = G[i];
	}
	}

	if(unshrink == false && max(Gmax1+Gmax2,Gmax3+Gmax4) <= eps*10)
	{
	unshrink = true;
	reconstruct_gradient();
	active_size = l;
	}

	for(i=0;i<active_size;i++)
	if (be_shrunk(i, Gmax1, Gmax2, Gmax3, Gmax4))
	{
	active_size--;
	while (active_size > i)
	{
	if (!be_shrunk(active_size, Gmax1, Gmax2, Gmax3, Gmax4))
	{
	swap_index(i,active_size);
	break;
	}
	active_size--;
	}
	}
	}

	double Solver_NU::calculate_rho()
	{
	int nr_free1 = 0,nr_free2 = 0;
	double ub1 = INF, ub2 = INF;
	double lb1 = -INF, lb2 = -INF;
	double sum_free1 = 0, sum_free2 = 0;

	for(int i=0;i<active_size;i++)
	{
	if(y[i]==+1)
	{
	if(is_upper_bound(i))
	lb1 = max(lb1,G[i]);
	else if(is_lower_bound(i))
	ub1 = min(ub1,G[i]);
	else
	{
	++nr_free1;
	sum_free1 += G[i];
	}
	}
	else
	{
	if(is_upper_bound(i))
	lb2 = max(lb2,G[i]);
	else if(is_lower_bound(i))
	ub2 = min(ub2,G[i]);
	else
	{
	++nr_free2;
	sum_free2 += G[i];
	}
	}
	}

	double r1,r2;
	if(nr_free1 > 0)
	r1 = sum_free1/nr_free1;
	else
	r1 = (ub1+lb1)/2;

	if(nr_free2 > 0)
	r2 = sum_free2/nr_free2;
	else
	r2 = (ub2+lb2)/2;

	si->r = (r1+r2)/2;
	return (r1-r2)/2;
	}

	//
	// Q matrices for various formulations
	//
	class SVC_Q: public Kernel
	{
	public:
	SVC_Q(const PREFIX(problem)& prob, const svm_parameter& param, const schar y_, BlasFunctions blas_functions)
	:Kernel(prob.l, prob.x, param, blas_functions)
	{
	clone(y,y_,prob.l);
	cache = new Cache(prob.l,(long int)(param.cache_size*(1<<20)));
	QD = new double[prob.l];
	for(int i=0;i<prob.l;i++)
	QD[i] = (this->*kernel_function)(i,i);
	}

	Qfloat *get_Q(int i, int len) const
	{
	Qfloat *data;
	int start, j;
	if((start = cache->get_data(i,&data,len)) < len)
	{
	for(j=start;j<len;j++)
	data[j] = (Qfloat)(y[i]y[j](this->*kernel_function)(i,j));
	}
	return data;
	}

	double *get_QD() const
	{
	return QD;
	}

	void swap_index(int i, int j) const
	{
	cache->swap_index(i,j);
	Kernel::swap_index(i,j);
	swap(y[i],y[j]);
	swap(QD[i],QD[j]);
	}

	~SVC_Q()
	{
	delete[] y;
	delete cache;
	delete[] QD;
	}
	private:
	schar *y;
	Cache *cache;
	double *QD;
	};

	class ONE_CLASS_Q: public Kernel
	{
	public:
	ONE_CLASS_Q(const PREFIX(problem)& prob, const svm_parameter& param, BlasFunctions *blas_functions)
	:Kernel(prob.l, prob.x, param, blas_functions)
	{
	cache = new Cache(prob.l,(long int)(param.cache_size*(1<<20)));
	QD = new double[prob.l];
	for(int i=0;i<prob.l;i++)
	QD[i] = (this->*kernel_function)(i,i);
	}

	Qfloat *get_Q(int i, int len) const
	{
	Qfloat *data;
	int start, j;
	if((start = cache->get_data(i,&data,len)) < len)
	{
	for(j=start;j<len;j++)
	data[j] = (Qfloat)(this->*kernel_function)(i,j);
	}
	return data;
	}

	double *get_QD() const
	{
	return QD;
	}

	void swap_index(int i, int j) const
	{
	cache->swap_index(i,j);
	Kernel::swap_index(i,j);
	swap(QD[i],QD[j]);
	}

	~ONE_CLASS_Q()
	{
	delete cache;
	delete[] QD;
	}
	private:
	Cache *cache;
	double *QD;
	};

	class SVR_Q: public Kernel
	{
	public:
	SVR_Q(const PREFIX(problem)& prob, const svm_parameter& param, BlasFunctions *blas_functions)
	:Kernel(prob.l, prob.x, param, blas_functions)
	{
	l = prob.l;
	cache = new Cache(l,(long int)(param.cache_size*(1<<20)));
	QD = new double[2*l];
	sign = new schar[2*l];
	index = new int[2*l];
	for(int k=0;k<l;k++)
	{
	sign[k] = 1;
	sign[k+l] = -1;
	index[k] = k;
	index[k+l] = k;
	QD[k] = (this->*kernel_function)(k,k);
	QD[k+l] = QD[k];
	}
	buffer[0] = new Qfloat[2*l];
	buffer[1] = new Qfloat[2*l];
	next_buffer = 0;
	}

	void swap_index(int i, int j) const
	{
	swap(sign[i],sign[j]);
	swap(index[i],index[j]);
	swap(QD[i],QD[j]);
	}

	Qfloat *get_Q(int i, int len) const
	{
	Qfloat *data;
	int j, real_i = index[i];
	if(cache->get_data(real_i,&data,l) < l)
	{
	for(j=0;j<l;j++)
	data[j] = (Qfloat)(this->*kernel_function)(real_i,j);
	}

	// reorder and copy
	Qfloat *buf = buffer[next_buffer];
	next_buffer = 1 - next_buffer;
	schar si = sign[i];
	for(j=0;j<len;j++)
	buf[j] = (Qfloat) si * (Qfloat) sign[j] * data[index[j]];
	return buf;
	}

	double *get_QD() const
	{
	return QD;
	}

	~SVR_Q()
	{
	delete cache;
	delete[] sign;
	delete[] index;
	delete[] buffer[0];
	delete[] buffer[1];
	delete[] QD;
	}
	private:
	int l;
	Cache *cache;
	schar *sign;
	int *index;
	mutable int next_buffer;
	Qfloat *buffer[2];
	double *QD;
	};

	//
	// construct and solve various formulations
	//
	static void solve_c_svc(
	const PREFIX(problem) prob, const svm_parameter param,
	double alpha, Solver::SolutionInfo si, double Cp, double Cn, BlasFunctions *blas_functions)
	{
	int l = prob->l;
	double *minus_ones = new double[l];
	schar *y = new schar[l];
	double *C = new double[l];

	int i;

	for(i=0;i<l;i++)
	{
	alpha[i] = 0;
	minus_ones[i] = -1;
	if(prob->y[i] > 0)
	{
	y[i] = +1;
	C[i] = prob->W[i]*Cp;
	}
	else
	{
	y[i] = -1;
	C[i] = prob->W[i]*Cn;
	}
	}

	Solver s;
	s.Solve(l, SVC_Q(prob,param,y, blas_functions), minus_ones, y,
	alpha, C, param->eps, si, param->shrinking,
	param->max_iter);

	/*
	double sum_alpha=0;
	for(i=0;i<l;i++)
	sum_alpha += alpha[i];

	if (Cp==Cn)
	info("nu = %f\n", sum_alpha/(Cp*prob->l));
	*/

	for(i=0;i<l;i++)
	alpha[i] *= y[i];

	delete[] C;
	delete[] minus_ones;
	delete[] y;
	}

	static void solve_nu_svc(
	const PREFIX(problem) prob, const svm_parameter param,
	double alpha, Solver::SolutionInfo si, BlasFunctions *blas_functions)
	{
	int i;
	int l = prob->l;
	double nu = param->nu;

	schar *y = new schar[l];
	double *C = new double[l];

	for(i=0;i<l;i++)
	{
	if(prob->y[i]>0)
	y[i] = +1;
	else
	y[i] = -1;

	C[i] = prob->W[i];
	}

	double nu_l = 0;
	for(i=0;i<l;i++) nu_l += nu*C[i];
	double sum_pos = nu_l/2;
	double sum_neg = nu_l/2;

	for(i=0;i<l;i++)
	if(y[i] == +1)
	{
	alpha[i] = min(C[i],sum_pos);
	sum_pos -= alpha[i];
	}
	else
	{
	alpha[i] = min(C[i],sum_neg);
	sum_neg -= alpha[i];
	}

	double *zeros = new double[l];

	for(i=0;i<l;i++)
	zeros[i] = 0;

	Solver_NU s;
	s.Solve(l, SVC_Q(prob,param,y,blas_functions), zeros, y,
	alpha, C, param->eps, si, param->shrinking, param->max_iter);
	double r = si->r;

	info("C = %f\n",1/r);

	for(i=0;i<l;i++)
	{
	alpha[i] *= y[i]/r;
	si->upper_bound[i] /= r;
	}

	si->rho /= r;
	si->obj /= (r*r);

	delete[] C;
	delete[] y;
	delete[] zeros;
	}

	static void solve_one_class(
	const PREFIX(problem) prob, const svm_parameter param,
	double alpha, Solver::SolutionInfo si, BlasFunctions *blas_functions)
	{
	int l = prob->l;
	double *zeros = new double[l];
	schar *ones = new schar[l];
	double *C = new double[l];
	int i;

	double nu_l = 0;

	for(i=0;i<l;i++)
	{
	C[i] = prob->W[i];
	nu_l += C[i] * param->nu;
	}

	i = 0;
	while(nu_l > 0)
	{
	alpha[i] = min(C[i],nu_l);
	nu_l -= alpha[i];
	++i;
	}
	for(;i<l;i++)
	alpha[i] = 0;

	for(i=0;i<l;i++)
	{
	zeros[i] = 0;
	ones[i] = 1;
	}

	Solver s;
	s.Solve(l, ONE_CLASS_Q(prob,param,blas_functions), zeros, ones,
	alpha, C, param->eps, si, param->shrinking, param->max_iter);

	delete[] C;
	delete[] zeros;
	delete[] ones;
	}

	static void solve_epsilon_svr(
	const PREFIX(problem) prob, const svm_parameter param,
	double alpha, Solver::SolutionInfo si, BlasFunctions *blas_functions)
	{
	int l = prob->l;
	double alpha2 = new double[2l];
	double linear_term = new double[2l];
	schar y = new schar[2l];
	double C = new double[2l];
	int i;

	for(i=0;i<l;i++)
	{
	alpha2[i] = 0;
	linear_term[i] = param->p - prob->y[i];
	y[i] = 1;
	C[i] = prob->W[i]*param->C;

	alpha2[i+l] = 0;
	linear_term[i+l] = param->p + prob->y[i];
	y[i+l] = -1;
	C[i+l] = prob->W[i]*param->C;
	}

	Solver s;
	s.Solve(2l, SVR_Q(prob,*param,blas_functions), linear_term, y,
	alpha2, C, param->eps, si, param->shrinking, param->max_iter);

	double sum_alpha = 0;
	for(i=0;i<l;i++)
	{
	alpha[i] = alpha2[i] - alpha2[i+l];
	sum_alpha += fabs(alpha[i]);
	}


	delete[] alpha2;
	delete[] linear_term;
	delete[] C;
	delete[] y;
	}

	static void solve_nu_svr(
	const PREFIX(problem) prob, const svm_parameter param,
	double alpha, Solver::SolutionInfo si, BlasFunctions *blas_functions)
	{
	int l = prob->l;
	double C = new double[2l];
	double alpha2 = new double[2l];
	double linear_term = new double[2l];
	schar y = new schar[2l];
	int i;

	double sum = 0;
	for(i=0;i<l;i++)
	{
	C[i] = C[i+l] = prob->W[i]*param->C;
	sum += C[i] * param->nu;
	}
	sum /= 2;

	for(i=0;i<l;i++)
	{
	alpha2[i] = alpha2[i+l] = min(sum,C[i]);
	sum -= alpha2[i];

	linear_term[i] = - prob->y[i];
	y[i] = 1;

	linear_term[i+l] = prob->y[i];
	y[i+l] = -1;
	}

	Solver_NU s;
	s.Solve(2l, SVR_Q(prob,*param,blas_functions), linear_term, y,
	alpha2, C, param->eps, si, param->shrinking, param->max_iter);

	info("epsilon = %f\n",-si->r);

	for(i=0;i<l;i++)
	alpha[i] = alpha2[i] - alpha2[i+l];

	delete[] alpha2;
	delete[] linear_term;
	delete[] C;
	delete[] y;
	}

	//
	// decision_function
	//
	struct decision_function
	{
	double *alpha;
	double rho;
	int n_iter;
	};

	static decision_function svm_train_one(
	const PREFIX(problem) prob, const svm_parameter param,
	double Cp, double Cn, int status, BlasFunctions blas_functions)
	{
	double *alpha = Malloc(double,prob->l);
	Solver::SolutionInfo si;
	switch(param->svm_type)
	{
	case C_SVC:
	si.upper_bound = Malloc(double,prob->l);
	solve_c_svc(prob,param,alpha,&si,Cp,Cn,blas_functions);
	break;
	case NU_SVC:
	si.upper_bound = Malloc(double,prob->l);
	solve_nu_svc(prob,param,alpha,&si,blas_functions);
	break;
	case ONE_CLASS:
	si.upper_bound = Malloc(double,prob->l);
	solve_one_class(prob,param,alpha,&si,blas_functions);
	break;
	case EPSILON_SVR:
	si.upper_bound = Malloc(double,2*prob->l);
	solve_epsilon_svr(prob,param,alpha,&si,blas_functions);
	break;
	case NU_SVR:
	si.upper_bound = Malloc(double,2*prob->l);
	solve_nu_svr(prob,param,alpha,&si,blas_functions);
	break;
	}

	*status \|= si.solve_timed_out;

	info("obj = %f, rho = %f\n",si.obj,si.rho);

	// output SVs

	int nSV = 0;
	int nBSV = 0;
	for(int i=0;i<prob->l;i++)
	{
	if(fabs(alpha[i]) > 0)
	{
	++nSV;
	if(prob->y[i] > 0)
	{
	if(fabs(alpha[i]) >= si.upper_bound[i])
	++nBSV;
	}
	else
	{
	if(fabs(alpha[i]) >= si.upper_bound[i])
	++nBSV;
	}
	}
	}

	free(si.upper_bound);

	info("nSV = %d, nBSV = %d\n",nSV,nBSV);

	decision_function f;
	f.alpha = alpha;
	f.rho = si.rho;
	f.n_iter = si.n_iter;
	return f;
	}

	// Platt's binary SVM Probabilistic Output: an improvement from Lin et al.
	static void sigmoid_train(
	int l, const double dec_values, const double labels,
	double& A, double& B)
	{
	double prior1=0, prior0 = 0;
	int i;

	for (i=0;i<l;i++)
	if (labels[i] > 0) prior1+=1;
	else prior0+=1;

	int max_iter=100; // Maximal number of iterations
	double min_step=1e-10; // Minimal step taken in line search
	double sigma=1e-12; // For numerically strict PD of Hessian
	double eps=1e-5;
	double hiTarget=(prior1+1.0)/(prior1+2.0);
	double loTarget=1/(prior0+2.0);
	double *t=Malloc(double,l);
	double fApB,p,q,h11,h22,h21,g1,g2,det,dA,dB,gd,stepsize;
	double newA,newB,newf,d1,d2;
	int iter;

	// Initial Point and Initial Fun Value
	A=0.0; B=log((prior0+1.0)/(prior1+1.0));
	double fval = 0.0;

	for (i=0;i<l;i++)
	{
	if (labels[i]>0) t[i]=hiTarget;
	else t[i]=loTarget;
	fApB = dec_values[i]*A+B;
	if (fApB>=0)
	fval += t[i]*fApB + log(1+exp(-fApB));
	else
	fval += (t[i] - 1)*fApB +log(1+exp(fApB));
	}
	for (iter=0;iter<max_iter;iter++)
	{
	// Update Gradient and Hessian (use H' = H + sigma I)
	h11=sigma; // numerically ensures strict PD
	h22=sigma;
	h21=0.0;g1=0.0;g2=0.0;
	for (i=0;i<l;i++)
	{
	fApB = dec_values[i]*A+B;
	if (fApB >= 0)
	{
	p=exp(-fApB)/(1.0+exp(-fApB));
	q=1.0/(1.0+exp(-fApB));
	}
	else
	{
	p=1.0/(1.0+exp(fApB));
	q=exp(fApB)/(1.0+exp(fApB));
	}
	d2=p*q;
	h11+=dec_values[i]dec_values[i]d2;
	h22+=d2;
	h21+=dec_values[i]*d2;
	d1=t[i]-p;
	g1+=dec_values[i]*d1;
	g2+=d1;
	}

	// Stopping Criteria
	if (fabs(g1)<eps && fabs(g2)<eps)
	break;

	// Finding Newton direction: -inv(H') * g
	det=h11h22-h21h21;
	dA=-(h22g1 - h21 g2) / det;
	dB=-(-h21g1+ h11 g2) / det;
	gd=g1dA+g2dB;


	stepsize = 1; // Line Search
	while (stepsize >= min_step)
	{
	newA = A + stepsize * dA;
	newB = B + stepsize * dB;

	// New function value
	newf = 0.0;
	for (i=0;i<l;i++)
	{
	fApB = dec_values[i]*newA+newB;
	if (fApB >= 0)
	newf += t[i]*fApB + log(1+exp(-fApB));
	else
	newf += (t[i] - 1)*fApB +log(1+exp(fApB));
	}
	// Check sufficient decrease
	if (newf<fval+0.0001stepsizegd)
	{
	A=newA;B=newB;fval=newf;
	break;
	}
	else
	stepsize = stepsize / 2.0;
	}

	if (stepsize < min_step)
	{
	info("Line search fails in two-class probability estimates\n");
	break;
	}
	}

	if (iter>=max_iter)
	info("Reaching maximal iterations in two-class probability estimates\n");
	free(t);
	}

	static double sigmoid_predict(double decision_value, double A, double B)
	{
	double fApB = decision_value*A+B;
	// 1-p used later; avoid catastrophic cancellation
	if (fApB >= 0)
	return exp(-fApB)/(1.0+exp(-fApB));
	else
	return 1.0/(1+exp(fApB)) ;
	}

	// Method 2 from the multiclass_prob paper by Wu, Lin, and Weng
	static void multiclass_probability(int k, double *r, double p)
	{
	int t,j;
	int iter = 0, max_iter=max(100,k);
	double *Q=Malloc(double ,k);
	double *Qp=Malloc(double,k);
	double pQp, eps=0.005/k;

	for (t=0;t<k;t++)
	{
	p[t]=1.0/k; // Valid if k = 1
	Q[t]=Malloc(double,k);
	Q[t][t]=0;
	for (j=0;j<t;j++)
	{
	Q[t][t]+=r[j][t]*r[j][t];
	Q[t][j]=Q[j][t];
	}
	for (j=t+1;j<k;j++)
	{
	Q[t][t]+=r[j][t]*r[j][t];
	Q[t][j]=-r[j][t]*r[t][j];
	}
	}
	for (iter=0;iter<max_iter;iter++)
	{
	// stopping condition, recalculate QP,pQP for numerical accuracy
	pQp=0;
	for (t=0;t<k;t++)
	{
	Qp[t]=0;
	for (j=0;j<k;j++)
	Qp[t]+=Q[t][j]*p[j];
	pQp+=p[t]*Qp[t];
	}
	double max_error=0;
	for (t=0;t<k;t++)
	{
	double error=fabs(Qp[t]-pQp);
	if (error>max_error)
	max_error=error;
	}
	if (max_error<eps) break;

	for (t=0;t<k;t++)
	{
	double diff=(-Qp[t]+pQp)/Q[t][t];
	p[t]+=diff;
	pQp=(pQp+diff(diffQ[t][t]+2*Qp[t]))/(1+diff)/(1+diff);
	for (j=0;j<k;j++)
	{
	Qp[j]=(Qp[j]+diff*Q[t][j])/(1+diff);
	p[j]/=(1+diff);
	}
	}
	}
	if (iter>=max_iter)
	info("Exceeds max_iter in multiclass_prob\n");
	for(t=0;t<k;t++) free(Q[t]);
	free(Q);
	free(Qp);
	}

	// Cross-validation decision values for probability estimates
	static void svm_binary_svc_probability(
	const PREFIX(problem) prob, const svm_parameter param,
	double Cp, double Cn, double& probA, double& probB, int * status, BlasFunctions *blas_functions)
	{
	int i;
	int nr_fold = 5;
	int *perm = Malloc(int,prob->l);
	double *dec_values = Malloc(double,prob->l);

	// random shuffle
	for(i=0;i<prob->l;i++) perm[i]=i;
	for(i=0;i<prob->l;i++)
	{
	int j = i+bounded_rand_int(prob->l-i);
	swap(perm[i],perm[j]);
	}
	for(i=0;i<nr_fold;i++)
	{
	int begin = i*prob->l/nr_fold;
	int end = (i+1)*prob->l/nr_fold;
	int j,k;
	struct PREFIX(problem) subprob;

	subprob.l = prob->l-(end-begin);
	#ifdef _DENSE_REP
	subprob.x = Malloc(struct PREFIX(node),subprob.l);
	#else
	subprob.x = Malloc(struct PREFIX(node)*,subprob.l);
	#endif
	subprob.y = Malloc(double,subprob.l);
	subprob.W = Malloc(double,subprob.l);

	k=0;
	for(j=0;j<begin;j++)
	{
	subprob.x[k] = prob->x[perm[j]];
	subprob.y[k] = prob->y[perm[j]];
	subprob.W[k] = prob->W[perm[j]];
	++k;
	}
	for(j=end;j<prob->l;j++)
	{
	subprob.x[k] = prob->x[perm[j]];
	subprob.y[k] = prob->y[perm[j]];
	subprob.W[k] = prob->W[perm[j]];
	++k;
	}
	int p_count=0,n_count=0;
	for(j=0;j<k;j++)
	if(subprob.y[j]>0)
	p_count++;
	else
	n_count++;

	if(p_count==0 && n_count==0)
	for(j=begin;j<end;j++)
	dec_values[perm[j]] = 0;
	else if(p_count > 0 && n_count == 0)
	for(j=begin;j<end;j++)
	dec_values[perm[j]] = 1;
	else if(p_count == 0 && n_count > 0)
	for(j=begin;j<end;j++)
	dec_values[perm[j]] = -1;
	else
	{
	svm_parameter subparam = *param;
	subparam.probability=0;
	subparam.C=1.0;
	subparam.nr_weight=2;
	subparam.weight_label = Malloc(int,2);
	subparam.weight = Malloc(double,2);
	subparam.weight_label[0]=+1;
	subparam.weight_label[1]=-1;
	subparam.weight[0]=Cp;
	subparam.weight[1]=Cn;
	struct PREFIX(model) *submodel = PREFIX(train)(&subprob,&subparam, status, blas_functions);
	for(j=begin;j<end;j++)
	{
	#ifdef _DENSE_REP
	PREFIX(predict_values)(submodel,(prob->x+perm[j]),&(dec_values[perm[j]]), blas_functions);
	#else
	PREFIX(predict_values)(submodel,prob->x[perm[j]],&(dec_values[perm[j]]), blas_functions);
	#endif
	// ensure +1 -1 order; reason not using CV subroutine
	dec_values[perm[j]] *= submodel->label[0];
	}
	PREFIX(free_and_destroy_model)(&submodel);
	PREFIX(destroy_param)(&subparam);
	}
	free(subprob.x);
	free(subprob.y);
	free(subprob.W);
	}
	sigmoid_train(prob->l,dec_values,prob->y,probA,probB);
	free(dec_values);
	free(perm);
	}

	// Return parameter of a Laplace distribution
	static double svm_svr_probability(
	const PREFIX(problem) prob, const svm_parameter param, BlasFunctions *blas_functions)
	{
	int i;
	int nr_fold = 5;
	double *ymv = Malloc(double,prob->l);
	double mae = 0;

	svm_parameter newparam = *param;
	newparam.probability = 0;
	newparam.random_seed = -1; // This is called from train, which already sets
	// the seed.
	PREFIX(cross_validation)(prob,&newparam,nr_fold,ymv, blas_functions);
	for(i=0;i<prob->l;i++)
	{
	ymv[i]=prob->y[i]-ymv[i];
	mae += fabs(ymv[i]);
	}
	mae /= prob->l;
	double std=sqrt(2maemae);
	int count=0;
	mae=0;
	for(i=0;i<prob->l;i++)
	if (fabs(ymv[i]) > 5*std)
	count=count+1;
	else
	mae+=fabs(ymv[i]);
	mae /= (prob->l-count);
	info("Prob. model for test data: target value = predicted value + z,\nz: Laplace distribution e^(-\|z\|/sigma)/(2sigma),sigma= %g\n",mae);
	free(ymv);
	return mae;
	}



	// label: label name, start: begin of each class, count: #data of classes, perm: indices to the original data
	// perm, length l, must be allocated before calling this subroutine
	static void svm_group_classes(const PREFIX(problem) prob, int nr_class_ret, int label_ret, int start_ret, int *count_ret, int perm)
	{
	int l = prob->l;
	int max_nr_class = 16;
	int nr_class = 0;
	int *label = Malloc(int,max_nr_class);
	int *count = Malloc(int,max_nr_class);
	int *data_label = Malloc(int,l);
	int i, j, this_label, this_count;

	for(i=0;i<l;i++)
	{
	this_label = (int)prob->y[i];
	for(j=0;j<nr_class;j++)
	{
	if(this_label == label[j])
	{
	++count[j];
	break;
	}
	}
	if(j == nr_class)
	{
	if(nr_class == max_nr_class)
	{
	max_nr_class *= 2;
	label = (int )realloc(label,max_nr_classsizeof(int));
	count = (int )realloc(count,max_nr_classsizeof(int));
	}
	label[nr_class] = this_label;
	count[nr_class] = 1;
	++nr_class;
	}
	}

	/*
	* Sort labels by straight insertion and apply the same
	* transformation to array count.
	*/
	for(j=1; j<nr_class; j++)
	{
	i = j-1;
	this_label = label[j];
	this_count = count[j];
	while(i>=0 && label[i] > this_label)
	{
	label[i+1] = label[i];
	count[i+1] = count[i];
	i--;
	}
	label[i+1] = this_label;
	count[i+1] = this_count;
	}

	for (i=0; i<l; i++)
	{
	j = 0;
	this_label = (int)prob->y[i];
	while(this_label != label[j]){
	j ++;
	}
	data_label[i] = j;
	}

	int *start = Malloc(int,nr_class);
	start[0] = 0;
	for(i=1;i<nr_class;i++)
	start[i] = start[i-1]+count[i-1];
	for(i=0;i<l;i++)
	{
	perm[start[data_label[i]]] = i;
	++start[data_label[i]];
	}

	start[0] = 0;
	for(i=1;i<nr_class;i++)
	start[i] = start[i-1]+count[i-1];

	*nr_class_ret = nr_class;
	*label_ret = label;
	*start_ret = start;
	*count_ret = count;
	free(data_label);
	}

	} /* end namespace */

	// Remove zero weighed data as libsvm and some liblinear solvers require C > 0.
	//
	static void remove_zero_weight(PREFIX(problem) newprob, const PREFIX(problem) prob)
	{
	int i;
	int l = 0;
	for(i=0;i<prob->l;i++)
	if(prob->W[i] > 0) l++;
	newprob = prob;
	newprob->l = l;
	#ifdef _DENSE_REP
	newprob->x = Malloc(PREFIX(node),l);
	#else
	newprob->x = Malloc(PREFIX(node) *,l);
	#endif
	newprob->y = Malloc(double,l);
	newprob->W = Malloc(double,l);

	int j = 0;
	for(i=0;i<prob->l;i++)
	if(prob->W[i] > 0)
	{
	newprob->x[j] = prob->x[i];
	newprob->y[j] = prob->y[i];
	newprob->W[j] = prob->W[i];
	j++;
	}
	}

	//
	// Interface functions
	//
	PREFIX(model) PREFIX(train)(const PREFIX(problem) prob, const svm_parameter *param,
	int status, BlasFunctions blas_functions)
	{
	PREFIX(problem) newprob;
	remove_zero_weight(&newprob, prob);
	prob = &newprob;

	PREFIX(model) *model = Malloc(PREFIX(model),1);
	model->param = *param;
	model->free_sv = 0; // XXX

	if(param->random_seed >= 0)
	{
	set_seed(param->random_seed);
	}

	if(param->svm_type == ONE_CLASS \|\|
	param->svm_type == EPSILON_SVR \|\|
	param->svm_type == NU_SVR)
	{
	// regression or one-class-svm
	model->nr_class = 2;
	model->label = NULL;
	model->nSV = NULL;
	model->probA = NULL; model->probB = NULL;
	model->sv_coef = Malloc(double *,1);

	if(param->probability &&
	(param->svm_type == EPSILON_SVR \|\|
	param->svm_type == NU_SVR))
	{
	model->probA = Malloc(double,1);
	model->probA[0] = NAMESPACE::svm_svr_probability(prob,param,blas_functions);
	}

	NAMESPACE::decision_function f = NAMESPACE::svm_train_one(prob,param,0,0, status,blas_functions);
	model->rho = Malloc(double,1);
	model->rho[0] = f.rho;
	model->n_iter = Malloc(int,1);
	model->n_iter[0] = f.n_iter;

	int nSV = 0;
	int i;
	for(i=0;i<prob->l;i++)
	if(fabs(f.alpha[i]) > 0) ++nSV;
	model->l = nSV;
	#ifdef _DENSE_REP
	model->SV = Malloc(PREFIX(node),nSV);
	#else
	model->SV = Malloc(PREFIX(node) *,nSV);
	#endif
	model->sv_ind = Malloc(int, nSV);
	model->sv_coef[0] = Malloc(double, nSV);
	int j = 0;
	for(i=0;i<prob->l;i++)
	if(fabs(f.alpha[i]) > 0)
	{
	model->SV[j] = prob->x[i];
	model->sv_ind[j] = i;
	model->sv_coef[0][j] = f.alpha[i];
	++j;
	}

	free(f.alpha);
	}
	else
	{
	// classification
	int l = prob->l;
	int nr_class;
	int *label = NULL;
	int *start = NULL;
	int *count = NULL;
	int *perm = Malloc(int,l);

	// group training data of the same class
	NAMESPACE::svm_group_classes(prob,&nr_class,&label,&start,&count,perm);
	#ifdef _DENSE_REP
	PREFIX(node) *x = Malloc(PREFIX(node),l);
	#else
	PREFIX(node) *x = Malloc(PREFIX(node) ,l);
	#endif
	double *W = Malloc(double, l);

	int i;
	for(i=0;i<l;i++)
	{
	x[i] = prob->x[perm[i]];
	W[i] = prob->W[perm[i]];
	}

	// calculate weighted C

	double *weighted_C = Malloc(double, nr_class);
	for(i=0;i<nr_class;i++)
	weighted_C[i] = param->C;
	for(i=0;i<param->nr_weight;i++)
	{
	int j;
	for(j=0;j<nr_class;j++)
	if(param->weight_label[i] == label[j])
	break;
	if(j == nr_class)
	fprintf(stderr,"warning: class label %d specified in weight is not found\n", param->weight_label[i]);
	else
	weighted_C[j] *= param->weight[i];
	}

	// train k*(k-1)/2 models

	bool *nonzero = Malloc(bool,l);
	for(i=0;i<l;i++)
	nonzero[i] = false;
	NAMESPACE::decision_function f = Malloc(NAMESPACE::decision_function,nr_class(nr_class-1)/2);

	double probA=NULL,probB=NULL;
	if (param->probability)
	{
	probA=Malloc(double,nr_class*(nr_class-1)/2);
	probB=Malloc(double,nr_class*(nr_class-1)/2);
	}

	int p = 0;
	for(i=0;i<nr_class;i++)
	for(int j=i+1;j<nr_class;j++)
	{
	PREFIX(problem) sub_prob;
	int si = start[i], sj = start[j];
	int ci = count[i], cj = count[j];
	sub_prob.l = ci+cj;
	#ifdef _DENSE_REP
	sub_prob.x = Malloc(PREFIX(node),sub_prob.l);
	#else
	sub_prob.x = Malloc(PREFIX(node) *,sub_prob.l);
	#endif
	sub_prob.W = Malloc(double,sub_prob.l);
	sub_prob.y = Malloc(double,sub_prob.l);
	int k;
	for(k=0;k<ci;k++)
	{
	sub_prob.x[k] = x[si+k];
	sub_prob.y[k] = +1;
	sub_prob.W[k] = W[si+k];
	}
	for(k=0;k<cj;k++)
	{
	sub_prob.x[ci+k] = x[sj+k];
	sub_prob.y[ci+k] = -1;
	sub_prob.W[ci+k] = W[sj+k];
	}

	if(param->probability)
	NAMESPACE::svm_binary_svc_probability(&sub_prob,param,weighted_C[i],weighted_C[j],probA[p],probB[p], status, blas_functions);

	f[p] = NAMESPACE::svm_train_one(&sub_prob,param,weighted_C[i],weighted_C[j], status, blas_functions);
	for(k=0;k<ci;k++)
	if(!nonzero[si+k] && fabs(f[p].alpha[k]) > 0)
	nonzero[si+k] = true;
	for(k=0;k<cj;k++)
	if(!nonzero[sj+k] && fabs(f[p].alpha[ci+k]) > 0)
	nonzero[sj+k] = true;
	free(sub_prob.x);
	free(sub_prob.y);
	free(sub_prob.W);
	++p;
	}

	// build output

	model->nr_class = nr_class;

	model->label = Malloc(int,nr_class);
	for(i=0;i<nr_class;i++)
	model->label[i] = label[i];

	model->rho = Malloc(double,nr_class*(nr_class-1)/2);
	model->n_iter = Malloc(int,nr_class*(nr_class-1)/2);
	for(i=0;i<nr_class*(nr_class-1)/2;i++)
	{
	model->rho[i] = f[i].rho;
	model->n_iter[i] = f[i].n_iter;
	}

	if(param->probability)
	{
	model->probA = Malloc(double,nr_class*(nr_class-1)/2);
	model->probB = Malloc(double,nr_class*(nr_class-1)/2);
	for(i=0;i<nr_class*(nr_class-1)/2;i++)
	{
	model->probA[i] = probA[i];
	model->probB[i] = probB[i];
	}
	}
	else
	{
	model->probA=NULL;
	model->probB=NULL;
	}

	int total_sv = 0;
	int *nz_count = Malloc(int,nr_class);
	model->nSV = Malloc(int,nr_class);
	for(i=0;i<nr_class;i++)
	{
	int nSV = 0;
	for(int j=0;j<count[i];j++)
	if(nonzero[start[i]+j])
	{
	++nSV;
	++total_sv;
	}
	model->nSV[i] = nSV;
	nz_count[i] = nSV;
	}

	info("Total nSV = %d\n",total_sv);

	model->l = total_sv;
	model->sv_ind = Malloc(int, total_sv);
	#ifdef _DENSE_REP
	model->SV = Malloc(PREFIX(node),total_sv);
	#else
	model->SV = Malloc(PREFIX(node) *,total_sv);
	#endif
	p = 0;
	for(i=0;i<l;i++) {
	if(nonzero[i]) {
	model->SV[p] = x[i];
	model->sv_ind[p] = perm[i];
	++p;
	}
	}

	int *nz_start = Malloc(int,nr_class);
	nz_start[0] = 0;
	for(i=1;i<nr_class;i++)
	nz_start[i] = nz_start[i-1]+nz_count[i-1];

	model->sv_coef = Malloc(double *,nr_class-1);
	for(i=0;i<nr_class-1;i++)
	model->sv_coef[i] = Malloc(double,total_sv);

	p = 0;
	for(i=0;i<nr_class;i++)
	for(int j=i+1;j<nr_class;j++)
	{
	// classifier (i,j): coefficients with
	// i are in sv_coef[j-1][nz_start[i]...],
	// j are in sv_coef[i][nz_start[j]...]

	int si = start[i];
	int sj = start[j];
	int ci = count[i];
	int cj = count[j];

	int q = nz_start[i];
	int k;
	for(k=0;k<ci;k++)
	if(nonzero[si+k])
	model->sv_coef[j-1][q++] = f[p].alpha[k];
	q = nz_start[j];
	for(k=0;k<cj;k++)
	if(nonzero[sj+k])
	model->sv_coef[i][q++] = f[p].alpha[ci+k];
	++p;
	}

	free(label);
	free(probA);
	free(probB);
	free(count);
	free(perm);
	free(start);
	free(W);
	free(x);
	free(weighted_C);
	free(nonzero);
	for(i=0;i<nr_class*(nr_class-1)/2;i++)
	free(f[i].alpha);
	free(f);
	free(nz_count);
	free(nz_start);
	}
	free(newprob.x);
	free(newprob.y);
	free(newprob.W);
	return model;
	}

	// Stratified cross validation
	void PREFIX(cross_validation)(const PREFIX(problem) prob, const svm_parameter param, int nr_fold, double target, BlasFunctions blas_functions)
	{
	int i;
	int *fold_start = Malloc(int,nr_fold+1);
	int l = prob->l;
	int *perm = Malloc(int,l);
	int nr_class;
	if(param->random_seed >= 0)
	{
	set_seed(param->random_seed);
	}

	// stratified cv may not give leave-one-out rate
	// Each class to l folds -> some folds may have zero elements
	if((param->svm_type == C_SVC \|\|
	param->svm_type == NU_SVC) && nr_fold < l)
	{
	int *start = NULL;
	int *label = NULL;
	int *count = NULL;
	NAMESPACE::svm_group_classes(prob,&nr_class,&label,&start,&count,perm);

	// random shuffle and then data grouped by fold using the array perm
	int *fold_count = Malloc(int,nr_fold);
	int c;
	int *index = Malloc(int,l);
	for(i=0;i<l;i++)
	index[i]=perm[i];
	for (c=0; c<nr_class; c++)
	for(i=0;i<count[c];i++)
	{
	int j = i+bounded_rand_int(count[c]-i);
	swap(index[start[c]+j],index[start[c]+i]);
	}
	for(i=0;i<nr_fold;i++)
	{
	fold_count[i] = 0;
	for (c=0; c<nr_class;c++)
	fold_count[i]+=(i+1)count[c]/nr_fold-icount[c]/nr_fold;
	}
	fold_start[0]=0;
	for (i=1;i<=nr_fold;i++)
	fold_start[i] = fold_start[i-1]+fold_count[i-1];
	for (c=0; c<nr_class;c++)
	for(i=0;i<nr_fold;i++)
	{
	int begin = start[c]+i*count[c]/nr_fold;
	int end = start[c]+(i+1)*count[c]/nr_fold;
	for(int j=begin;j<end;j++)
	{
	perm[fold_start[i]] = index[j];
	fold_start[i]++;
	}
	}
	fold_start[0]=0;
	for (i=1;i<=nr_fold;i++)
	fold_start[i] = fold_start[i-1]+fold_count[i-1];
	free(start);
	free(label);
	free(count);
	free(index);
	free(fold_count);
	}
	else
	{
	for(i=0;i<l;i++) perm[i]=i;
	for(i=0;i<l;i++)
	{
	int j = i+bounded_rand_int(l-i);
	swap(perm[i],perm[j]);
	}
	for(i=0;i<=nr_fold;i++)
	fold_start[i]=i*l/nr_fold;
	}

	for(i=0;i<nr_fold;i++)
	{
	int begin = fold_start[i];
	int end = fold_start[i+1];
	int j,k;
	struct PREFIX(problem) subprob;

	subprob.l = l-(end-begin);
	#ifdef _DENSE_REP
	subprob.x = Malloc(struct PREFIX(node),subprob.l);
	#else
	subprob.x = Malloc(struct PREFIX(node)*,subprob.l);
	#endif
	subprob.y = Malloc(double,subprob.l);
	subprob.W = Malloc(double,subprob.l);

	k=0;
	for(j=0;j<begin;j++)
	{
	subprob.x[k] = prob->x[perm[j]];
	subprob.y[k] = prob->y[perm[j]];
	subprob.W[k] = prob->W[perm[j]];
	++k;
	}
	for(j=end;j<l;j++)
	{
	subprob.x[k] = prob->x[perm[j]];
	subprob.y[k] = prob->y[perm[j]];
	subprob.W[k] = prob->W[perm[j]];
	++k;
	}
	int dummy_status = 0; // IGNORES TIMEOUT ERRORS
	struct PREFIX(model) *submodel = PREFIX(train)(&subprob,param, &dummy_status, blas_functions);
	if(param->probability &&
	(param->svm_type == C_SVC \|\| param->svm_type == NU_SVC))
	{
	double *prob_estimates=Malloc(double, PREFIX(get_nr_class)(submodel));
	for(j=begin;j<end;j++)
	#ifdef _DENSE_REP
	target[perm[j]] = PREFIX(predict_probability)(submodel,(prob->x + perm[j]),prob_estimates, blas_functions);
	#else
	target[perm[j]] = PREFIX(predict_probability)(submodel,prob->x[perm[j]],prob_estimates, blas_functions);
	#endif
	free(prob_estimates);
	}
	else
	for(j=begin;j<end;j++)
	#ifdef _DENSE_REP
	target[perm[j]] = PREFIX(predict)(submodel,prob->x+perm[j],blas_functions);
	#else
	target[perm[j]] = PREFIX(predict)(submodel,prob->x[perm[j]],blas_functions);
	#endif
	PREFIX(free_and_destroy_model)(&submodel);
	free(subprob.x);
	free(subprob.y);
	free(subprob.W);
	}
	free(fold_start);
	free(perm);
	}


	int PREFIX(get_svm_type)(const PREFIX(model) *model)
	{
	return model->param.svm_type;
	}

	int PREFIX(get_nr_class)(const PREFIX(model) *model)
	{
	return model->nr_class;
	}

	void PREFIX(get_labels)(const PREFIX(model) model, int label)
	{
	if (model->label != NULL)
	for(int i=0;i<model->nr_class;i++)
	label[i] = model->label[i];
	}

	double PREFIX(get_svr_probability)(const PREFIX(model) *model)
	{
	if ((model->param.svm_type == EPSILON_SVR \|\| model->param.svm_type == NU_SVR) &&
	model->probA!=NULL)
	return model->probA[0];
	else
	{
	fprintf(stderr,"Model doesn't contain information for SVR probability inference\n");
	return 0;
	}
	}

	double PREFIX(predict_values)(const PREFIX(model) model, const PREFIX(node) x, double* dec_values, BlasFunctions *blas_functions)
	{
	int i;
	if(model->param.svm_type == ONE_CLASS \|\|
	model->param.svm_type == EPSILON_SVR \|\|
	model->param.svm_type == NU_SVR)
	{
	double *sv_coef = model->sv_coef[0];
	double sum = 0;

	for(i=0;i<model->l;i++)
	#ifdef _DENSE_REP
	sum += sv_coef[i] * NAMESPACE::Kernel::k_function(x,model->SV+i,model->param,blas_functions);
	#else
	sum += sv_coef[i] * NAMESPACE::Kernel::k_function(x,model->SV[i],model->param,blas_functions);
	#endif
	sum -= model->rho[0];
	*dec_values = sum;

	if(model->param.svm_type == ONE_CLASS)
	return (sum>0)?1:-1;
	else
	return sum;
	}
	else
	{
	int nr_class = model->nr_class;
	int l = model->l;

	double *kvalue = Malloc(double,l);
	for(i=0;i<l;i++)
	#ifdef _DENSE_REP
	kvalue[i] = NAMESPACE::Kernel::k_function(x,model->SV+i,model->param,blas_functions);
	#else
	kvalue[i] = NAMESPACE::Kernel::k_function(x,model->SV[i],model->param,blas_functions);
	#endif

	int *start = Malloc(int,nr_class);
	start[0] = 0;
	for(i=1;i<nr_class;i++)
	start[i] = start[i-1]+model->nSV[i-1];

	int *vote = Malloc(int,nr_class);
	for(i=0;i<nr_class;i++)
	vote[i] = 0;

	int p=0;
	for(i=0;i<nr_class;i++)
	for(int j=i+1;j<nr_class;j++)
	{
	double sum = 0;
	int si = start[i];
	int sj = start[j];
	int ci = model->nSV[i];
	int cj = model->nSV[j];

	int k;
	double *coef1 = model->sv_coef[j-1];
	double *coef2 = model->sv_coef[i];
	for(k=0;k<ci;k++)
	sum += coef1[si+k] * kvalue[si+k];
	for(k=0;k<cj;k++)
	sum += coef2[sj+k] * kvalue[sj+k];
	sum -= model->rho[p];
	dec_values[p] = sum;

	if(dec_values[p] > 0)
	++vote[i];
	else
	++vote[j];
	p++;
	}

	int vote_max_idx = 0;
	for(i=1;i<nr_class;i++)
	if(vote[i] > vote[vote_max_idx])
	vote_max_idx = i;

	free(kvalue);
	free(start);
	free(vote);
	return model->label[vote_max_idx];
	}
	}

	double PREFIX(predict)(const PREFIX(model) model, const PREFIX(node) x, BlasFunctions *blas_functions)
	{
	int nr_class = model->nr_class;
	double *dec_values;
	if(model->param.svm_type == ONE_CLASS \|\|
	model->param.svm_type == EPSILON_SVR \|\|
	model->param.svm_type == NU_SVR)
	dec_values = Malloc(double, 1);
	else
	dec_values = Malloc(double, nr_class*(nr_class-1)/2);
	double pred_result = PREFIX(predict_values)(model, x, dec_values, blas_functions);
	free(dec_values);
	return pred_result;
	}

	double PREFIX(predict_probability)(
	const PREFIX(model) model, const PREFIX(node) x, double prob_estimates, BlasFunctions blas_functions)
	{
	if ((model->param.svm_type == C_SVC \|\| model->param.svm_type == NU_SVC) &&
	model->probA!=NULL && model->probB!=NULL)
	{
	int i;
	int nr_class = model->nr_class;
	double dec_values = Malloc(double, nr_class(nr_class-1)/2);
	PREFIX(predict_values)(model, x, dec_values, blas_functions);

	double min_prob=1e-7;
	double *pairwise_prob=Malloc(double ,nr_class);
	for(i=0;i<nr_class;i++)
	pairwise_prob[i]=Malloc(double,nr_class);
	int k=0;
	for(i=0;i<nr_class;i++)
	for(int j=i+1;j<nr_class;j++)
	{
	pairwise_prob[i][j]=min(max(NAMESPACE::sigmoid_predict(dec_values[k],model->probA[k],model->probB[k]),min_prob),1-min_prob);
	pairwise_prob[j][i]=1-pairwise_prob[i][j];
	k++;
	}
	NAMESPACE::multiclass_probability(nr_class,pairwise_prob,prob_estimates);

	int prob_max_idx = 0;
	for(i=1;i<nr_class;i++)
	if(prob_estimates[i] > prob_estimates[prob_max_idx])
	prob_max_idx = i;
	for(i=0;i<nr_class;i++)
	free(pairwise_prob[i]);
	free(dec_values);
	free(pairwise_prob);
	return model->label[prob_max_idx];
	}
	else
	return PREFIX(predict)(model, x, blas_functions);
	}


	void PREFIX(free_model_content)(PREFIX(model)* model_ptr)
	{
	if(model_ptr->free_sv && model_ptr->l > 0 && model_ptr->SV != NULL)
	#ifdef _DENSE_REP
	for (int i = 0; i < model_ptr->l; i++)
	free(model_ptr->SV[i].values);
	#else
	free((void *)(model_ptr->SV[0]));
	#endif

	if(model_ptr->sv_coef)
	{
	for(int i=0;i<model_ptr->nr_class-1;i++)
	free(model_ptr->sv_coef[i]);
	}

	free(model_ptr->SV);
	model_ptr->SV = NULL;

	free(model_ptr->sv_coef);
	model_ptr->sv_coef = NULL;

	free(model_ptr->sv_ind);
	model_ptr->sv_ind = NULL;

	free(model_ptr->rho);
	model_ptr->rho = NULL;

	free(model_ptr->label);
	model_ptr->label= NULL;

	free(model_ptr->probA);
	model_ptr->probA = NULL;

	free(model_ptr->probB);
	model_ptr->probB= NULL;

	free(model_ptr->nSV);
	model_ptr->nSV = NULL;

	free(model_ptr->n_iter);
	model_ptr->n_iter = NULL;
	}

	void PREFIX(free_and_destroy_model)(PREFIX(model)** model_ptr_ptr)
	{
	if(model_ptr_ptr != NULL && *model_ptr_ptr != NULL)
	{
	PREFIX(free_model_content)(*model_ptr_ptr);
	free(*model_ptr_ptr);
	*model_ptr_ptr = NULL;
	}
	}

	void PREFIX(destroy_param)(svm_parameter* param)
	{
	free(param->weight_label);
	free(param->weight);
	}

	const char PREFIX(check_parameter)(const PREFIX(problem) prob, const svm_parameter *param)
	{
	// svm_type

	int svm_type = param->svm_type;
	if(svm_type != C_SVC &&
	svm_type != NU_SVC &&
	svm_type != ONE_CLASS &&
	svm_type != EPSILON_SVR &&
	svm_type != NU_SVR)
	return "unknown svm type";

	// kernel_type, degree

	int kernel_type = param->kernel_type;
	if(kernel_type != LINEAR &&
	kernel_type != POLY &&
	kernel_type != RBF &&
	kernel_type != SIGMOID &&
	kernel_type != PRECOMPUTED)
	return "unknown kernel type";

	if(param->gamma < 0)
	return "gamma < 0";

	if(param->degree < 0)
	return "degree of polynomial kernel < 0";

	// cache_size,eps,C,nu,p,shrinking

	if(param->cache_size <= 0)
	return "cache_size <= 0";

	if(param->eps <= 0)
	return "eps <= 0";

	if(svm_type == C_SVC \|\|
	svm_type == EPSILON_SVR \|\|
	svm_type == NU_SVR)
	if(param->C <= 0)
	return "C <= 0";

	if(svm_type == NU_SVC \|\|
	svm_type == ONE_CLASS \|\|
	svm_type == NU_SVR)
	if(param->nu <= 0 \|\| param->nu > 1)
	return "nu <= 0 or nu > 1";

	if(svm_type == EPSILON_SVR)
	if(param->p < 0)
	return "p < 0";

	if(param->shrinking != 0 &&
	param->shrinking != 1)
	return "shrinking != 0 and shrinking != 1";

	if(param->probability != 0 &&
	param->probability != 1)
	return "probability != 0 and probability != 1";

	if(param->probability == 1 &&
	svm_type == ONE_CLASS)
	return "one-class SVM probability output not supported yet";


	// check whether nu-svc is feasible

	if(svm_type == NU_SVC)
	{
	int l = prob->l;
	int max_nr_class = 16;
	int nr_class = 0;
	int *label = Malloc(int,max_nr_class);
	double *count = Malloc(double,max_nr_class);

	int i;
	for(i=0;i<l;i++)
	{
	int this_label = (int)prob->y[i];
	int j;
	for(j=0;j<nr_class;j++)
	if(this_label == label[j])
	{
	count[j] += prob->W[i];
	break;
	}
	if(j == nr_class)
	{
	if(nr_class == max_nr_class)
	{
	max_nr_class *= 2;
	label = (int )realloc(label,max_nr_classsizeof(int));
	count = (double )realloc(count,max_nr_classsizeof(double));

	}
	label[nr_class] = this_label;
	count[nr_class] = prob->W[i];
	++nr_class;
	}
	}

	for(i=0;i<nr_class;i++)
	{
	double n1 = count[i];
	for(int j=i+1;j<nr_class;j++)
	{
	double n2 = count[j];
	if(param->nu*(n1+n2)/2 > min(n1,n2))
	{
	free(label);
	free(count);
	return "specified nu is infeasible";
	}
	}
	}
	free(label);
	free(count);
	}

	if(svm_type == C_SVC \|\|
	svm_type == EPSILON_SVR \|\|
	svm_type == NU_SVR \|\|
	svm_type == ONE_CLASS)
	{
	PREFIX(problem) newprob;
	// filter samples with negative and null weights
	remove_zero_weight(&newprob, prob);

	// all samples were removed
	if(newprob.l == 0) {
	free(newprob.x);
	free(newprob.y);
	free(newprob.W);
	return "Invalid input - all samples have zero or negative weights.";
	}
	else if(prob->l != newprob.l &&
	svm_type == C_SVC)
	{
	bool only_one_label = true;
	int first_label = newprob.y[0];
	for(int i=1;i<newprob.l;i++)
	{
	if(newprob.y[i] != first_label)
	{
	only_one_label = false;
	break;
	}
	}
	if(only_one_label) {
	free(newprob.x);
	free(newprob.y);
	free(newprob.W);
	return "Invalid input - all samples with positive weights belong to the same class.";
	}
	}

	free(newprob.x);
	free(newprob.y);
	free(newprob.W);
	}
	return NULL;
	}

	void PREFIX(set_print_string_function)(void (print_func)(const char ))
	{
	if(print_func == NULL)
	svm_print_string = &print_string_stdout;
	else
	svm_print_string = print_func;
	}