/* Source path: PreMode/analysis/Hsu.et.al.git/plmc/src/inference.c
   Repository page metadata: "Upload folder using huggingface_hub",
   revision 7718235 (verified, about 1 year ago), 47.6 kB */

	#include <stdlib.h>
	#include <ctype.h>
	#include <math.h>
	#include <stdio.h>
	#include <sys/time.h>
	#include <assert.h>
	#include <string.h>

	/* Optionally include OpenMP with the -fopenmp flag */
	#if defined(_OPENMP)
	#include <omp.h>
	#endif

	#include "include/lbfgs.h"
	#include "include/twister.h"

	#include "include/plm.h"
	#include "include/inference.h"

	/* Internal prototypes */
	numeric_t ElapsedTime(struct timeval *start);

	/* Numerical bounds for ZeroAPCPriors */
	#define LAMBDA_J_MIN 1E-2
	#define LAMBDA_J_MAX 1E4
	#define REGULARIZATION_GROUP_EPS 0.001

	/* Internal to InferPairModel:
	MAP estimation of parameters by L-BFGS */
	void EstimatePairModelMAP(numeric_t x, numeric_t lambdas, alignment_t *ali,
	options_t *options);
	/* Internal to EstimatePairModelMAP:
	Stochastic optimization with SGD */
	typedef numeric_t (gradfun_t) (void data, const numeric_t x, numeric_t g,
	const int n);
	void SGDOptimize(gradfun_t grad, void data, numeric_t x, const int n,
	const int maxIter, const numeric_t crit);
	numeric_t SGDWrapperPLM(void data, const numeric_t x, numeric_t *g, const int n);
	/* Internal to EstimatePairModelMAP:
	Objective functions for point parameter estimates (MAP) */
	static lbfgsfloatval_t PLMNegLogPosterior(void *instance,
	const lbfgsfloatval_t x, lbfgsfloatval_t g, const int n,
	const lbfgsfloatval_t step);
	static lbfgsfloatval_t PLMNegLogPosteriorGapReduce(void *instance,
	const lbfgsfloatval_t x, lbfgsfloatval_t g, const int n,
	const lbfgsfloatval_t step);
	static lbfgsfloatval_t PLMNegLogPosteriorBlock(void *instance,
	const lbfgsfloatval_t x, lbfgsfloatval_t g, const int n,
	const lbfgsfloatval_t step);
	static lbfgsfloatval_t PLMNegLogPosteriorDO(void *instance,
	const lbfgsfloatval_t x, lbfgsfloatval_t g, const int n,
	const lbfgsfloatval_t step);
	/* Internal to EstimatePairModelMAP: progress reporting */
	static int ReportProgresslBFGS(void instance, const lbfgsfloatval_t x,
	const lbfgsfloatval_t *g, const lbfgsfloatval_t fx,
	const lbfgsfloatval_t xnorm, const lbfgsfloatval_t gnorm,
	const lbfgsfloatval_t step, int n, int k, int ls);
	/* Internal to EstimatePairModelMAP: parameter processing */
	void PreCondition(const lbfgsfloatval_t x, lbfgsfloatval_t g,
	alignment_t ali, options_t options);
	lbfgsfloatval_t PostCondition(const lbfgsfloatval_t x, lbfgsfloatval_t g, lbfgsfloatval_t fx,
	alignment_t ali, options_t options);
	void ZeroAPCPriors(alignment_t ali, options_t options, numeric_t *lambdas,
	lbfgsfloatval_t *x);
	/* Internal to EstimatePairModelMAP: utility functions to L-BFGS */
	const char *LBFGSErrorString(int ret);

	numeric_t *InferPairModel(alignment_t *ali, options_t *options) {
	    /* Estimate the parameters of a maximum entropy model for a
	       multiple sequence alignment.

	       Arguments:
	           ali      alignment; ali->nParams is set here, and for the
	                    gap-reduced estimator ali->nCodes is lowered to
	                    strlen(ali->alphabet) - 1 (it is not restored)
	           options  inference options (estimator, lambdaH, lambdaE, ...)

	       Returns a malloc'd vector of ali->nParams parameters that the
	       caller owns. Exits the process if an allocation fails.

	       NOTE(review): nParams is computed in int arithmetic and may
	       overflow for very long alignments. */

	    /* Initialize the regularization parameters:
	       one per site plus one per pair of sites */
	    numeric_t *lambdas = (numeric_t *)
	        malloc((ali->nSites + ali->nSites * (ali->nSites - 1) / 2)
	               * sizeof(numeric_t));
	    if (lambdas == NULL) {
	        fprintf(stderr,
	            "ERROR: Failed to allocate a memory block for hyperparameters.\n");
	        exit(1);
	    }
	    for (int i = 0; i < ali->nSites; i++) lambdaHi(i) = options->lambdaH;
	    for (int i = 0; i < ali->nSites - 1; i++)
	        for (int j = i + 1; j < ali->nSites; j++)
	            lambdaEij(i, j) = options->lambdaE;

	    /* For gap-reduced problems, eliminate the gaps and reduce the alphabet.
	       Every code is shifted down by one; NOTE(review): this presumably
	       maps the gap symbol to a negative code, which requires a signed
	       letter_t - confirm against the type definition. */
	    if (options->estimatorMAP == INFER_MAP_PLM_GAPREDUCE) {
	        ali->nCodes = strlen(ali->alphabet) - 1;
	        for (int i = 0; i < ali->nSites; i++)
	            for (int s = 0; s < ali->nSeqs; s++)
	                seq(s, i) -= 1;
	    }

	    /* Initialize parameters */
	    ali->nParams = ali->nSites * ali->nCodes
	        + ali->nSites * (ali->nSites - 1) / 2 * ali->nCodes * ali->nCodes;
	    numeric_t *x = (numeric_t *) malloc(sizeof(numeric_t) * ali->nParams);
	    if (x == NULL) {
	        fprintf(stderr,
	            "ERROR: Failed to allocate a memory block for variables.\n");
	        exit(1);
	    }
	    for (int i = 0; i < ali->nParams; i++) x[i] = 0.0;

	    /* Initialize site parameters with the ML estimates
	           hi = log(fi) + C
	       A single pseudocount is added for stability
	       (Laplace's rule or Morcos et al. with lambda = nCodes) */
	    if (options->zeroAPC != 1) {
	        for (int i = 0; i < ali->nSites; i++)
	            for (int ai = 0; ai < ali->nCodes; ai++)
	                xHi(i, ai) = log(fi(i, ai) * ali->nEff + 1.0);

	        /* Zero-sum gauge: subtract the per-site mean of the fields */
	        for (int i = 0; i < ali->nSites; i++) {
	            numeric_t hSum = 0.0;
	            for (int ai = 0; ai < ali->nCodes; ai++) hSum += xHi(i, ai);
	            numeric_t hShift = hSum / (numeric_t) ali->nCodes;
	            for (int ai = 0; ai < ali->nCodes; ai++)
	                xHi(i, ai) -= hShift;
	        }
	    }

	    switch (options->estimator) {
	        /* Point estimates */
	        case INFER_MAP:
	            /* Maximum a posteriori estimates of model parameters */
	            EstimatePairModelMAP(x, lambdas, ali, options);
	            break;
	        /* For: future alternative estimators */
	        default:
	            /* Maximum a posteriori estimates of model parameters */
	            EstimatePairModelMAP(x, lambdas, ali, options);
	            break;
	    }

	    /* Restore the alignment encoding after inference */
	    if (options->estimatorMAP == INFER_MAP_PLM_GAPREDUCE) {
	        for (int i = 0; i < ali->nSites; i++)
	            for (int s = 0; s < ali->nSeqs; s++)
	                seq(s, i) += 1;
	    }

	    /* The hyperparameters are only needed during estimation */
	    free(lambdas);

	    return (numeric_t *) x;
	}

	void EstimatePairModelMAP(numeric_t *x, numeric_t *lambdas, alignment_t *ali,
	    options_t *options) {
	    /* Computes maximum a posteriori (MAP) estimates for the parameters of
	       an undirected graphical model, by L-BFGS or (when options->sgd == 1)
	       by minibatch SGD.

	       Arguments:
	           x        parameter vector of length ali->nParams, updated in place
	           lambdas  regularization strengths, rewritten by ZeroAPCPriors
	                    when options->zeroAPC == 1
	           ali      alignment; ali->start is reset to the current time
	           options  inference options; zeroAPC is set to 2 after the
	                    hyperparameter re-estimation pass */

	    /* Start timer */
	    gettimeofday(&ali->start, NULL);

	    /* Initialize L-BFGS */
	    lbfgs_parameter_t param;
	    lbfgs_parameter_init(&param);
	    param.epsilon = 1E-3;
	    param.max_iterations = options->maxIter; /* 0 is unbounded */

	    /* Select the objective function */
	    lbfgs_evaluate_t algo;
	    switch (options->estimatorMAP) {
	        case INFER_MAP_PLM:
	            algo = PLMNegLogPosterior;
	            break;
	        case INFER_MAP_PLM_GAPREDUCE:
	            algo = PLMNegLogPosteriorGapReduce;
	            break;
	        case INFER_MAP_PLM_BLOCK:
	            algo = PLMNegLogPosteriorBlock;
	            break;
	        case INFER_MAP_PLM_DROPOUT:
	            algo = PLMNegLogPosteriorDO;
	            break;
	        default:
	            algo = PLMNegLogPosterior;
	            break;
	    }

	    if (options->zeroAPC == 1) fprintf(stderr,
	        "Estimating coupling hyperparameters le = 1/2 inverse variance\n");

	    /* Problem instance in void array */
	    void *d[3] = {(void *) ali, (void *) options, (void *) lambdas};

	    if (options->sgd == 1) {
	        /* Scale hyperparams for minibatch */
	        numeric_t scale = (numeric_t) options->sgdBatchSize / ali->nEff;
	        options->lambdaGroup *= scale;
	        for (int i = 0; i < ali->nSites; i++) lambdaHi(i) *= scale;
	        for (int i = 0; i < ali->nSites - 1; i++)
	            for (int j = i + 1; j < ali->nSites; j++)
	                lambdaEij(i, j) *= scale;

	        /* SGD optimization; the wrapped objective travels as a 4th entry */
	        numeric_t crit = 0.01;
	        void *sgdData[4] = {(void *) ali, (void *) options,
	                            (void *) lambdas, (void *) algo};
	        SGDOptimize(SGDWrapperPLM, sgdData, x, ali->nParams,
	                    options->maxIter, crit);

	        /* Unscale hyperparams for minibatch */
	        numeric_t invScale = ali->nEff / (numeric_t) options->sgdBatchSize;
	        options->lambdaGroup *= invScale;
	        for (int i = 0; i < ali->nSites; i++) lambdaHi(i) *= invScale;
	        for (int i = 0; i < ali->nSites - 1; i++)
	            for (int j = i + 1; j < ali->nSites; j++)
	                lambdaEij(i, j) *= invScale;
	    } else {
	        /* L-BFGS optimization */
	        lbfgsfloatval_t fx;
	        int ret = lbfgs(ali->nParams, x, &fx, algo, ReportProgresslBFGS,
	                        (void *) d, &param);
	        fprintf(stderr, "Gradient optimization: %s\n", LBFGSErrorString(ret));
	    }

	    /* Optionally re-estimate parameters with adjusted hyperparameters */
	    if (options->zeroAPC == 1) {
	        /* Form new priors on the variances */
	        ZeroAPCPriors(ali, options, lambdas, x);

	        /* Reinitialize coupling parameters */
	        for (int i = 0; i < ali->nSites - 1; i++)
	            for (int j = i + 1; j < ali->nSites; j++)
	                for (int ai = 0; ai < ali->nCodes; ai++)
	                    for (int aj = 0; aj < ali->nCodes; aj++)
	                        xEij(i, j, ai, aj) = 0.0;

	        /* Iterate estimation with new hyperparameter estimates.
	           This second pass always uses L-BFGS, even after SGD. */
	        options->zeroAPC = 2;
	        lbfgsfloatval_t fx2;
	        int ret2 = lbfgs(ali->nParams, x, &fx2, algo,
	                         ReportProgresslBFGS, (void *) d, &param);
	        fprintf(stderr, "Gradient optimization: %s\n", LBFGSErrorString(ret2));
	    }
	}

	void SGDOptimize(gradfun_t grad, void *data, numeric_t *x, const int n,
	    const int maxIter, const numeric_t crit) {
	    /* Optimize an objective function by Stochastic Gradient Descent (Adam)
	       Arguments:
	           grad     returns the objective and writes its gradient into g
	           data     opaque pointer handed to grad
	           x        estimated parameters (length n), updated in place
	           n        number of parameters
	           maxIter  maximum number of iterations (at least one is run)
	           crit     stop when the mean absolute value of the smoothed
	                    gradient (plus 1E-6) drops to crit or below

	       On return x holds an exponential moving average of the iterates
	       (decay BETA1, started from zero, no bias correction).
	       Exits the process if an allocation fails. */
	    const numeric_t BETA1 = 0.9;
	    const numeric_t BETA2 = 0.99;
	    const numeric_t EPSILON = 1E-8;

	    /* Gradient, iterate average, and first/second gradient moments */
	    numeric_t *g = (numeric_t *) malloc(n * sizeof(numeric_t));
	    numeric_t *meanX = (numeric_t *) malloc(n * sizeof(numeric_t));
	    numeric_t *meanG = (numeric_t *) malloc(n * sizeof(numeric_t));
	    numeric_t *squareG = (numeric_t *) malloc(n * sizeof(numeric_t));
	    if (n > 0 && (g == NULL || meanX == NULL
	                  || meanG == NULL || squareG == NULL)) {
	        fprintf(stderr,
	            "ERROR: Failed to allocate a memory block for SGD.\n");
	        exit(1);
	    }
	    for (int i = 0; i < n; i++) meanX[i] = meanG[i] = squareG[i] = 0;

	    /* Begin profiling */
	    struct timeval start;
	    gettimeofday(&start, NULL);

	    /* Optimization loop */
	    numeric_t criterion = crit + 1.0;
	    int t = 1;
	    do {
	        /* Estimate the gradient */
	        for (int i = 0; i < n; i++) g[i] = 0;
	        numeric_t f = grad(data, x, g, n);

	        /* Update estimates of moments */
	        for (int i = 0; i < n; i++) {
	            meanG[i] = BETA1 * meanG[i] + (1.0 - BETA1) * g[i];
	            squareG[i] = BETA2 * squareG[i] + (1.0 - BETA2) * g[i] * g[i];
	        }

	        /* Step size: 0.01 halved every 50 iterations (t / 50 is an
	           integer division), with Adam bias correction folded in */
	        numeric_t schedule = 0.01 * pow(0.5, (t / 50));
	        numeric_t alpha = schedule
	            * sqrt(1.0 - pow(BETA2, (numeric_t) t))
	            / (1.0 - pow(BETA1, (numeric_t) t));
	        for (int i = 0; i < n; i++)
	            x[i] -= meanG[i] * alpha / (sqrt(squareG[i]) + EPSILON);

	        /* Update the moving average of the iterates */
	        for (int i = 0; i < n; i++)
	            meanX[i] = BETA1 * meanX[i] + (1 - BETA1) * x[i];

	        /* Norms for reporting; only the gradient norm is used to stop */
	        numeric_t paramNorm = 1E-6;
	        for (int i = 0; i < n; i++) paramNorm += fabs(x[i]) / (numeric_t) n;
	        numeric_t gradNorm = 1E-6;
	        for (int i = 0; i < n; i++)
	            gradNorm += fabs(meanG[i]) / (numeric_t) n;
	        criterion = gradNorm;

	        if (t == 1)
	            fprintf(stderr, "iter\ttime\tobj\t|x|\t|g|\tcrit\n");
	        fprintf(stderr, "%d\t%.1f\t%.1f\t%.1f\t%.1f\t%.1f\n",
	            t, ElapsedTime(&start), f, paramNorm, gradNorm, criterion);
	        t++;
	    } while (t <= maxIter && criterion > crit);

	    /* Report the averaged iterate rather than the last one */
	    for (int i = 0; i < n; i++) x[i] = meanX[i];

	    free(meanX);
	    free(meanG);
	    free(squareG);
	    free(g);
	}

	numeric_t SGDWrapperPLM(void *data, const numeric_t *x, numeric_t *g,
	    const int n) {
	    /* Wrap an L-BFGS objective function to support minibatched
	       Stochastic Gradient Descent (SGD).

	       Arguments:
	           data  array of four pointers: alignment, options, lambdas and
	                 the lbfgs_evaluate_t objective to evaluate
	           x     current parameters (length n)
	           g     receives the rescaled minibatch gradient (length n)
	           n     number of parameters

	       Draws options->sgdBatchSize sequences with replacement, with
	       probability proportional to their weights, evaluates the objective
	       on that unit-weight batch, and rescales value and gradient by
	       (sum of weights) / batchSize. Returns the rescaled objective.
	       Exits the process if an allocation fails. */
	    void **d = (void **) data;
	    alignment_t *ali = (alignment_t *) d[0];
	    options_t *options = (options_t *) d[1];
	    numeric_t *lambdas = (numeric_t *) d[2];
	    lbfgs_evaluate_t lbfgsfun = (lbfgs_evaluate_t) d[3];
	    int batchSize = options->sgdBatchSize;

	    /* Scratch space for sampling, and shallow copies of the structs */
	    alignment_t *aliBatch = (alignment_t *) malloc(sizeof(alignment_t));
	    options_t *optionsBatch = (options_t *) malloc(sizeof(options_t));
	    numeric_t *CDF = (numeric_t *) malloc(sizeof(numeric_t) * ali->nSeqs);
	    int *indices = (int *) malloc(sizeof(int) * batchSize);
	    numeric_t *u = (numeric_t *) malloc(sizeof(numeric_t) * batchSize);
	    if (aliBatch == NULL || optionsBatch == NULL || CDF == NULL
	        || indices == NULL || u == NULL) {
	        fprintf(stderr,
	            "ERROR: Failed to allocate a memory block for the minibatch.\n");
	        exit(1);
	    }
	    *aliBatch = *ali;
	    *optionsBatch = *options;

	    /* Build CDF of the sequence weights */
	    numeric_t weightSum = 0;
	    for (int i = 0; i < ali->nSeqs; i++) weightSum += ali->weights[i];
	    CDF[0] = ali->weights[0] / weightSum;
	    for (int i = 1; i < ali->nSeqs; i++)
	        CDF[i] = CDF[i - 1] + ali->weights[i] / weightSum;

	    /* Sample a batch of sequences by inverting the CDF */
	    for (int i = 0; i < batchSize; i++) indices[i] = -1;
	    for (int i = 0; i < batchSize; i++) u[i] = (numeric_t) genrand_real3();
	    for (int s = 0; s < ali->nSeqs; s++)
	        for (int i = 0; i < batchSize; i++)
	            if (indices[i] < 0 && u[i] <= CDF[s]) indices[i] = s;
	    /* Rounding can leave CDF[nSeqs - 1] slightly below a draw; such draws
	       belong to the last sequence (not to index batchSize - 1, which may
	       not even be a valid sequence index) */
	    for (int i = 0; i < batchSize; i++)
	        if (indices[i] < 0) indices[i] = ali->nSeqs - 1;

	    /* Clone mini-alignment and weights */
	    aliBatch->sequences = (letter_t *)
	        malloc(sizeof(letter_t) * batchSize * ali->nSites);
	    aliBatch->weights = (numeric_t *) malloc(sizeof(numeric_t) * batchSize);
	    if (aliBatch->sequences == NULL || aliBatch->weights == NULL) {
	        fprintf(stderr,
	            "ERROR: Failed to allocate a memory block for the minibatch.\n");
	        exit(1);
	    }
	    for (int i = 0; i < batchSize; i++)
	        aliBatch->weights[i] = 1.0;
	    for (int i = 0; i < batchSize; i++)
	        for (int j = 0; j < ali->nSites; j++)
	            aliBatch->sequences[j + i * ali->nSites] = seq(indices[i], j);
	    free(u);
	    free(CDF);
	    free(indices);
	    aliBatch->nSeqs = batchSize;

	    /* Run the wrapped objective */
	    void *instance[3] = {(void *) aliBatch, (void *) optionsBatch,
	                         (void *) lambdas};
	    numeric_t f = lbfgsfun(instance, x, g, n, 0);

	    /* Rescale */
	    numeric_t scale = weightSum / (numeric_t) batchSize;
	    f *= scale;
	    for (int i = 0; i < n; i++) g[i] *= scale;

	    /* Clean up: only the cloned arrays belong to the batch copy */
	    free(aliBatch->sequences);
	    free(aliBatch->weights);
	    free(aliBatch);
	    free(optionsBatch);
	    return f;
	}


	static lbfgsfloatval_t PLMNegLogPosterior(void *instance,
	    const lbfgsfloatval_t *x, lbfgsfloatval_t *g, const int n,
	    const lbfgsfloatval_t step) {
	    /* Compute the negative log posterior, which is the negative
	       penalized log-(pseudo)likelihood and the objective for MAP inference.

	       Arguments:
	           instance  array of pointers: alignment, options, lambdas
	           x         current parameters (ali->nParams entries)
	           g         receives the gradient (ali->nParams entries)
	           n, step   part of the L-BFGS callback signature; unused here

	       Returns the objective; ali->negLogLk is set to the unpenalized
	       negative log-pseudolikelihood.

	       The siteE/siteH/siteDE/siteDH, xEij/xHi, dEij/dHi, lambdaEij/
	       lambdaHi and seq macros are defined outside this section; they
	       presumably index Xi, Di, x, g, lambdas and ali by name, so those
	       local names are kept as they are. */
	    void **d = (void **) instance;
	    alignment_t *ali = (alignment_t *) d[0];
	    options_t *options = (options_t *) d[1];
	    numeric_t *lambdas = (numeric_t *) d[2];

	    /* Initialize log-likelihood and gradient */
	    lbfgsfloatval_t fx = 0.0;
	    for (int i = 0; i < ali->nParams; i++) g[i] = 0;

	    /* Negative log-pseudolikelihood, one conditional per site */
	    #pragma omp parallel for
	    for (int i = 0; i < ali->nSites; i++) {
	        numeric_t *H = (numeric_t *) malloc(ali->nCodes * sizeof(numeric_t));
	        numeric_t *P = (numeric_t *) malloc(ali->nCodes * sizeof(numeric_t));

	        numeric_t siteFx = 0.0;
	        /* Reshape site parameters and gradient into local blocks */
	        numeric_t *Xi = (numeric_t *) malloc(ali->nCodes * ali->nCodes
	            * ali->nSites * sizeof(numeric_t));
	        for (int j = 0; j < ali->nSites; j++) {
	            if (j == i) continue;
	            for (int a = 0; a < ali->nCodes; a++)
	                for (int b = 0; b < ali->nCodes; b++)
	                    siteE(j, a, b) = xEij(i, j, a, b);
	        }
	        for (int a = 0; a < ali->nCodes; a++) siteH(i, a) = xHi(i, a);

	        numeric_t *Di = (numeric_t *) malloc(ali->nCodes * ali->nCodes
	            * ali->nSites * sizeof(numeric_t));
	        for (int k = 0; k < ali->nCodes * ali->nCodes * ali->nSites; k++)
	            Di[k] = 0.0;

	        /* Site negative conditional log likelihoods */
	        for (int s = 0; s < ali->nSeqs; s++) {
	            /* Compute potentials */
	            for (int a = 0; a < ali->nCodes; a++) H[a] = siteH(i, a);
	            for (int j = 0; j < ali->nSites; j++) {
	                if (j == i) continue;
	                for (int a = 0; a < ali->nCodes; a++)
	                    H[a] += siteE(j, a, seq(s, j));
	            }

	            /* Conditional distribution given sequence background;
	               the maximum is subtracted before exp() to avoid overflow */
	            numeric_t scale = H[0];
	            for (int a = 1; a < ali->nCodes; a++)
	                scale = (scale >= H[a] ? scale : H[a]);
	            for (int a = 0; a < ali->nCodes; a++) P[a] = exp(H[a] - scale);
	            numeric_t Z = 0;
	            for (int a = 0; a < ali->nCodes; a++) Z += P[a];
	            numeric_t Zinv = 1.0 / Z;
	            for (int a = 0; a < ali->nCodes; a++) P[a] *= Zinv;

	            /* Log-likelihood contributions are scaled by sequence weight */
	            numeric_t w = ali->weights[s];
	            siteFx -= w * log(P[seq(s, i)]);

	            /* Field gradient: -w * (indicator - P) */
	            siteDH(i, seq(s, i)) -= w;
	            for (int a = 0; a < ali->nCodes; a++)
	                siteDH(i, a) -= -w * P[a];

	            /* Couplings gradient: indicator term, then expectation term */
	            int ix = seq(s, i);
	            for (int j = 0; j < ali->nSites; j++) {
	                if (j == i) continue;
	                siteDE(j, ix, seq(s, j)) -= w;
	            }
	            for (int j = 0; j < ali->nSites; j++) {
	                if (j == i) continue;
	                for (int a = 0; a < ali->nCodes; a++)
	                    siteDE(j, a, seq(s, j)) -= -w * P[a];
	            }
	        }

	        /* Contribute local loglk and gradient to global; serialized
	           because each pair (i, j) is written by both site i and site j */
	        #pragma omp critical
	        {
	            fx += siteFx;
	            for (int j = 0; j < ali->nSites; j++) {
	                if (j == i) continue;
	                for (int a = 0; a < ali->nCodes; a++)
	                    for (int b = 0; b < ali->nCodes; b++)
	                        dEij(i, j, a, b) += siteDE(j, a, b);
	            }
	            for (int a = 0; a < ali->nCodes; a++) dHi(i, a) += siteDH(i, a);
	        }

	        /* Thread-local buffers need no lock to release */
	        free(Xi);
	        free(Di);
	        free(H);
	        free(P);
	    }

	    ali->negLogLk = fx;

	    /* Gaussian priors */
	    for (int i = 0; i < ali->nSites; i++)
	        for (int ai = 0; ai < ali->nCodes; ai++) {
	            dHi(i, ai) += lambdaHi(i) * 2.0 * xHi(i, ai);
	            fx += lambdaHi(i) * xHi(i, ai) * xHi(i, ai);
	        }

	    for (int i = 0; i < ali->nSites - 1; i++)
	        for (int j = i + 1; j < ali->nSites; j++)
	            for (int ai = 0; ai < ali->nCodes; ai++)
	                for (int aj = 0; aj < ali->nCodes; aj++) {
	                    dEij(i, j, ai, aj) += lambdaEij(i, j)
	                        * 2.0 * xEij(i, j, ai, aj);
	                    fx += lambdaEij(i, j)
	                        * xEij(i, j, ai, aj) * xEij(i, j, ai, aj);
	                }

	    fx = PostCondition(x, g, fx, ali, options);
	    return fx;
	}

	static lbfgsfloatval_t PLMNegLogPosteriorGapReduce(void *instance,
	    const lbfgsfloatval_t *x, lbfgsfloatval_t *g, const int n,
	    const lbfgsfloatval_t step) {
	    /* Compute the negative log posterior, which is the negative
	       penalized log-(pseudo)likelihood and the objective for MAP
	       inference, ignoring every position whose code is negative.

	       Arguments:
	           instance  array of pointers: alignment, options, lambdas
	           x         current parameters (ali->nParams entries)
	           g         receives the gradient (ali->nParams entries)
	           n, step   part of the L-BFGS callback signature; unused here

	       Returns the objective; ali->negLogLk is set to the unpenalized
	       negative log-pseudolikelihood.

	       A sequence contributes to site i only when seq(s, i) >= 0, and a
	       neighbour j contributes only when seq(s, j) >= 0.
	       NOTE(review): the negative codes presumably are the gaps shifted
	       by InferPairModel - confirm letter_t is a signed type.

	       The site*, x*, d*, lambda* and seq macros are defined outside this
	       section and presumably index Xi, Di, x, g, lambdas and ali by
	       name, so those local names are kept as they are. */
	    void **d = (void **) instance;
	    alignment_t *ali = (alignment_t *) d[0];
	    options_t *options = (options_t *) d[1];
	    numeric_t *lambdas = (numeric_t *) d[2];

	    /* Initialize log-likelihood and gradient */
	    lbfgsfloatval_t fx = 0.0;
	    for (int i = 0; i < ali->nParams; i++) g[i] = 0;

	    /* Negative log-pseudolikelihood, one conditional per site */
	    #pragma omp parallel for
	    for (int i = 0; i < ali->nSites; i++) {
	        numeric_t *H = (numeric_t *) malloc(ali->nCodes * sizeof(numeric_t));
	        numeric_t *P = (numeric_t *) malloc(ali->nCodes * sizeof(numeric_t));

	        numeric_t siteFx = 0.0;
	        /* Reshape site parameters and gradient into local blocks */
	        numeric_t *Xi = (numeric_t *) malloc(ali->nCodes * ali->nCodes
	            * ali->nSites * sizeof(numeric_t));
	        for (int j = 0; j < ali->nSites; j++) {
	            if (j == i) continue;
	            for (int a = 0; a < ali->nCodes; a++)
	                for (int b = 0; b < ali->nCodes; b++)
	                    siteE(j, a, b) = xEij(i, j, a, b);
	        }
	        for (int a = 0; a < ali->nCodes; a++) siteH(i, a) = xHi(i, a);

	        numeric_t *Di = (numeric_t *) malloc(ali->nCodes * ali->nCodes
	            * ali->nSites * sizeof(numeric_t));
	        for (int k = 0; k < ali->nCodes * ali->nCodes * ali->nSites; k++)
	            Di[k] = 0.0;

	        /* Site negative conditional log likelihoods */
	        for (int s = 0; s < ali->nSeqs; s++) {
	            /* Only ungapped sites are considered in the model */
	            if (seq(s, i) < 0) continue;

	            /* Compute potentials */
	            for (int a = 0; a < ali->nCodes; a++) H[a] = siteH(i, a);
	            for (int j = 0; j < ali->nSites; j++) {
	                if (j == i || seq(s, j) < 0) continue;
	                for (int a = 0; a < ali->nCodes; a++)
	                    H[a] += siteE(j, a, seq(s, j));
	            }

	            /* Conditional distribution given sequence background;
	               the maximum is subtracted before exp() to avoid overflow */
	            numeric_t scale = H[0];
	            for (int a = 1; a < ali->nCodes; a++)
	                scale = (scale >= H[a] ? scale : H[a]);
	            for (int a = 0; a < ali->nCodes; a++) P[a] = exp(H[a] - scale);
	            numeric_t Z = 0;
	            for (int a = 0; a < ali->nCodes; a++) Z += P[a];
	            numeric_t Zinv = 1.0 / Z;
	            for (int a = 0; a < ali->nCodes; a++) P[a] *= Zinv;

	            /* Log-likelihood contributions are scaled by sequence weight */
	            numeric_t w = ali->weights[s];
	            siteFx -= w * log(P[seq(s, i)]);

	            /* Field gradient: -w * (indicator - P) */
	            siteDH(i, seq(s, i)) -= w;
	            for (int a = 0; a < ali->nCodes; a++)
	                siteDH(i, a) -= -w * P[a];

	            /* Couplings gradient: indicator term, then expectation term */
	            int ix = seq(s, i);
	            for (int j = 0; j < ali->nSites; j++) {
	                if (j == i || seq(s, j) < 0) continue;
	                siteDE(j, ix, seq(s, j)) -= w;
	            }
	            for (int j = 0; j < ali->nSites; j++) {
	                if (j == i || seq(s, j) < 0) continue;
	                for (int a = 0; a < ali->nCodes; a++)
	                    siteDE(j, a, seq(s, j)) -= -w * P[a];
	            }
	        }

	        /* Contribute local loglk and gradient to global; serialized
	           because each pair (i, j) is written by both site i and site j */
	        #pragma omp critical
	        {
	            fx += siteFx;
	            for (int j = 0; j < ali->nSites; j++) {
	                if (j == i) continue;
	                for (int a = 0; a < ali->nCodes; a++)
	                    for (int b = 0; b < ali->nCodes; b++)
	                        dEij(i, j, a, b) += siteDE(j, a, b);
	            }
	            for (int a = 0; a < ali->nCodes; a++) dHi(i, a) += siteDH(i, a);
	        }

	        /* Thread-local buffers need no lock to release */
	        free(Xi);
	        free(Di);
	        free(H);
	        free(P);
	    }

	    ali->negLogLk = fx;

	    /* Gaussian priors */
	    for (int i = 0; i < ali->nSites; i++)
	        for (int ai = 0; ai < ali->nCodes; ai++) {
	            dHi(i, ai) += lambdaHi(i) * 2.0 * xHi(i, ai);
	            fx += lambdaHi(i) * xHi(i, ai) * xHi(i, ai);
	        }

	    for (int i = 0; i < ali->nSites - 1; i++)
	        for (int j = i + 1; j < ali->nSites; j++)
	            for (int ai = 0; ai < ali->nCodes; ai++)
	                for (int aj = 0; aj < ali->nCodes; aj++) {
	                    dEij(i, j, ai, aj) += lambdaEij(i, j)
	                        * 2.0 * xEij(i, j, ai, aj);
	                    fx += lambdaEij(i, j)
	                        * xEij(i, j, ai, aj) * xEij(i, j, ai, aj);
	                }

	    fx = PostCondition(x, g, fx, ali, options);
	    return fx;
	}

	static lbfgsfloatval_t PLMNegLogPosteriorBlock(void *instance,
	    const lbfgsfloatval_t *x, lbfgsfloatval_t *g, const int n,
	    const lbfgsfloatval_t step) {
	    /* Compute the negative log posterior, which is the negative
	       penalized log-(pseudo)likelihood and the objective for MAP
	       inference, processing one sequence at a time with all site
	       conditionals evaluated together from dense parameter blocks.

	       Arguments:
	           instance  array of pointers: alignment, options, lambdas
	           x         current parameters (ali->nParams entries)
	           g         receives the gradient (ali->nParams entries)
	           n, step   part of the L-BFGS callback signature; unused here

	       Returns the objective; ali->negLogLk is set to the unpenalized
	       negative log-pseudolikelihood.

	       The Hi/Hp/gHi/Eij/gEij macros are defined outside this section and
	       presumably index the locals hi, H, gHi, eij and gEij by name, so
	       those names are kept as they are. */
	    void **d = (void **) instance;
	    alignment_t *ali = (alignment_t *) d[0];
	    options_t *options = (options_t *) d[1];
	    numeric_t *lambdas = (numeric_t *) d[2];
	    const int nField = ali->nSites * ali->nCodes;
	    const int nCoupling = ali->nSites * ali->nSites
	        * ali->nCodes * ali->nCodes;

	    /* Initialize log-likelihood and gradient */
	    lbfgsfloatval_t fx = 0.0;
	    for (int i = 0; i < ali->nParams; i++) g[i] = 0;

	    /* Block fields hi */
	    numeric_t *hi = (numeric_t *) malloc(nField * sizeof(numeric_t));
	    numeric_t *gHi = (numeric_t *) malloc(nField * sizeof(numeric_t));
	    for (int i = 0; i < ali->nSites; i++)
	        for (int ai = 0; ai < ali->nCodes; ai++) Hi(i, ai) = xHi(i, ai);
	    for (int i = 0; i < nField; i++) gHi[i] = 0;

	    /* Block couplings eij, stored for both orderings of each pair;
	       the (i, i) blocks stay zero */
	    numeric_t *eij = (numeric_t *) malloc(nCoupling * sizeof(numeric_t));
	    numeric_t *gEij = (numeric_t *) malloc(nCoupling * sizeof(numeric_t));
	    for (int i = 0; i < nCoupling; i++) eij[i] = 0.0;
	    for (int i = 0; i < nCoupling; i++) gEij[i] = 0.0;
	    for (int i = 0; i < ali->nSites - 1; i++)
	        for (int j = i + 1; j < ali->nSites; j++)
	            for (int ai = 0; ai < ali->nCodes; ai++)
	                for (int aj = 0; aj < ali->nCodes; aj++)
	                    Eij(j, aj, i, ai) = Eij(i, ai, j, aj) = xEij(i, j, ai, aj);

	    /* Per-sequence scratch space, allocated once and reused */
	    numeric_t *H = (numeric_t *) malloc(nField * sizeof(numeric_t));
	    numeric_t *Z = (numeric_t *) malloc(ali->nSites * sizeof(numeric_t));

	    /* Negative log-pseudolikelihood */
	    for (int s = 0; s < ali->nSeqs; s++) {
	        const numeric_t w = ali->weights[s];

	        /* Initialize potentials with fields */
	        for (int jx = 0; jx < nField; jx++) H[jx] = hi[jx];

	        /* Contribute coupling block due to i, ai */
	        for (int i = 0; i < ali->nSites; i++) {
	            const letter_t ai = seq(s, i);
	            const numeric_t *jB = &(Eij(i, ai, 0, 0));
	            for (int jx = 0; jx < nField; jx++) H[jx] += jB[jx];
	        }

	        /* Conditional distributions: a softmax over the nCodes letters of
	           each site, shifted by the site maximum to avoid overflow */
	        for (int i = 0; i < ali->nSites; i++) {
	            numeric_t hMax = Hp(i, 0);
	            for (int ai = 1; ai < ali->nCodes; ai++)
	                if (Hp(i, ai) > hMax) hMax = Hp(i, ai);
	            Z[i] = 0;
	            for (int ai = 0; ai < ali->nCodes; ai++) {
	                Hp(i, ai) = exp(Hp(i, ai) - hMax);
	                Z[i] += Hp(i, ai);
	            }
	            for (int ai = 0; ai < ali->nCodes; ai++) Hp(i, ai) /= Z[i];
	        }

	        numeric_t seqFx = 0;
	        for (int i = 0; i < ali->nSites; i++)
	            seqFx -= w * log(Hp(i, seq(s, i)));

	        /* From here on H holds -w * P */
	        for (int jx = 0; jx < nField; jx++) H[jx] *= -w;

	        /* Field gradient: -w * (indicator - P) */
	        for (int i = 0; i < ali->nSites; i++)
	            gHi(i, seq(s, i)) -= w;
	        for (int jx = 0; jx < nField; jx++) gHi[jx] -= H[jx];

	        /* Couplings gradient, indicator term: the conditional of every
	           site j contributes -w for each neighbour i != j, so both
	           orderings of a pair are filled and summed further below */
	        for (int i = 0; i < ali->nSites; i++)
	            for (int j = 0; j < ali->nSites; j++)
	                if (j != i) gEij(i, seq(s, i), j, seq(s, j)) -= w;

	        /* Couplings gradient, expectation term (+w * P of the target) */
	        for (int i = 0; i < ali->nSites; i++) {
	            const letter_t ai = seq(s, i);
	            numeric_t *jgBlock = &(gEij(i, ai, 0, 0));
	            for (int jx = 0; jx < nField; jx++) jgBlock[jx] -= H[jx];
	        }

	        fx += seqFx;
	    }
	    free(H);
	    free(Z);

	    for (int i = 0; i < ali->nSites; i++)
	        for (int ai = 0; ai < ali->nCodes; ai++)
	            dHi(i, ai) += gHi(i, ai);

	    /* Each coupling collects the contributions of both site conditionals */
	    for (int i = 0; i < ali->nSites - 1; i++)
	        for (int j = i + 1; j < ali->nSites; j++)
	            for (int ai = 0; ai < ali->nCodes; ai++)
	                for (int aj = 0; aj < ali->nCodes; aj++)
	                    dEij(i, j, ai, aj) +=
	                        gEij(j, aj, i, ai) + gEij(i, ai, j, aj);
	    free(hi);
	    free(gHi);
	    free(eij);
	    free(gEij);

	    ali->negLogLk = fx;

	    /* Gaussian priors */
	    for (int i = 0; i < ali->nSites; i++)
	        for (int ai = 0; ai < ali->nCodes; ai++) {
	            dHi(i, ai) += lambdaHi(i) * 2.0 * xHi(i, ai);
	            fx += lambdaHi(i) * xHi(i, ai) * xHi(i, ai);
	        }

	    for (int i = 0; i < ali->nSites - 1; i++)
	        for (int j = i + 1; j < ali->nSites; j++)
	            for (int ai = 0; ai < ali->nCodes; ai++)
	                for (int aj = 0; aj < ali->nCodes; aj++) {
	                    dEij(i, j, ai, aj) += lambdaEij(i, j)
	                        * 2.0 * xEij(i, j, ai, aj);
	                    fx += lambdaEij(i, j)
	                        * xEij(i, j, ai, aj) * xEij(i, j, ai, aj);
	                }

	    fx = PostCondition(x, g, fx, ali, options);
	    return fx;
	}

	static lbfgsfloatval_t PLMNegLogPosteriorDO(void *instance,
	    const lbfgsfloatval_t *x, lbfgsfloatval_t *g, const int n,
	    const lbfgsfloatval_t step) {
	    /* Compute the negative log posterior, which is the negative
	       penalized log-(pseudo)likelihood and the objective for MAP
	       inference, with a fresh random 0/1 mask over the parameters drawn
	       for every sequence (dropout).

	       Arguments:
	           instance  array of pointers: alignment, options, lambdas
	           x         current parameters (ali->nParams entries)
	           g         receives the gradient (ali->nParams entries)
	           n, step   part of the L-BFGS callback signature; unused here

	       Returns the objective; ali->negLogLk is set to the unpenalized
	       negative log-pseudolikelihood. The value is stochastic because the
	       masks come from rand().

	       The bitHi/bitEij macros are defined outside this section and
	       presumably read drop_mask by name, so that name is kept.
	       NOTE(review): unlike the other estimators, exp() is applied
	       without subtracting the maximum potential. */
	    void **d = (void **) instance;
	    alignment_t *ali = (alignment_t *) d[0];
	    options_t *options = (options_t *) d[1];
	    numeric_t *lambdas = (numeric_t *) d[2];

	    /* Initialize log-likelihood and gradient */
	    lbfgsfloatval_t fx = 0.0;
	    for (int i = 0; i < ali->nParams; i++) g[i] = 0;

	    numeric_t *H = (numeric_t *) malloc(ali->nCodes * sizeof(numeric_t));
	    numeric_t *P = (numeric_t *) malloc(ali->nCodes * sizeof(numeric_t));
	    int *drop_mask = (int *) malloc(ali->nParams * sizeof(int));
	    for (int s = 0; s < ali->nSeqs; s++) {
	        const numeric_t w = ali->weights[s];

	        /* Generate random bit mask over parameters */
	        for (int p = 0; p < ali->nParams; p++)
	            drop_mask[p] = (int) rand() % 2;

	        /* Pseudolikelihood objective */
	        for (int i = 0; i < ali->nSites; i++) {
	            /* Masked potentials */
	            for (int a = 0; a < ali->nCodes; a++)
	                H[a] = bitHi(i, a) * xHi(i, a);
	            for (int a = 0; a < ali->nCodes; a++)
	                for (int j = 0; j < ali->nSites; j++) {
	                    if (j == i) continue;
	                    H[a] += bitEij(i, j, a, seq(s, j))
	                        * xEij(i, j, a, seq(s, j));
	                }

	            /* Compute distribution from potential */
	            for (int a = 0; a < ali->nCodes; a++) P[a] = exp(H[a]);
	            numeric_t Z = 0;
	            for (int a = 0; a < ali->nCodes; a++) Z += P[a];
	            numeric_t Zinv = 1.0 / Z;
	            for (int a = 0; a < ali->nCodes; a++) P[a] *= Zinv;

	            /* Log-likelihood contributions */
	            fx -= w * log(P[seq(s, i)]);

	            /* Field gradient; masked-out parameters get no gradient */
	            dHi(i, seq(s, i)) -= bitHi(i, seq(s, i)) * w;
	            for (int a = 0; a < ali->nCodes; a++)
	                dHi(i, a) -= -bitHi(i, a) * w * P[a];

	            /* Couplings gradient: indicator term, then expectation term */
	            for (int j = 0; j < ali->nSites; j++) {
	                if (j == i) continue;
	                dEij(i, j, seq(s, i), seq(s, j)) -=
	                    bitEij(i, j, seq(s, i), seq(s, j)) * w;
	            }
	            for (int j = 0; j < ali->nSites; j++) {
	                if (j == i) continue;
	                for (int a = 0; a < ali->nCodes; a++)
	                    dEij(i, j, a, seq(s, j)) -=
	                        -bitEij(i, j, a, seq(s, j)) * w * P[a];
	            }
	        }
	    }
	    free(H);
	    free(P);
	    free(drop_mask);

	    ali->negLogLk = fx;

	    /* Gaussian priors */
	    for (int i = 0; i < ali->nSites; i++)
	        for (int ai = 0; ai < ali->nCodes; ai++) {
	            dHi(i, ai) += lambdaHi(i) * 2.0 * xHi(i, ai);
	            fx += lambdaHi(i) * xHi(i, ai) * xHi(i, ai);
	        }

	    for (int i = 0; i < ali->nSites - 1; i++)
	        for (int j = i + 1; j < ali->nSites; j++)
	            for (int ai = 0; ai < ali->nCodes; ai++)
	                for (int aj = 0; aj < ali->nCodes; aj++) {
	                    dEij(i, j, ai, aj) += lambdaEij(i, j)
	                        * 2.0 * xEij(i, j, ai, aj);
	                    fx += lambdaEij(i, j)
	                        * xEij(i, j, ai, aj) * xEij(i, j, ai, aj);
	                }

	    fx = PostCondition(x, g, fx, ali, options);
	    return fx;
	}

	static int ReportProgresslBFGS(void instance, const lbfgsfloatval_t x,
	const lbfgsfloatval_t *g, const lbfgsfloatval_t fx,
	const lbfgsfloatval_t xnorm, const lbfgsfloatval_t gnorm,
	const lbfgsfloatval_t step, int n, int k, int ls) {
	void d = (void )instance;
	alignment_t ali = (alignment_t )d[0];

	/* Compute norms of relevant parameters */
	lbfgsfloatval_t hNorm = 0.0, eNorm = 0.0, hGNorm = 0.0, eGNorm = 0.0;
	for (int i = 0; i < ali->nSites * ali->nCodes; i++)
	hNorm += x[i]*x[i];
	for (int i = 0; i < ali->nSites * ali->nCodes; i++)
	hGNorm += g[i]*g[i];
	for (int i = ali->nSites * ali->nCodes; i < ali->nParams; i++)
	eNorm += x[i]*x[i];
	for (int i = ali->nSites * ali->nCodes; i < ali->nParams; i++)
	eGNorm += g[i]*g[i];
	hNorm = sqrt(hNorm);
	hGNorm = sqrt(hGNorm);
	eNorm = sqrt(eNorm);
	eGNorm = sqrt(eGNorm);

	/* Retrieve elapsed time */
	static struct timeval now;
	gettimeofday(&now, NULL);
	if (now.tv_usec < ali->start.tv_usec) {
	int nsec = (ali->start.tv_usec - now.tv_usec) / 1000000 + 1;
	ali->start.tv_usec -= 1000000 * nsec;
	ali->start.tv_sec += nsec;
	}
	if (now.tv_usec - ali->start.tv_usec > 1000000) {
	int nsec = (now.tv_usec - ali->start.tv_usec) / 1000000;
	ali->start.tv_usec += 1000000 * nsec;
	ali->start.tv_sec -= nsec;
	}
	numeric_t elapsed = (numeric_t) (now.tv_sec - ali->start.tv_sec)
	+ ((numeric_t) (now.tv_usec - ali->start.tv_usec)) / 1E6;

	if (k == 1) fprintf(stderr,
	"iter\ttime\tcond\tfx\t-loglk"
	"\t\|\|h\|\|\t\|\|e\|\|\n");
	fprintf(stderr, "%d\t%.1f\t%.2f\t%.1f\t%.1f\t%.1f\t%.1f\n",
	k, elapsed, gnorm / xnorm, fx, ali->negLogLk, hNorm, eNorm);
	return 0;
	}

	void PreCondition(const lbfgsfloatval_t *x, lbfgsfloatval_t *g,
	    alignment_t *ali, options_t *options) {
	    /* Hook for processing parameters and gradient before the objective
	       is evaluated. Currently empty: all arguments are ignored. */
	    (void) x;
	    (void) g;
	    (void) ali;
	    (void) options;
	}

	lbfgsfloatval_t PostCondition(const lbfgsfloatval_t *x, lbfgsfloatval_t *g,
	    lbfgsfloatval_t fx, alignment_t *ali, options_t *options) {
	    /* Final adjustments applied by every objective function after the
	       likelihood and Gaussian prior terms.

	       Arguments:
	           x        current parameters
	           g        gradient, modified in place
	           fx       objective value accumulated so far
	           ali      alignment (dimensions)
	           options  zeroAPC and lambdaGroup are read

	       Returns fx plus the group penalty, if any. */

	    /* While zeroAPC == 1 the field gradient is zeroed */
	    if (options->zeroAPC == 1) {
	        for (int i = 0; i < ali->nSites; i++)
	            for (int ai = 0; ai < ali->nCodes; ai++)
	                dHi(i, ai) = 0.0;
	    }

	    /* Group (L1/L2) regularization: lambdaGroup * sqrt(eps + sum of
	       squared couplings) for every pair of sites */
	    if (options->lambdaGroup > 0) {
	        for (int i = 0; i < ali->nSites - 1; i++)
	            for (int j = i + 1; j < ali->nSites; j++) {
	                double l2 = REGULARIZATION_GROUP_EPS;
	                for (int ai = 0; ai < ali->nCodes; ai++)
	                    for (int aj = 0; aj < ali->nCodes; aj++)
	                        l2 += xEij(i, j, ai, aj) * xEij(i, j, ai, aj);
	                double l1 = sqrt(l2);
	                fx += options->lambdaGroup * l1;

	                /* d/dx of lambda * sqrt(eps + sum x^2) = lambda * x / norm */
	                for (int ai = 0; ai < ali->nCodes; ai++)
	                    for (int aj = 0; aj < ali->nCodes; aj++)
	                        dEij(i, j, ai, aj) +=
	                            options->lambdaGroup * xEij(i, j, ai, aj) / l1;
	            }
	    }

	    return fx;
	}

	/* Derives per-pair coupling hyperparameters lambdaEij(i, j) from the current
	 * couplings in x and prints summary statistics to stderr.
	 *
	 *   ali      alignment; nSites and nCodes bound the loops
	 *   options  not referenced directly in this function
	 *   lambdas  hyperparameter storage, overwritten for every pair i < j
	 *   x        current parameter vector (read only here)
	 *
	 * Steps:
	 *   1. store the variance of xEij(i, j, ., .) over all (ai, aj) per pair;
	 *   2. average those variances per site and overall;
	 *   3. subtract V_pos_avg[i] * V_pos_avg[j] / V_avg from each pair;
	 *   4. turn positive results into 1 / (2 * variance) and clamp everything
	 *      to [LAMBDA_J_MIN, LAMBDA_J_MAX].
	 *
	 * The process is terminated with an error message if the temporary
	 * per-site buffer cannot be allocated.
	 *
	 * NOTE(review): xEij and lambdaEij are macros defined outside this chunk;
	 * they presumably index x and lambdas via ali, so those parameter names
	 * must be kept - confirm against the header.
	 */
	void ZeroAPCPriors(alignment_t *ali, options_t *options, numeric_t *lambdas,
	    lbfgsfloatval_t *x) {
	    /* Compute the variances of the couplings for each pair */
	    for (int i = 0; i < ali->nSites - 1; i++)
	        for (int j = i + 1; j < ali->nSites; j++) {
	            /* Mean(eij) over ai, aj */
	            numeric_t mean = 0.0;
	            for (int ai = 0; ai < ali->nCodes; ai++)
	                for (int aj = 0; aj < ali->nCodes; aj++)
	                    mean += xEij(i, j, ai, aj);
	            mean *= 1.0 / ((numeric_t) ali->nCodes * ali->nCodes);

	            /* Var(eij) over ai, aj */
	            numeric_t ssq = 0.0;
	            for (int ai = 0; ai < ali->nCodes; ai++)
	                for (int aj = 0; aj < ali->nCodes; aj++)
	                    ssq += (xEij(i, j, ai, aj) - mean)
	                         * (xEij(i, j, ai, aj) - mean);
	            /* Use N rather than N-1 since N has better MSE */
	            numeric_t var = ssq / ((numeric_t) (ali->nCodes * ali->nCodes));
	            lambdaEij(i, j) = var;
	        }

	    /* Determine the site-wise statistics of the variances */
	    numeric_t nPairs = ((numeric_t) ((ali->nSites) * (ali->nSites - 1))) / 2.0;
	    numeric_t V_avg = 0.0;
	    numeric_t *V_pos_avg = malloc(ali->nSites * sizeof *V_pos_avg);
	    if (V_pos_avg == NULL) {
	        fprintf(stderr,
	            "ERROR: out of memory while computing coupling priors\n");
	        exit(EXIT_FAILURE);
	    }
	    for (int i = 0; i < ali->nSites; i++) {
	        V_pos_avg[i] = 0.0;
	    }
	    for (int i = 0; i < ali->nSites - 1; i++) {
	        for (int j = i + 1; j < ali->nSites; j++) {
	            V_pos_avg[i] += lambdaEij(i, j) / (numeric_t) (ali->nSites - 1);
	            V_pos_avg[j] += lambdaEij(i, j) / (numeric_t) (ali->nSites - 1);
	            V_avg += lambdaEij(i, j) / nPairs;
	        }
	    }

	    /* Remove the first component of the variances. If V_avg is zero the
	       quotient is not finite; such entries fail the "> 0" test below and
	       end up clamped to LAMBDA_J_MAX. */
	    for (int i = 0; i < ali->nSites - 1; i++)
	        for (int j = i + 1; j < ali->nSites; j++)
	            lambdaEij(i, j) =
	                lambdaEij(i, j) - V_pos_avg[i] * V_pos_avg[j] / V_avg;

	    /* The per-site averages are not needed past this point */
	    free(V_pos_avg);

	    /* Transform and truncate variances into lambda hyperparameters */
	    numeric_t pcount = 0.0;
	    numeric_t psum = 0.0;
	    numeric_t inbounds = 0;
	    numeric_t min = LAMBDA_J_MAX;
	    numeric_t max = LAMBDA_J_MIN;
	    for (int i = 0; i < ali->nSites - 1; i++) {
	        for (int j = i + 1; j < ali->nSites; j++) {
	            /* Lambda coefficients are 1/2 the inverse variance */
	            if (lambdaEij(i, j) > 0) {
	                lambdaEij(i, j) = 1.0 / (2.0 * lambdaEij(i, j));
	                psum += lambdaEij(i, j);
	                pcount += 1.0;
	            } else {
	                /* Non-positive variance: mark as out of bounds so it is
	                   not counted below, then clamp to the maximum */
	                lambdaEij(i, j) = LAMBDA_J_MAX + 1.0;
	            }

	            /* Truncate lambda for numerical stability; the in-bounds
	               fraction is tallied before any clamping takes place */
	            if (lambdaEij(i, j) >= LAMBDA_J_MIN && lambdaEij(i, j) <= LAMBDA_J_MAX)
	                inbounds += 1.0 / (numeric_t) ((ali->nSites)*(ali->nSites - 1) / 2.0);
	            if (lambdaEij(i, j) < 0 || !isfinite(lambdaEij(i, j)))
	                lambdaEij(i, j) = LAMBDA_J_MAX;
	            if (lambdaEij(i, j) < LAMBDA_J_MIN) lambdaEij(i, j) = LAMBDA_J_MIN;
	            if (lambdaEij(i, j) > LAMBDA_J_MAX) lambdaEij(i, j) = LAMBDA_J_MAX;

	            /* Track extremes */
	            if (lambdaEij(i, j) > max) max = lambdaEij(i, j);
	            if (lambdaEij(i, j) < min) min = lambdaEij(i, j);
	        }
	    }
	    fprintf(stderr, "Raw coupling hyperparameter statistics:\n"
	                    "\tMean positive lambda: %f\n"
	                    "\tPercent of ij's positive: %f\n"
	                    "\tPercent in bounds (%f < L < %f): %f\n",
	                    psum / pcount,
	                    pcount / nPairs,
	                    min, max, inbounds);
	}

	/* Translates an L-BFGS return code into a short human-readable name.
	 *
	 *   ret  status code returned by the optimizer
	 *
	 * For every LBFGSERR_* code handled below the result is the identifier
	 * without its "LBFGSERR_" prefix. A code of 0 yields "Minimization success"
	 * and any other unlisted value yields "No detected error".
	 *
	 * The returned pointer refers to a string literal: it stays valid for the
	 * lifetime of the program and must not be modified or freed.
	 */
	const char *LBFGSErrorString(int ret) {
	    switch (ret) {
	        case LBFGSERR_UNKNOWNERROR:
	            return "UNKNOWNERROR";
	        /** Logic error. */
	        case LBFGSERR_LOGICERROR:
	            return "LOGICERROR";
	        /** Insufficient memory. */
	        case LBFGSERR_OUTOFMEMORY:
	            return "OUTOFMEMORY";
	        /** The minimization process has been canceled. */
	        case LBFGSERR_CANCELED:
	            return "CANCELED";
	        /** Invalid number of variables specified. */
	        case LBFGSERR_INVALID_N:
	            return "INVALID_N";
	        /** Invalid number of variables (for SSE) specified. */
	        case LBFGSERR_INVALID_N_SSE:
	            return "INVALID_N_SSE";
	        /** The array x must be aligned to 16 (for SSE). */
	        case LBFGSERR_INVALID_X_SSE:
	            return "INVALID_X_SSE";
	        /** Invalid parameter lbfgs_parameter_t::epsilon specified. */
	        case LBFGSERR_INVALID_EPSILON:
	            return "INVALID_EPSILON";
	        /** Invalid parameter lbfgs_parameter_t::past specified. */
	        case LBFGSERR_INVALID_TESTPERIOD:
	            return "INVALID_TESTPERIOD";
	        /** Invalid parameter lbfgs_parameter_t::delta specified. */
	        case LBFGSERR_INVALID_DELTA:
	            return "INVALID_DELTA";
	        /** Invalid parameter lbfgs_parameter_t::linesearch specified. */
	        case LBFGSERR_INVALID_LINESEARCH:
	            return "INVALID_LINESEARCH";
	        /** Invalid parameter lbfgs_parameter_t::min_step specified. */
	        case LBFGSERR_INVALID_MINSTEP:
	            return "INVALID_MINSTEP";
	        /** Invalid parameter lbfgs_parameter_t::max_step specified. */
	        case LBFGSERR_INVALID_MAXSTEP:
	            return "INVALID_MAXSTEP";
	        /** Invalid parameter lbfgs_parameter_t::ftol specified. */
	        case LBFGSERR_INVALID_FTOL:
	            return "INVALID_FTOL";
	        /** Invalid parameter lbfgs_parameter_t::wolfe specified. */
	        case LBFGSERR_INVALID_WOLFE:
	            return "INVALID_WOLFE";
	        /** Invalid parameter lbfgs_parameter_t::gtol specified. */
	        case LBFGSERR_INVALID_GTOL:
	            return "INVALID_GTOL";
	        /** Invalid parameter lbfgs_parameter_t::xtol specified. */
	        case LBFGSERR_INVALID_XTOL:
	            return "INVALID_XTOL";
	        /** Invalid parameter lbfgs_parameter_t::max_linesearch specified. */
	        case LBFGSERR_INVALID_MAXLINESEARCH:
	            return "INVALID_MAXLINESEARCH";
	        /** Invalid parameter lbfgs_parameter_t::orthantwise_c specified. */
	        case LBFGSERR_INVALID_ORTHANTWISE:
	            return "INVALID_ORTHANTWISE";
	        /** Invalid parameter lbfgs_parameter_t::orthantwise_start specified. */
	        case LBFGSERR_INVALID_ORTHANTWISE_START:
	            return "INVALID_ORTHANTWISE_START";
	        /** Invalid parameter lbfgs_parameter_t::orthantwise_end specified. */
	        case LBFGSERR_INVALID_ORTHANTWISE_END:
	            return "INVALID_ORTHANTWISE_END";
	        /** The line-search step went out of the interval of uncertainty. */
	        case LBFGSERR_OUTOFINTERVAL:
	            return "OUTOFINTERVAL";
	        /** A logic error occurred; alternatively: the interval of uncertainty
	            became too small. */
	        case LBFGSERR_INCORRECT_TMINMAX:
	            return "INCORRECT_TMINMAX";
	        /** A rounding error occurred; alternatively: no line-search step
	            satisfies the sufficient decrease and curvature conditions. */
	        case LBFGSERR_ROUNDING_ERROR:
	            return "ROUNDING_ERROR";
	        /** The line-search step became smaller than lbfgs_parameter_t::min_step. */
	        case LBFGSERR_MINIMUMSTEP:
	            return "MINIMUMSTEP";
	        /** The line-search step became larger than lbfgs_parameter_t::max_step. */
	        case LBFGSERR_MAXIMUMSTEP:
	            return "MAXIMUMSTEP";
	        /** The line-search routine reaches the maximum number of evaluations. */
	        case LBFGSERR_MAXIMUMLINESEARCH:
	            return "MAXIMUMLINESEARCH";
	        /** The algorithm routine reaches the maximum number of iterations. */
	        case LBFGSERR_MAXIMUMITERATION:
	            return "MAXIMUMITERATION";
	        /** Relative width of the interval of uncertainty is at most
	            lbfgs_parameter_t::xtol. */
	        case LBFGSERR_WIDTHTOOSMALL:
	            return "WIDTHTOOSMALL";
	        /** A logic error (negative line-search step) occurred. */
	        case LBFGSERR_INVALIDPARAMETERS:
	            return "INVALIDPARAMETERS";
	        /** The current search direction increases the objective function value. */
	        case LBFGSERR_INCREASEGRADIENT:
	            return "INCREASEGRADIENT";
	        case 0:
	            return "Minimization success";
	        default:
	            return "No detected error";
	    }
	}

	numeric_t ElapsedTime(struct timeval *start) {
	/* Computes the elapsed time from START to NOW in seconds */
	struct timeval now;
	gettimeofday(&now, NULL);
	if (now.tv_usec < start->tv_usec) {
	int nsec = (start->tv_usec - now.tv_usec) / 1000000 + 1;
	start->tv_usec -= 1000000 * nsec;
	start->tv_sec += nsec;
	}
	if (now.tv_usec - start->tv_usec > 1000000) {
	int nsec = (now.tv_usec - start->tv_usec) / 1000000;
	start->tv_usec += 1000000 * nsec;
	start->tv_sec -= nsec;
	}
	return (numeric_t) (now.tv_sec - start->tv_sec)
	+ ((numeric_t) (now.tv_usec - start->tv_usec)) / 1E6;
	}