|
#ifndef PLM_H |
|
#define PLM_H |
|
|
|
#include "lbfgs.h" |
|
#include <sys/time.h> |
|
|
|
/* Floating-point type used for every numeric_t quantity declared in this
 * header: float when USE_FLOAT is defined at compile time, double otherwise. */
#ifdef USE_FLOAT
typedef float numeric_t;
#else
typedef double numeric_t;
#endif

/* Integer type of the entries stored in alignment_t.sequences. */
typedef int letter_t;
|
|
|
|
|
|
|
|
|
/* Estimator identifier.
 * NOTE(review): presumably one of the values stored in options_t.estimator;
 * confirm against the implementation. */
enum {
    INFER_MAP = 0
};
|
|
|
|
|
/* Estimator variant identifiers; the numeric values are spelled out so they
 * stay fixed if entries are ever reordered.
 * NOTE(review): presumably the values stored in options_t.estimatorMAP;
 * INFER_MPF shares this numbering although its name does not carry the
 * INFER_MAP_ prefix -- confirm intended use against the implementation. */
enum {
    INFER_MAP_PLM           = 0,
    INFER_MAP_PLM_GAPREDUCE = 1,
    INFER_MAP_PLM_BLOCK     = 2,
    INFER_MAP_PLM_DROPOUT   = 3,
    INFER_MPF               = 4
};
|
|
|
|
|
|
|
|
|
/* Regularization identifier.
 * NOTE(review): no field of options_t visibly stores this value in this
 * header; confirm where it is consumed. */
enum {
    REGULARIZE_L2 = 0
};
|
|
|
|
|
|
|
|
|
typedef struct { |
|
|
|
char *target; |
|
char *alphabet; |
|
|
|
|
|
int usePairs; |
|
int estimator; |
|
int estimatorMAP; |
|
int maxIter; |
|
|
|
|
|
int sgd; |
|
int sgdBatchSize; |
|
|
|
|
|
int fastWeights; |
|
numeric_t theta; |
|
numeric_t scale; |
|
|
|
|
|
numeric_t lambdaH; |
|
numeric_t lambdaE; |
|
numeric_t lambdaGroup; |
|
|
|
|
|
int zeroAPC; |
|
} options_t; |
|
|
|
/* Returns a pointer to an options_t.
 * Declared with (void) so the compiler checks that no arguments are passed;
 * an empty parameter list is not a prototype before C23.
 * NOTE(review): presumably the result is freshly allocated and filled with
 * default values, with ownership passing to the caller -- confirm against
 * the definition. */
options_t *default_options(void);
|
|
|
|
|
|
|
|
|
typedef struct { |
|
|
|
int nSeqs; |
|
int nSites; |
|
int nCodes; |
|
char *alphabet; |
|
char **names; |
|
letter_t *sequences; |
|
|
|
|
|
int nSkippedSeqs; |
|
int *skippedSeqs; |
|
|
|
|
|
int target; |
|
int *offsets; |
|
|
|
|
|
numeric_t nEff; |
|
numeric_t *weights; |
|
numeric_t *fi; |
|
numeric_t *fij; |
|
|
|
|
|
int nParams; |
|
numeric_t negLogLk; |
|
struct timeval start; |
|
} alignment_t; |
|
|
|
|
|
|
|
|
|
/* Top-level entry point taking the input/output file paths and run options.
 * NOTE(review): judging by the parameter names, alignFile is the input
 * alignment, outputFile / couplingsFile receive parameters and coupling
 * scores, and weightsFile / weightsOutputFile are sequence-weight input and
 * output; which of these may be NULL is not visible here -- confirm against
 * the definition. */
void run_plmc(char *alignFile, char *outputFile, char *couplingsFile,
              char *weightsFile, char *weightsOutputFile,
              options_t *options);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* Returns a pointer to an alignment_t for the given alignment file path and
 * options.
 * NOTE(review): presumably parses alignFile and allocates the result, to be
 * released with MSAFree(); failure behaviour is not visible here -- confirm
 * against the definition. */
alignment_t *MSARead(char *alignFile, options_t *options);
|
|
|
|
|
/* Operates on an alignment using the given options.
 * NOTE(review): presumably fills the fi / fij marginal arrays of ali --
 * confirm against the definition. */
void MSACountMarginals(alignment_t *ali, options_t *options);
|
|
|
|
|
/* Counterpart of MSARead().
 * NOTE(review): presumably releases ali and the arrays it owns; whether
 * options is also released is not visible here -- confirm against the
 * definition before reusing either pointer afterwards. */
void MSAFree(alignment_t *ali, options_t *options);
|
|
|
|
|
void OutputParametersSite(char *outputFile, const numeric_t *x, |
|
alignment_t *ali); |
|
void OutputParametersFull(char *outputFile, const numeric_t *x, |
|
alignment_t *ali, options_t *options); |
|
void OutputCouplingScores(char *couplingsFile, const numeric_t *x, |
|
alignment_t *ali, options_t *options); |
|
|
|
|
|
|
|
/* Maximum number of bytes (terminator included) that fgetstr reads per call;
 * str must point to at least this many writable bytes. */
#define BUFFER_SIZE 40960

/* Reads one line from fp into str with fgets, cuts it at the first '\n' and
 * then strips every trailing '\r'. If fgets returns NULL, str is left
 * untouched. str is evaluated more than once, so pass a side-effect-free
 * expression. Requires <stdio.h> and <string.h> at the point of use.
 *
 * The trailing-'\r' scan is bounded by the start of the buffer: an empty
 * line (just "\n") leaves str empty, and indexing str[strlen(str) - 1] in
 * that state would touch the byte in front of the buffer. */
#define fgetstr(str, fp) {char *endPos;                                  \
    if (fgets((str), BUFFER_SIZE, (fp)) != NULL) {                       \
        if ((endPos = strchr((str), '\n')) != NULL)                      \
            *endPos = '\0';                                              \
        endPos = (str) + strlen(str);                                    \
        while (endPos > (str) && endPos[-1] == '\r')                     \
            *--endPos = '\0';                                            \
    }                                                                    \
}
|
|
|
|
|
/* Accessors into flat parameter-shaped arrays. They expand textually and
 * expect `ali` (pointer exposing nSites and nCodes) in scope, plus `x` for
 * the x* forms and `g` for the d* forms; the w* forms take the array as
 * their first argument.
 *
 * Layout: nSites * nCodes site entries at [i + nSites * Ai], followed by one
 * nCodes * nCodes block per unordered site pair. The pair block index is
 * hi * (hi - 1) / 2 + lo with hi = max(i, j), lo = min(i, j); inside a block
 * the entry is (code at hi) * nCodes + (code at lo), so (i, j, Ai, Aj) and
 * (j, i, Aj, Ai) address the same element. The pair forms are meant for
 * i != j.
 *
 * Every argument is parenthesized so that expressions such as a conditional
 * can be passed safely; arguments are still evaluated more than once. */
#define xHi(i, Ai) x[(i) + ali->nSites * (Ai)]
#define xEij(i, j, Ai, Aj) x[ali->nSites * ali->nCodes                      \
    + ((i) < (j)                                                            \
        ? (((j) * ((j) - 1) / 2 + (i)) * ali->nCodes * ali->nCodes          \
            + (Aj) * ali->nCodes + (Ai))                                    \
        : (((i) * ((i) - 1) / 2 + (j)) * ali->nCodes * ali->nCodes          \
            + (Ai) * ali->nCodes + (Aj)))]
#define dHi(i, Ai) g[(i) + ali->nSites * (Ai)]
#define dEij(i, j, Ai, Aj) g[ali->nSites * ali->nCodes                      \
    + ((i) < (j)                                                            \
        ? (((j) * ((j) - 1) / 2 + (i)) * ali->nCodes * ali->nCodes          \
            + (Aj) * ali->nCodes + (Ai))                                    \
        : (((i) * ((i) - 1) / 2 + (j)) * ali->nCodes * ali->nCodes          \
            + (Ai) * ali->nCodes + (Aj)))]

#define wHi(w, i, Ai) (w)[(i) + ali->nSites * (Ai)]
#define wEij(w, i, j, Ai, Aj) (w)[ali->nSites * ali->nCodes                 \
    + ((i) < (j)                                                            \
        ? (((j) * ((j) - 1) / 2 + (i)) * ali->nCodes * ali->nCodes          \
            + (Aj) * ali->nCodes + (Ai))                                    \
        : (((i) * ((i) - 1) / 2 + (j)) * ali->nCodes * ali->nCodes          \
            + (Ai) * ali->nCodes + (Aj)))]
/* Per-site / per-pair entries: nSites site values, then one value per
 * unordered pair at the same triangular pair index. */
#define wLambdaHi(w, i) (w)[(i)]
#define wLambdaEij(w, i, j) (w)[ali->nSites                                 \
    + ((i) < (j) ? ((j) * ((j) - 1) / 2 + (i))                              \
                 : ((i) * ((i) - 1) / 2 + (j)))]
|
|
|
|
|
|
|
|
|
|
|
|
|
/* Accessors into the arrays `Xi` and `Di`, which must be in scope together
 * with `ali` (pointer exposing nCodes). Both arrays are addressed as
 * [ai + nCodes * (aj + nCodes * j)]; the H forms read the ai == aj == a
 * entry of block i.
 * NOTE(review): presumably Xi holds the parameters of a single site and Di
 * the matching gradient -- confirm against the implementation.
 * Arguments are parenthesized so arbitrary expressions can be passed; `a` is
 * evaluated twice in the H forms. */
#define siteH(i, a) Xi[(a) + ali->nCodes * ((a) + ali->nCodes * (i))]
#define siteE(j, ai, aj) Xi[(ai) + ali->nCodes * ((aj) + ali->nCodes * (j))]
#define siteDH(i, a) Di[(a) + ali->nCodes * ((a) + ali->nCodes * (i))]
#define siteDE(j, ai, aj) Di[(ai) + ali->nCodes * ((aj) + ali->nCodes * (j))]
|
|
|
|
|
/* Accessors into dense arrays that must be in scope under the names `H`,
 * `hi`, `gHi`, `eij` and `gEij`, together with `ali` (pointer exposing
 * nSites and nCodes). Site arrays are addressed as [ai + nCodes * i]; pair
 * arrays as [aj + nCodes * (j + nSites * (ai + nCodes * i))], i.e. a full
 * (i, ai, j, aj) table rather than the triangular pair layout.
 * gHi and gEij deliberately share their name with the array they index; a
 * function-like macro only expands when followed by '('.
 * Arguments are parenthesized so arbitrary expressions can be passed. */
#define Hp(i, ai) H[(ai) + ali->nCodes * (i)]
#define Hi(i, ai) hi[(ai) + ali->nCodes * (i)]
#define gHi(i, ai) gHi[(ai) + ali->nCodes * (i)]
#define Eij(i, ai, j, aj) eij[(aj) + ali->nCodes * ((j) + ali->nSites * ((ai) + ali->nCodes * (i)))]
#define gEij(i, ai, j, aj) gEij[(aj) + ali->nCodes * ((j) + ali->nSites * ((ai) + ali->nCodes * (i)))]
|
|
|
|
|
/* Accessors into `drop_mask`, which must be in scope together with `ali`
 * (pointer exposing nSites and nCodes). The indexing mirrors the parameter
 * layout: site entries at [i + nSites * Ai], then one nCodes * nCodes block
 * per unordered site pair at triangular index hi * (hi - 1) / 2 + lo.
 * NOTE(review): presumably a per-parameter mask for the dropout estimator --
 * confirm against the implementation.
 * Arguments are parenthesized so arbitrary expressions can be passed; they
 * are evaluated more than once. */
#define bitHi(i, Ai) drop_mask[(i) + ali->nSites * (Ai)]
#define bitEij(i, j, Ai, Aj) drop_mask[ali->nSites * ali->nCodes            \
    + ((i) < (j)                                                            \
        ? (((j) * ((j) - 1) / 2 + (i)) * ali->nCodes * ali->nCodes          \
            + (Aj) * ali->nCodes + (Ai))                                    \
        : (((i) * ((i) - 1) / 2 + (j)) * ali->nCodes * ali->nCodes          \
            + (Ai) * ali->nCodes + (Aj)))]
|
|
|
|
|
/* Accessors into `lambdas` and `gLambdas`, which must be in scope together
 * with `ali` (pointer exposing nSites): nSites per-site values first, then
 * one value per unordered site pair at triangular index
 * hi * (hi - 1) / 2 + lo (hi = max(i, j), lo = min(i, j)).
 * NOTE(review): presumably per-site / per-pair regularization strengths and
 * their gradients -- confirm against the implementation.
 * Arguments are parenthesized so arbitrary expressions can be passed; they
 * are evaluated more than once in the pair forms. */
#define lambdaHi(i) lambdas[(i)]
#define lambdaEij(i,j) lambdas[ali->nSites                                  \
    + ((i) < (j) ? ((j) * ((j) - 1) / 2 + (i))                              \
                 : ((i) * ((i) - 1) / 2 + (j)))]
#define gLambdaHi(i) gLambdas[(i)]
#define gLambdaEij(i,j) gLambdas[ali->nSites                                \
    + ((i) < (j) ? ((j) * ((j) - 1) / 2 + (i))                              \
                 : ((i) * ((i) - 1) / 2 + (j)))]
|
|
|
|
|
|
|
/* Accessor into `couplings`, which must be in scope: one entry per unordered
 * site pair at triangular index hi * (hi - 1) / 2 + lo (hi = max(i, j),
 * lo = min(i, j)), so coupling(i, j) and coupling(j, i) are the same
 * element. Meant for i != j.
 * Arguments are parenthesized so arbitrary expressions can be passed; they
 * are evaluated more than once. */
#define coupling(i,j) couplings[((i) < (j) ? ((j) * ((j) - 1) / 2 + (i)) : ((i) * ((i) - 1) / 2 + (j)))]
|
|
|
|
|
|
|
/* Accessors that expect `ali` (pointer exposing nSeqs, nSites, nCodes,
 * sequences, fi and fij) in scope; M and g_ij additionally expect arrays
 * named `membership_matrix` and `g_ij`.
 *   seq(s, i)          sequences[i + s * nSites]
 *   fi(i, Ai)          fi[i + nSites * Ai]
 *   fij(i, j, Ai, Aj)  nCodes * nCodes block per unordered site pair at
 *                      triangular index hi * (hi - 1) / 2 + lo, entry
 *                      (code at hi) * nCodes + (code at lo); meant for i != j
 *   M / g_ij (s, i, m) [s + nSeqs * (i + nSites * m)]
 * fi, fij and g_ij share their names with the data they index; a
 * function-like macro only expands when followed by '('.
 * Arguments are parenthesized so arbitrary expressions can be passed; fij
 * evaluates its arguments more than once. */
#define seq(s, i) ali->sequences[(i) + (s) * ali->nSites]
#define fi(i, Ai) ali->fi[(i) + ali->nSites * (Ai)]
#define fij(i, j, Ai, Aj) ali->fij[((i) < (j)                               \
        ? (((j) * ((j) - 1) / 2 + (i)) * ali->nCodes * ali->nCodes          \
            + (Aj) * ali->nCodes + (Ai))                                    \
        : (((i) * ((i) - 1) / 2 + (j)) * ali->nCodes * ali->nCodes          \
            + (Ai) * ali->nCodes + (Aj)))]
#define M(s, i, m) membership_matrix[(s) + ali->nSeqs * ((i) + ali->nSites * (m))]
#define g_ij(s, i, m) g_ij[(s) + ali->nSeqs * ((i) + ali->nSites * (m))]
|
|
|
#endif |
|
|