|
|
|
|
|
|
|
|
|
|
|
|
|
#include <stdlib.h> |
|
#include <ctype.h> |
|
#include <math.h> |
|
#include <stdio.h> |
|
#include <stdint.h> |
|
#include <string.h> |
|
|
|
#include "include/twister.h" |
|
#include "include/plm.h" |
|
#include "include/inference.h" |
|
#include "include/weights.h" |
|
|
|
|
|
void MSAReadSeq(char *seq, FILE *fpAli); |
|
letter_t MSAReadCode(char c, char *alphabet, int nCodes); |
|
|
|
|
|
int verbose = 2; |
|
|
|
|
|
const char *codesAA = "-ACDEFGHIKLMNPQRSTVWY"; |
|
|
|
|
|
const numeric_t REGULARIZATION_LAMBDA_H = 0.01; |
|
const numeric_t REGULARIZATION_LAMBDA_E = 100.0; |
|
const numeric_t REGULARIZATION_LAMBDA_GROUP = 0.0; |
|
const numeric_t REWEIGHTING_THETA = 0.20; |
|
const numeric_t REWEIGHTING_SCALE = 1.0; |
|
const int ZERO_APC_PRIORS = 0; |
|
const int SGD_BATCH_SIZE = 2048; |
|
const int REWEIGHTING_SAMPLES = 5000; |
|
|
|
options_t* default_options() { |
|
|
|
options_t *options = (options_t *) malloc(sizeof(options_t)); |
|
|
|
options->lambdaH = REGULARIZATION_LAMBDA_H; |
|
options->lambdaE = REGULARIZATION_LAMBDA_E; |
|
options->lambdaGroup = REGULARIZATION_LAMBDA_GROUP; |
|
options->scale = REWEIGHTING_SCALE; |
|
options->zeroAPC = 0; |
|
options->maxIter = 0; |
|
options->usePairs = 1; |
|
options->estimator = INFER_MAP; |
|
options->estimatorMAP = INFER_MAP_PLM; |
|
options->target = NULL; |
|
options->alphabet = (char *) codesAA; |
|
|
|
|
|
options->fastWeights = 0; |
|
options->theta = REWEIGHTING_THETA; |
|
|
|
|
|
options->sgd = 0; |
|
options->sgdBatchSize = SGD_BATCH_SIZE; |
|
|
|
return options; |
|
} |
|
|
|
/*
 * Top-level driver: reads an alignment, assigns sequence weights, counts
 * marginals, infers the pair model and writes the requested outputs.
 *
 *   alignFile          alignment to read (MSARead exits if it is NULL)
 *   outputFile         if non-NULL, full parameter file to write
 *   couplingsFile      if non-NULL, coupling score file to write
 *   weightsFile        if non-NULL, weights are read from this file instead
 *                      of being computed by MSAReweightSequences
 *   weightsOutputFile  if non-NULL, the weights in use are written here
 *   options            run settings; handed to MSAFree at the end, which
 *                      frees it
 */
void run_plmc(char *alignFile, char* outputFile, char *couplingsFile,
    char *weightsFile, char *weightsOutputFile, options_t *options) {
    /* Seed the random number generator with a fixed value */
    init_genrand(42);

    /* Load the alignment */
    alignment_t *ali = MSARead(alignFile, options);

    /* Sequence weights: computed unless a custom weights file was given */
    if (weightsFile == NULL) {
        MSAReweightSequences(ali, options);
    } else {
        ReadCustomWeightsFile(weightsFile, ali);
    }
    if (weightsOutputFile != NULL) {
        WriteWeightsFile(weightsOutputFile, ali);
    }

    /* Weighted single-site and pair frequencies */
    MSACountMarginals(ali, options);

    /* Parameter inference.
     * NOTE(review): the returned vector is never released in this function;
     * confirm who owns it before adding a free(). */
    numeric_t *params = InferPairModel(ali, options);

    /* Optional outputs */
    if (outputFile != NULL) {
        OutputParametersFull(outputFile, params, ali, options);
    }
    if (couplingsFile != NULL) {
        OutputCouplingScores(couplingsFile, params, ali, options);
    }

    /* Release the alignment buffers and the options structure */
    MSAFree(ali, options);
}
|
|
|
/* Allocates a zero-filled array of `count` elements of `size` bytes each.
 * Prints an error and exits if a non-empty request cannot be satisfied. */
static void *MSAReadAlloc(size_t count, size_t size) {
    void *block = calloc(count, size);
    if (block == NULL && count > 0 && size > 0) {
        fprintf(stderr, "Error reading alignment: out of memory\n");
        exit(1);
    }
    return block;
}

/*
 * Reads an alignment whose records each start with a '>' name line followed
 * by sequence lines, and returns it as a newly allocated alignment_t.
 *
 * The file is scanned twice: once to count the sequences and check that they
 * all have the same length, and once to store the names and encode every
 * character with MSAReadCode(). Afterwards:
 *   - if options->target is set, the first sequence whose name starts with
 *     that string becomes the focus sequence (ali->target);
 *   - sequences containing a character outside the alphabet are dropped and
 *     their original indices recorded in ali->skippedSeqs;
 *   - with a focus sequence, columns are dropped according to the focus
 *     sequence's codes (see the column filter below) and ali->offsets holds
 *     the 1-based position of every kept column, shifted by the region start
 *     parsed from a "NAME/START..." style focus name;
 *   - negative (lowercase) codes are shifted up by nCodes;
 *   - all sequence weights are initialised to 1.
 *
 * Exits with status 1 on a missing/unopenable file, a malformed record, or
 * sequences of differing length.
 */
alignment_t *MSARead(char *alignFile, options_t *options) {
    /* Open the alignment file */
    if (alignFile == NULL) {
        fprintf(stderr, "Must specify alignment file: -a ALIGN_FILE\n");
        exit(1);
    }
    FILE *fpAli = fopen(alignFile, "r");
    if (fpAli == NULL) {
        fprintf(stderr, "Error opening alignment file\n");
        exit(1);
    }

    /* Start from an empty alignment */
    alignment_t *ali = (alignment_t *) MSAReadAlloc(1, sizeof(alignment_t));
    ali->nSeqs = ali->nSites = ali->nCodes = 0;
    ali->alphabet = options->alphabet;
    ali->names = NULL;
    ali->sequences = NULL;
    ali->target = -1;
    ali->offsets = NULL;
    ali->nEff = 0;
    ali->weights = ali->fi = ali->fij = NULL;
    ali->nParams = 0;

    /* Line buffers. seqText must not be confused with the seq(s, i) macro. */
    char name[BUFFER_SIZE];
    char seqText[BUFFER_SIZE];
    name[0] = '\0';

    /* ---- Pass 1: count sequences and verify a common length ---- */
    fgetstr(name, fpAli);
    if (*name != '>') {
        fprintf(stderr, "Error reading alignment:"
                        " First line should start with >\n");
        exit(1);
    }
    MSAReadSeq(seqText, fpAli);
    ali->nCodes = strlen(ali->alphabet);
    ali->nSites = strlen(seqText);
    ali->nSeqs = 1;
    while (!feof(fpAli)) {
        /* int, not char: EOF must stay distinguishable from data bytes */
        int c = fgetc(fpAli);
        if (c == EOF) break;
        if (c != '>') {
            fprintf(stderr, "Error reading alignment:"
                            " sequence records should start with >\n");
            exit(1);
        }
        fgetstr(name, fpAli);
        MSAReadSeq(seqText, fpAli);

        size_t seqLength = strlen(seqText);
        if (seqLength != (size_t) ali->nSites) {
            fprintf(stderr,
                "Incompatible sequence length (%zu should be %d) for %s:\n%s\n",
                seqLength, ali->nSites, name, seqText);
            exit(1);
        }
        ali->nSeqs++;
    }

    /* ---- Pass 2: store names and encoded sequences ---- */
    /* The element count is formed in size_t so it cannot overflow int */
    ali->sequences = (letter_t *)
        MSAReadAlloc((size_t) ali->nSites * (size_t) ali->nSeqs,
                     sizeof(letter_t));
    ali->names = (char **) MSAReadAlloc((size_t) ali->nSeqs, sizeof(char *));
    for (int s = 0; s < ali->nSeqs; s++) ali->names[s] = NULL;

    rewind(fpAli);
    for (int s = 0; s < ali->nSeqs; s++) {
        /* Skip the leading '>' and keep the rest of the line as the name */
        getc(fpAli);
        fgetstr(name, fpAli);
        ali->names[s] = (char *) MSAReadAlloc(strlen(name) + 1, sizeof(char));
        strcpy(ali->names[s], name);

        MSAReadSeq(seqText, fpAli);
        for (int i = 0; i < ali->nSites; i++)
            seq(s, i) = MSAReadCode(seqText[i], ali->alphabet, ali->nCodes);
    }
    fclose(fpAli);

    /* ---- Focus sequence: first name that starts with options->target ---- */
    if (options->target != NULL) {
        size_t targetLen = strlen(options->target);
        for (int s = 0; s < ali->nSeqs; s++) {
            if (strncmp(options->target, ali->names[s], targetLen) != 0)
                continue;
            if (ali->target >= 0) {
                /* The first match is kept, so report that one */
                fprintf(stderr,
                    "Multiple sequences start with %s, picking sequence %d\n",
                    options->target, ali->target + 1);
            } else {
                ali->target = s;
            }
        }
        if (ali->target >= 0) {
            fprintf(stderr, "Found focus %s as sequence %d\n", options->target,
                ali->target + 1);
        } else {
            fprintf(stderr,
                "Could not find %s, proceeding without focus sequence\n",
                options->target);
        }
    }

    /* ---- Sequence filter: every code must lie in [-nCodes, nCodes) ---- */
    int *seqValid = (int *) MSAReadAlloc((size_t) ali->nSeqs, sizeof(int));
    for (int s = 0; s < ali->nSeqs; s++)
        for (int i = 0; i < ali->nSites; i++)
            if ((seq(s, i) >= -ali->nCodes) && (seq(s, i) < ali->nCodes))
                seqValid[s]++;
    int nValidSeqs = 0;
    for (int s = 0; s < ali->nSeqs; s++)
        if (seqValid[s] == ali->nSites) nValidSeqs++;
    fprintf(stderr, "%d valid sequences out of %d \n", nValidSeqs, ali->nSeqs);

    /* Remember the original indices of the sequences that get dropped */
    ali->nSkippedSeqs = ali->nSeqs - nValidSeqs;
    ali->skippedSeqs =
        (int *) MSAReadAlloc((size_t) ali->nSkippedSeqs, sizeof(int));
    for (int s = 0, skipIndex = 0; s < ali->nSeqs; s++)
        if (seqValid[s] != ali->nSites) ali->skippedSeqs[skipIndex++] = s;

    /* ---- Column filter (only with a focus sequence) ---- */
    int nValidSites = ali->nSites;
    int *siteValid = (int *) MSAReadAlloc((size_t) ali->nSites, sizeof(int));
    for (int i = 0; i < ali->nSites; i++) siteValid[i] = 1;
    if (ali->target >= 0) {
        int defaultAlphabet = (ali->alphabet == codesAA);
        int gapReduce = (options->estimatorMAP == INFER_MAP_PLM_GAPREDUCE);
        for (int i = 0; i < ali->nSites; i++) {
            /* Default alphabet: drop columns where the focus is lowercase
             * (negative code) */
            if (defaultAlphabet && (seq(ali->target, i) < 0))
                siteValid[i] = 0;

            /* Default alphabet or gap-reduced estimator: drop columns where
             * the focus has code 0 (the first alphabet character) */
            if ((defaultAlphabet || gapReduce) && seq(ali->target, i) == 0)
                siteValid[i] = 0;
        }
        nValidSites = 0;
        for (int i = 0; i < ali->nSites; i++)
            if (siteValid[i] == 1) nValidSites++;
        fprintf(stderr,
            "%d sites out of %d\n", nValidSites, ali->nSites);
    } else {
        fprintf(stderr,
            "%d sites\n", ali->nSites);
    }

    /* ---- Offsets and focus index in the reduced alignment ---- */
    int leftOffset = 0;
    if (ali->target >= 0) {
        char *focusName = ali->names[ali->target];
        size_t targetLen = strlen(options->target);

        /* A name of the form TARGET/<digits>... carries the region start */
        if (strlen(focusName) > targetLen + 1
            && focusName[targetLen] == '/') {
            const char *digits = focusName + targetLen + 1;
            if (isdigit((unsigned char) *digits)) {
                int regionStart = 0;
                for (; isdigit((unsigned char) *digits); digits++)
                    regionStart = regionStart * 10 + (*digits - '0');
                leftOffset = regionStart - 1;
                fprintf(stderr, "Region starts at %d\n", leftOffset + 1);
            } else {
                fprintf(stderr, "Error parsing region, assuming start at 1\n");
            }
        }

        /* 1-based position of every kept column, shifted by the region */
        ali->offsets = (int *) MSAReadAlloc((size_t) nValidSites, sizeof(int));
        int kept = 0;
        for (int i = 0; i < ali->nSites; i++)
            if (siteValid[i] == 1)
                ali->offsets[kept++] = i + 1 + leftOffset;

        /* Re-index the focus among the valid sequences.
         * NOTE(review): if the focus sequence itself is invalid this lands
         * on the preceding valid sequence (or -1). */
        int targetShift = -1;
        for (int i = 0; i <= ali->target; i++)
            if (seqValid[i] == ali->nSites) targetShift++;
        ali->target = targetShift;
    }

    /* ---- Compact the alignment to valid sequences and columns ---- */
    if (nValidSeqs < ali->nSeqs || nValidSites < ali->nSites) {
        size_t nReduced = (size_t) nValidSites * (size_t) nValidSeqs;
        letter_t *seqsReduced =
            (letter_t *) MSAReadAlloc(nReduced, sizeof(letter_t));
        int sx = 0;
        for (int s = 0; s < ali->nSeqs; s++) {
            if (seqValid[s] != ali->nSites) continue;
            int ix = 0;
            for (int i = 0; i < ali->nSites; i++)
                if (siteValid[i] == 1)
                    seqsReduced[(ix++) + sx * nValidSites] = seq(s, i);
            sx++;
        }

        /* Swap in the smaller matrix. The copy goes through seq(s, i) so
         * that it uses whatever layout the macro defines.
         * NOTE(review): ali->names is not compacted, so after this point it
         * still holds one entry per sequence in the file. */
        free(ali->sequences);
        ali->nSeqs = nValidSeqs;
        ali->nSites = nValidSites;
        ali->sequences = (letter_t *) MSAReadAlloc(nReduced, sizeof(letter_t));
        for (int s = 0; s < nValidSeqs; s++)
            for (int i = 0; i < nValidSites; i++)
                seq(s, i) = seqsReduced[i + s * nValidSites];
        free(seqsReduced);
    }

    /* Shift negative (lowercase) codes up by nCodes */
    for (int s = 0; s < ali->nSeqs; s++)
        for (int i = 0; i < ali->nSites; i++)
            if (seq(s, i) < 0) seq(s, i) += ali->nCodes;

    /* Uniform initial weights */
    ali->weights =
        (numeric_t *) MSAReadAlloc((size_t) ali->nSeqs, sizeof(numeric_t));
    for (int s = 0; s < ali->nSeqs; s++) ali->weights[s] = 1.0;
    ali->nEff = (numeric_t) ali->nSeqs;

    /* Scratch arrays are no longer needed */
    free(seqValid);
    free(siteValid);

    return ali;
}
|
|
|
void MSAReadSeq(char *seq, FILE *fpAli) { |
|
|
|
char buf[BUFFER_SIZE]; |
|
|
|
char c = fgetc(fpAli); |
|
ungetc(c, fpAli); |
|
seq[0] = '\0'; |
|
while (c != '>' && !feof(fpAli)) { |
|
fgetstr(buf, fpAli); |
|
strcat(seq, buf); |
|
|
|
c = fgetc(fpAli); |
|
ungetc(c, fpAli); |
|
} |
|
} |
|
|
|
letter_t MSAReadCode(char c, char *alphabet, int nCodes) { |
|
|
|
|
|
|
|
|
|
|
|
letter_t i = 0; |
|
|
|
|
|
if (alphabet == codesAA) if (c == '.') c = '-'; |
|
|
|
|
|
while ((i < nCodes - 1) && toupper(c) != alphabet[i]) i++; |
|
if (c != alphabet[i] && toupper(c) == alphabet[i]) i -= nCodes; |
|
|
|
|
|
if (i > 0 && toupper(c) != alphabet[i]) i = nCodes; |
|
return i; |
|
} |
|
|
|
/*
 * Allocates ali->fi and ali->fij and fills them with the weighted
 * single-site and pair frequencies of the alignment.
 *
 * With options->estimatorMAP == INFER_MAP_PLM_GAPREDUCE:
 *   - ali->nCodes is reset to strlen(ali->alphabet) - 1 (code 0 is left out);
 *   - only positions with a code > 0 are counted, stored at index code - 1;
 *   - every site (and every site pair) is normalised to sum to 1; a site or
 *     pair with no counts gets a flat distribution.
 * Otherwise every code is counted and each sequence contributes
 * weight / ali->nEff.
 *
 * Exits with status 1 if the frequency tables cannot be allocated.
 */
void MSACountMarginals(alignment_t *ali, options_t *options) {
    const int gapReduce = (options->estimatorMAP == INFER_MAP_PLM_GAPREDUCE);

    /* The gap-reduced model works on the alphabet without its first code */
    if (gapReduce) ali->nCodes = strlen(ali->alphabet) - 1;

    /* Table sizes, computed in size_t so large alignments cannot overflow
     * int before the allocation */
    const size_t siteCount = (size_t) ali->nSites;
    const size_t codeCount = (size_t) ali->nCodes;
    const size_t pairCount =
        (siteCount > 1) ? siteCount * (siteCount - 1) / 2 : 0;
    const size_t nFi = siteCount * codeCount;
    const size_t nFij = pairCount * codeCount * codeCount;

    ali->fi = (numeric_t *) malloc(nFi * sizeof(numeric_t));
    ali->fij = (numeric_t *) malloc(nFij * sizeof(numeric_t));
    if ((ali->fi == NULL && nFi > 0) || (ali->fij == NULL && nFij > 0)) {
        fprintf(stderr, "Error allocating marginals\n");
        exit(1);
    }
    for (size_t k = 0; k < nFi; k++) ali->fi[k] = 0.0;
    for (size_t k = 0; k < nFij; k++) ali->fij[k] = 0.0;

    if (gapReduce) {
        /* Weighted counts over non-zero codes only */
        for (int s = 0; s < ali->nSeqs; s++)
            for (int i = 0; i < ali->nSites; i++)
                if (seq(s, i) > 0)
                    fi(i, seq(s, i) - 1) += ali->weights[s];

        for (int s = 0; s < ali->nSeqs; s++)
            for (int i = 0; i < ali->nSites - 1; i++)
                for (int j = i + 1; j < ali->nSites; j++)
                    if (seq(s, i) > 0 && seq(s, j) > 0)
                        fij(i, j, seq(s, i) - 1, seq(s, j) - 1)
                            += ali->weights[s];

        /* Normalise every site to a distribution */
        for (int i = 0; i < ali->nSites; i++) {
            double fsum = 0.0;
            for (int ai = 0; ai < ali->nCodes; ai++)
                fsum += fi(i, ai);
            if (fsum != 0) {
                double fsumInv = 1.0 / fsum;
                for (int ai = 0; ai < ali->nCodes; ai++)
                    fi(i, ai) *= fsumInv;
            } else {
                /* No counts at this site: fall back to a flat distribution */
                numeric_t flatF = 1.0 / ((numeric_t) ali->nCodes);
                for (int ai = 0; ai < ali->nCodes; ai++)
                    fi(i, ai) = flatF;
            }
        }

        /* Normalise every pair of sites to a distribution */
        for (int i = 0; i < ali->nSites - 1; i++)
            for (int j = i + 1; j < ali->nSites; j++) {
                double fsum = 0.0;
                for (int ai = 0; ai < ali->nCodes; ai++)
                    for (int aj = 0; aj < ali->nCodes; aj++)
                        fsum += fij(i, j, ai, aj);
                if (fsum != 0) {
                    double fsumInv = 1.0 / fsum;
                    for (int ai = 0; ai < ali->nCodes; ai++)
                        for (int aj = 0; aj < ali->nCodes; aj++)
                            fij(i, j, ai, aj) *= fsumInv;
                } else {
                    /* No counts for this pair: flat distribution */
                    numeric_t flatF =
                        1.0 / ((numeric_t) (ali->nCodes * ali->nCodes));
                    for (int ai = 0; ai < ali->nCodes; ai++)
                        for (int aj = 0; aj < ali->nCodes; aj++)
                            fij(i, j, ai, aj) = flatF;
                }
            }
    } else {
        /* Each sequence contributes its weight divided by nEff */
        numeric_t Zinv = 1.0 / ali->nEff;

        for (int s = 0; s < ali->nSeqs; s++)
            for (int i = 0; i < ali->nSites; i++)
                fi(i, seq(s, i)) += ali->weights[s] * Zinv;

        for (int s = 0; s < ali->nSeqs; s++)
            for (int i = 0; i < ali->nSites - 1; i++)
                for (int j = i + 1; j < ali->nSites; j++)
                    fij(i, j, seq(s, i), seq(s, j)) += ali->weights[s] * Zinv;
    }
}
|
|
|
/*
 * Releases the buffers owned by an alignment built by MSARead(), and the
 * options structure itself.
 *
 *   ali      alignment whose arrays are freed. The alignment_t structure
 *            itself is NOT freed here.
 *            NOTE(review): nothing visible in this file frees it; confirm
 *            the intended owner.
 *   options  freed with free(); options->target and options->alphabet are
 *            not freed
 *
 * Assumes ali->nSkippedSeqs, ali->skippedSeqs and ali->offsets were set the
 * way MSARead() sets them (offsets may be NULL).
 */
void MSAFree(alignment_t *ali, options_t *options) {
    /* MSARead() keeps one name per sequence read from the file, including
     * the skipped ones, so the name count is nSeqs + nSkippedSeqs */
    if (ali->names && ali->names[0]) {
        int nNames = ali->nSeqs + ali->nSkippedSeqs;
        for (int i = 0; i < nNames; i++) free(ali->names[i]);
    }
    free(ali->names);
    free(ali->sequences);
    free(ali->weights);
    free(ali->fi);
    free(ali->fij);
    free(ali->offsets);
    free(ali->skippedSeqs);

    free(options);
}
|
|
|
#define OUTPUT_PRECISION float |
|
void OutputParametersSite(char *outputFile, const numeric_t *x, |
|
alignment_t *ali) { |
|
FILE *fpOutput = NULL; |
|
fpOutput = fopen(outputFile, "w"); |
|
if (fpOutput != NULL) { |
|
|
|
fwrite(&(ali->nSites), sizeof(ali->nSites), 1, fpOutput); |
|
|
|
|
|
if (ali->target >= 0) { |
|
for (int i = 0; i < ali->nSites; i++) { |
|
char c = (char) ali->alphabet[seq(ali->target, i)]; |
|
fwrite(&c, sizeof(char), 1, fpOutput); |
|
} |
|
} else { |
|
char c = ali->alphabet[0]; |
|
for (int i = 0; i < ali->nSites; i++) |
|
fwrite(&c, sizeof(c), 1, fpOutput); |
|
} |
|
|
|
|
|
if (ali->target >= 0) { |
|
for (int i = 0; i < ali->nSites; i++) { |
|
int ix = ali->offsets[i]; |
|
fwrite(&ix, sizeof(ix), 1, fpOutput); |
|
} |
|
} else { |
|
for (int i = 0; i < ali->nSites; i++) { |
|
int ix = i + 1; |
|
fwrite(&ix, sizeof(ix), 1, fpOutput); |
|
} |
|
} |
|
|
|
|
|
for (int x = 0; x < 2; x++) |
|
for (int i = 0; i < ali->nSites; i++) |
|
for (int ai = 0; ai < ali->nCodes; ai++) { |
|
OUTPUT_PRECISION f = (OUTPUT_PRECISION) fi(i, ai); |
|
fwrite(&f, sizeof(f), 1, fpOutput); |
|
} |
|
|
|
|
|
for (int i = 0; i < ali->nSites; i++) |
|
for (int ai = 0; ai < ali->nCodes; ai++) { |
|
OUTPUT_PRECISION h = (OUTPUT_PRECISION) xHi(i, ai); |
|
fwrite(&h, sizeof(h), 1, fpOutput); |
|
} |
|
|
|
fclose(fpOutput); |
|
} else { |
|
fprintf(stderr, "Error writing parameters\n"); |
|
exit(1); |
|
} |
|
} |
|
|
|
void OutputParametersFull(char *outputFile, const numeric_t *x, |
|
alignment_t *ali, options_t *options) { |
|
|
|
FILE *fpOutput = NULL; |
|
fpOutput = fopen(outputFile, "w"); |
|
if (fpOutput != NULL) { |
|
|
|
int32_t nSites = (int32_t) ali->nSites; |
|
fwrite(&nSites, sizeof(nSites), 1, fpOutput); |
|
|
|
|
|
int32_t nCodes = (int32_t) ali->nCodes; |
|
fwrite(&nCodes, sizeof(nCodes), 1, fpOutput); |
|
|
|
|
|
int32_t nSeqs = (int32_t) ali->nSeqs; |
|
fwrite(&nSeqs, sizeof(nSeqs), 1, fpOutput); |
|
|
|
|
|
int32_t nSkippedSeqs = (int32_t) ali->nSkippedSeqs; |
|
fwrite(&nSkippedSeqs, sizeof(nSkippedSeqs), 1, fpOutput); |
|
|
|
|
|
int32_t maxIter = (int32_t) options->maxIter; |
|
fwrite(&maxIter, sizeof(maxIter), 1, fpOutput); |
|
|
|
|
|
OUTPUT_PRECISION theta = (OUTPUT_PRECISION) options->theta; |
|
fwrite(&theta, sizeof(theta), 1, fpOutput); |
|
|
|
|
|
OUTPUT_PRECISION lh = (OUTPUT_PRECISION) options->lambdaH; |
|
fwrite(&lh, sizeof(lh), 1, fpOutput); |
|
|
|
|
|
OUTPUT_PRECISION le = (OUTPUT_PRECISION) options->lambdaE; |
|
fwrite(&le, sizeof(le), 1, fpOutput); |
|
|
|
|
|
OUTPUT_PRECISION lg = (OUTPUT_PRECISION) options->lambdaGroup; |
|
fwrite(&lg, sizeof(lg), 1, fpOutput); |
|
|
|
|
|
OUTPUT_PRECISION nEff = (OUTPUT_PRECISION) ali->nEff; |
|
fwrite(&nEff, sizeof(nEff), 1, fpOutput); |
|
|
|
|
|
int isGapped = (options->estimatorMAP == INFER_MAP_PLM_GAPREDUCE); |
|
for (int i = 0; i < ali->nCodes; i++) { |
|
int8_t letter = (int8_t) ali->alphabet[i + isGapped]; |
|
fwrite(&letter, sizeof(letter), 1, fpOutput); |
|
} |
|
|
|
|
|
int skipix = 0, reducedix = 0; |
|
for (int s = 0; s < ali->nSeqs + ali->nSkippedSeqs; s++) { |
|
if (skipix < ali->nSkippedSeqs && s == ali->skippedSeqs[skipix]) { |
|
|
|
OUTPUT_PRECISION w = (OUTPUT_PRECISION) 0; |
|
fwrite(&w, sizeof(w), 1, fpOutput); |
|
skipix++; |
|
} else { |
|
numeric_t nNeighbors = ali->weights[reducedix]; |
|
nNeighbors = 1.0 / (nNeighbors * options->scale); |
|
OUTPUT_PRECISION w = (OUTPUT_PRECISION) nNeighbors; |
|
fwrite(&w, sizeof(w), 1, fpOutput); |
|
reducedix++; |
|
} |
|
} |
|
|
|
|
|
if (ali->target >= 0) { |
|
for (int i = 0; i < ali->nSites; i++) { |
|
int8_t c = (int8_t) ali->alphabet[seq(ali->target, i)]; |
|
fwrite(&c, sizeof(c), 1, fpOutput); |
|
} |
|
} else { |
|
int8_t c = (int8_t) ali->alphabet[0]; |
|
for (int i = 0; i < ali->nSites; i++) |
|
fwrite(&c, sizeof(c), 1, fpOutput); |
|
} |
|
|
|
|
|
if (ali->target >= 0) { |
|
for (int i = 0; i < ali->nSites; i++) { |
|
int32_t ix = (int32_t) ali->offsets[i]; |
|
fwrite(&ix, sizeof(ix), 1, fpOutput); |
|
} |
|
} else { |
|
for (int i = 0; i < ali->nSites; i++) { |
|
int32_t ix = (int32_t) i + 1; |
|
fwrite(&ix, sizeof(ix), 1, fpOutput); |
|
} |
|
} |
|
|
|
|
|
for (int i = 0; i < ali->nSites; i++) |
|
for (int ai = 0; ai < ali->nCodes; ai++) { |
|
OUTPUT_PRECISION f = (OUTPUT_PRECISION) fi(i, ai); |
|
fwrite(&f, sizeof(f), 1, fpOutput); |
|
} |
|
|
|
|
|
for (int i = 0; i < ali->nSites; i++) |
|
for (int ai = 0; ai < ali->nCodes; ai++) { |
|
OUTPUT_PRECISION h = (OUTPUT_PRECISION) xHi(i, ai); |
|
fwrite(&h, sizeof(h), 1, fpOutput); |
|
} |
|
|
|
|
|
for (int i = 0; i < ali->nSites - 1; i++) |
|
for (int j = i + 1; j < ali->nSites; j++) |
|
for (int ai = 0; ai < ali->nCodes; ai++) |
|
for (int aj = 0; aj < ali->nCodes; aj++) { |
|
OUTPUT_PRECISION f = |
|
(OUTPUT_PRECISION) fij(i, j, ai, aj); |
|
fwrite(&f, sizeof(f), 1, fpOutput); |
|
} |
|
|
|
|
|
for (int i = 0; i < ali->nSites - 1; i++) |
|
for (int j = i + 1; j < ali->nSites; j++) |
|
for (int ai = 0; ai < ali->nCodes; ai++) |
|
for (int aj = 0; aj < ali->nCodes; aj++) { |
|
OUTPUT_PRECISION e = |
|
(OUTPUT_PRECISION) xEij(i, j, ai, aj); |
|
fwrite(&e, sizeof(e), 1, fpOutput); |
|
} |
|
fclose(fpOutput); |
|
} else { |
|
fprintf(stderr, "Error writing parameters\n"); |
|
exit(1); |
|
} |
|
} |
|
#undef OUTPUT_PRECISION |
|
|
|
void OutputCouplingScores(char *couplingsFile, const numeric_t *x, |
|
alignment_t *ali, options_t *options) { |
|
FILE *fpOutput = NULL; |
|
fpOutput = fopen(couplingsFile, "w"); |
|
if (fpOutput != NULL) { |
|
|
|
numeric_t *couplings = |
|
(numeric_t *) malloc((ali->nSites * (ali->nSites - 1) / 2) |
|
* sizeof(numeric_t)); |
|
|
|
for (int i = 0; i < ali->nSites * (ali->nSites - 1) / 2; |
|
i++) couplings[i] = 0; |
|
for (int i = 0; i < ali->nSites - 1; i++) |
|
for (int j = i + 1; j < ali->nSites; j++) { |
|
|
|
numeric_t norm = 0.0; |
|
for (int ai = 0; ai < ali->nCodes; ai++) |
|
for (int aj = 0; aj < ali->nCodes; aj++) |
|
norm += xEij(i, j, ai, aj) * xEij(i, j, ai, aj); |
|
norm = sqrt(norm); |
|
coupling(i, j) = norm; |
|
} |
|
numeric_t nPairs = |
|
((numeric_t) ((ali->nSites) * (ali->nSites - 1))) / 2.0; |
|
|
|
|
|
if (!options->zeroAPC) { |
|
|
|
numeric_t C_avg = 0.0; |
|
numeric_t *C_pos_avg = |
|
(numeric_t *) malloc(ali->nSites * sizeof(numeric_t)); |
|
for (int i = 0; i < ali->nSites; i++) { |
|
C_pos_avg[i] = 0.0; |
|
} |
|
for (int i = 0; i < ali->nSites - 1; i++) { |
|
for (int j = i + 1; j < ali->nSites; j++) { |
|
C_pos_avg[i] += |
|
coupling(i, j) / (numeric_t) (ali->nSites - 1); |
|
C_pos_avg[j] += |
|
coupling(i, j) / (numeric_t) (ali->nSites - 1); |
|
C_avg += coupling(i, j) / nPairs; |
|
} |
|
} |
|
|
|
|
|
for (int i = 0; i < ali->nSites - 1; i++) |
|
for (int j = i + 1; j < ali->nSites; j++) |
|
coupling(i, j) = |
|
coupling(i, j) - C_pos_avg[i] * C_pos_avg[j] / C_avg; |
|
} |
|
|
|
|
|
if (ali->target >= 0) { |
|
|
|
for (int i = 0; i < ali->nSites - 1; i++) |
|
for (int j = i + 1; j < ali->nSites; j++) { |
|
char ai = (char) ali->alphabet[seq(ali->target, i)]; |
|
char aj = (char) ali->alphabet[seq(ali->target, j)]; |
|
fprintf(fpOutput, "%d %c %d %c 0 %f\n", |
|
ali->offsets[i], ai, ali->offsets[j], aj, |
|
coupling(i, j)); |
|
} |
|
} else { |
|
for (int i = 0; i < ali->nSites - 1; i++) |
|
for (int j = i + 1; j < ali->nSites; j++) |
|
fprintf(fpOutput, "%d - %d - 0 %f\n", i + 1, j + 1, |
|
coupling(i, j)); |
|
} |
|
|
|
fclose(fpOutput); |
|
} else { |
|
fprintf(stderr, "Error writing coupling scores\n"); |
|
exit(1); |
|
} |
|
} |