|
import numpy as np |
|
import csv |
|
|
|
def _first_positions(terms):
    """Map each term to the index of its first occurrence in *terms*."""
    positions = {}
    for pos, term in enumerate(terms):
        # setdefault keeps the first index, matching what list.index returns
        # when a term is listed more than once.
        positions.setdefault(term, pos)
    return positions


def _term_indices(field, positions, ont):
    """Convert a comma-separated field of GO terms into an index array."""
    indices = []
    for term in field.split(','):
        if term == '':
            # An empty field (or a stray comma) carries no annotation.
            continue
        if term not in positions:
            # Same exception type that list.index raised for unknown terms.
            raise ValueError("%r is not in the list of '%s' GO terms" % (term, ont))
        indices.append(positions[term])
    # An explicit integer dtype keeps an empty selection a valid index.
    return np.asarray(indices, dtype=np.intp)


def load_GO_annot(filename):
    """Load Gene Ontology annotations from a tab-separated file.

    The file is consumed in this order:

    * for each ontology in ``('mf', 'bp', 'cc')``: one skipped line, one row
      of GO term identifiers, one skipped line, one row of GO term names
      (the skipped lines are presumably section headers);
    * one more skipped line;
    * one row per protein: the protein identifier in column 0 followed by
      three columns (mf, bp, cc), each a comma-separated list of GO term
      identifiers that may be empty.

    Empty rows (for example a trailing blank line) are ignored.

    Parameters
    ----------
    filename : str
        Path of the annotation file.

    Returns
    -------
    prot2annot : dict
        ``prot2annot[protein][ont]`` is a float vector with one entry per
        term of that ontology, 1.0 where the protein has the term.
    goterms : dict
        Ontology key -> list of GO term identifiers, in file order.
    gonames : dict
        Ontology key -> list of GO term names, in file order.
    counts : dict
        Ontology key -> float vector with the number of proteins annotated
        with each term.

    Raises
    ------
    StopIteration
        If the file ends before all term/name rows have been read.
    IndexError
        If a non-empty protein row has fewer than four columns.
    ValueError
        If a protein row references a term missing from the term rows.
    """
    onts = ['mf', 'bp', 'cc']
    prot2annot = {}
    goterms = {ont: [] for ont in onts}
    gonames = {ont: [] for ont in onts}

    with open(filename, mode='r') as tsvfile:
        reader = csv.reader(tsvfile, delimiter='\t')

        # The three ontology sections share one layout, so read them in a loop.
        for ont in onts:
            next(reader, None)  # skip the line preceding the term row
            goterms[ont] = next(reader)
            next(reader, None)  # skip the line preceding the name row
            gonames[ont] = next(reader)

        next(reader, None)  # skip the line preceding the protein rows

        counts = {ont: np.zeros(len(goterms[ont]), dtype=float) for ont in onts}

        # Build the term -> index tables once instead of scanning the term
        # list for every single annotation.
        positions = {ont: _first_positions(goterms[ont]) for ont in onts}

        for row in reader:
            if not row:
                continue
            prot, prot_goterms = row[0], row[1:]
            annot = {}
            for col, ont in enumerate(onts):
                hits = _term_indices(prot_goterms[col], positions[ont], ont)
                labels = np.zeros(len(goterms[ont]))
                labels[hits] = 1.0
                annot[ont] = labels
                # A term repeated within one row still counts once, because
                # fancy-index in-place addition does not accumulate duplicates.
                counts[ont][hits] += 1.0
            prot2annot[prot] = annot

    return prot2annot, goterms, gonames, counts
|
|
|
|
|
def load_EC_annot(filename): |
|
|
|
prot2annot = {} |
|
with open(filename, mode='r') as tsvfile: |
|
reader = csv.reader(tsvfile, delimiter='\t') |
|
|
|
|
|
next(reader, None) |
|
ec_numbers = {'ec': next(reader)} |
|
next(reader, None) |
|
counts = {'ec': np.zeros(len(ec_numbers['ec']), dtype=float)} |
|
for row in reader: |
|
prot, prot_ec_numbers = row[0], row[1] |
|
ec_indices = [ec_numbers['ec'].index(ec_num) for ec_num in prot_ec_numbers.split(',')] |
|
prot2annot[prot] = {'ec': np.zeros(len(ec_numbers['ec']), dtype=np.int64)} |
|
prot2annot[prot]['ec'][ec_indices] = 1.0 |
|
counts['ec'][ec_indices] += 1 |
|
|