Spaces:
No application file
No application file
File size: 7,789 Bytes
b7731cd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 |
# Copyright 2009 by Tiago Antao <[email protected]>. All rights reserved.
#
# This file is part of the Biopython distribution and governed by your
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
# Please see the LICENSE file that should have been included as part of this
# package.
"""Control GenePop through an easier interface.
This interface is less efficient than the standard GenePopControler
"""
from .Controller import GenePopController
from Bio.PopGen import GenePop
class EasyController:
"""Define a class for an easier interface with the GenePop program."""
def __init__(self, fname, genepop_dir=None):
"""Initialize the controller.
genepop_dir is the directory where GenePop is.
The binary should be called Genepop (capital G)
"""
self._fname = fname
self._controller = GenePopController(genepop_dir)
self.__fst_pair_locus = {} # More caches like this needed!
self.__allele_frequency = {} # More caches like this needed!
def get_basic_info(self):
"""Obtain the population list and loci list from the file."""
with open(self._fname) as f:
rec = GenePop.read(f)
return rec.pop_list, rec.loci_list
# 1.3
def test_hw_pop(self, pop_pos, test_type="probability"):
"""Perform Hardy-Weinberg test on the given position."""
if test_type == "deficiency":
hw_res = self._controller.test_pop_hz_deficiency(self._fname)
elif test_type == "excess":
hw_res = self._controller.test_pop_hz_excess(self._fname)
else:
loci_res, hw_res, fisher_full = self._controller.test_pop_hz_prob(
self._fname, ".P"
)
for i in range(pop_pos - 1):
next(hw_res)
return next(hw_res)
# 1.4
def test_hw_global(
self,
test_type="deficiency",
enum_test=True,
dememorization=10000,
batches=20,
iterations=5000,
):
"""Perform Hardy-Weinberg global Heterozygote test."""
if test_type == "deficiency":
pop_res, loc_res, all = self._controller.test_global_hz_deficiency(
self._fname, enum_test, dememorization, batches, iterations
)
else:
pop_res, loc_res, all = self._controller.test_global_hz_excess(
self._fname, enum_test, dememorization, batches, iterations
)
return list(pop_res), list(loc_res), all
# 2.1
def test_ld_all_pair(
self, locus1, locus2, dememorization=10000, batches=20, iterations=5000
):
"""Test for linkage disequilibrium for each pair of loci in each population."""
all_ld = self._controller.test_ld(
self._fname, dememorization, batches, iterations
)[1]
for ld_case in all_ld:
(l1, l2), result = ld_case
if (l1 == locus1 and l2 == locus2) or (l1 == locus2 and l2 == locus1):
return result
def estimate_nm(self):
"""Estimate Nm. Just a simple bridge."""
return self._controller.estimate_nm(self._fname)
def get_heterozygosity_info(self, pop_pos, locus_name):
"""Return the heterozygosity info for a certain locus on a population.
Returns (Expected homozygotes, observed homozygotes,
Expected heterozygotes, observed heterozygotes)
"""
geno_freqs = self._controller.calc_allele_genotype_freqs(self._fname)
pop_iter, loc_iter = geno_freqs
pops = list(pop_iter)
return pops[pop_pos][1][locus_name][1]
def get_genotype_count(self, pop_pos, locus_name):
"""Return the genotype counts for a certain population and locus."""
geno_freqs = self._controller.calc_allele_genotype_freqs(self._fname)
pop_iter, loc_iter = geno_freqs
pop_iter = list(pop_iter)
return pop_iter[pop_pos][1][locus_name][0]
def get_fis(self, pop_pos, locus_name):
"""Return the Fis for a certain population and locus.
Below CW means Cockerham and Weir and RH means Robertson and Hill.
Returns a pair:
- dictionary [allele] = (repetition count, frequency, Fis CW )
with information for each allele
- a triple with total number of alleles, Fis CW, Fis RH
"""
geno_freqs = self._controller.calc_allele_genotype_freqs(self._fname)
pop_iter, loc_iter = geno_freqs
pops = list(pop_iter)
return pops[pop_pos][1][locus_name][2:]
def get_alleles(self, pop_pos, locus_name):
"""Return the alleles for a certain population and locus."""
geno_freqs = self._controller.calc_allele_genotype_freqs(self._fname)
pop_iter, loc_iter = geno_freqs
pop_iter = list(pop_iter)
return list(pop_iter[pop_pos][1][locus_name][2].keys())
def get_alleles_all_pops(self, locus_name):
"""Return the alleles for a certain population and locus."""
geno_freqs = self._controller.calc_allele_genotype_freqs(self._fname)
pop_iter, loc_iter = geno_freqs
for locus_info in loc_iter:
if locus_info[0] == locus_name:
return locus_info[1]
def get_allele_frequency(self, pop_pos, locus_name):
"""Calculate the allele frequency for a certain locus on a population."""
if len(self.__allele_frequency) == 0:
geno_freqs = self._controller.calc_allele_genotype_freqs(self._fname)
pop_iter, loc_iter = geno_freqs
for locus_info in loc_iter:
if locus_info[0] is None:
self.__allele_frequency[locus_info[0]] = None, None
else:
self.__allele_frequency[locus_info[0]] = locus_info[1:]
info = self.__allele_frequency[locus_name]
pop_name, freqs, total = info[1][pop_pos]
allele_freq = {}
alleles = info[0]
for i, allele in enumerate(alleles):
allele_freq[allele] = freqs[i]
return total, allele_freq
def get_multilocus_f_stats(self):
"""Return the multilocus F stats.
Explain averaging.
Returns Fis(CW), Fst, Fit
"""
return self._controller.calc_fst_all(self._fname)[0]
def get_f_stats(self, locus_name):
"""Return F stats for a locus.
Returns Fis(CW), Fst, Fit, Qintra, Qinter
"""
loci_iter = self._controller.calc_fst_all(self._fname)[1]
for name, fis, fst, fit, qintra, qinter in loci_iter:
if name == locus_name:
return fis, fst, fit, qintra, qinter
def get_avg_fis(self):
"""Calculate identity-base average Fis."""
return self._controller.calc_diversities_fis_with_identity(self._fname)[1]
def get_avg_fst_pair(self):
"""Calculate Allele size-base average Fis for all population pairs."""
return self._controller.calc_fst_pair(self._fname)[1]
def get_avg_fst_pair_locus(self, locus):
"""Calculate Allele size-base average Fis for all population pairs of the given locus."""
if len(self.__fst_pair_locus) == 0:
iter = self._controller.calc_fst_pair(self._fname)[0]
for locus_info in iter:
self.__fst_pair_locus[locus_info[0]] = locus_info[1]
return self.__fst_pair_locus[locus]
def calc_ibd(self, is_diplo=True, stat="a", scale="Log", min_dist=0.00001):
"""Calculate isolation by distance statistics for Diploid or Haploid."""
if is_diplo:
return self._controller.calc_ibd_diplo(self._fname, stat, scale, min_dist)
else:
return self._controller.calc_ibd_haplo(self._fname, stat, scale, min_dist)
|