Spaces:
No application file
No application file
# Copyright 2009 by Tiago Antao <[email protected]>. All rights reserved.
#
# This file is part of the Biopython distribution and governed by your
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
# Please see the LICENSE file that should have been included as part of this
# package.
"""Control GenePop through an easier interface.

This interface is less efficient than the standard GenePopController.
"""
from .Controller import GenePopController | |
from Bio.PopGen import GenePop | |
class EasyController:
    """Define a class for an easier interface with the GenePop program.

    Every method forwards to a call on the wrapped ``GenePopController``
    for the file given at construction time and extracts the requested
    part of that call's result.
    """

    def __init__(self, fname, genepop_dir=None):
        """Initialize the controller.

        fname is the GenePop input file every method operates on.

        genepop_dir is the directory where GenePop is.

        The binary should be called Genepop (capital G)
        """
        self._fname = fname
        self._controller = GenePopController(genepop_dir)
        # Both caches are filled lazily on first use and never invalidated.
        self.__fst_pair_locus = {}  # More caches like this needed!
        self.__allele_frequency = {}  # More caches like this needed!

    def _pop_locus_info(self, pop_pos, locus_name):
        """Return the record of locus_name in the population at pop_pos.

        The record is taken from the population part of
        ``calc_allele_genotype_freqs``; the controller is called again on
        every invocation (no caching).
        """
        pop_iter, _loc_iter = self._controller.calc_allele_genotype_freqs(
            self._fname
        )
        # Materialise so that pop_pos can be used as a plain list index.
        populations = list(pop_iter)
        return populations[pop_pos][1][locus_name]

    def get_basic_info(self):
        """Obtain the population list and loci list from the file."""
        with open(self._fname) as handle:
            record = GenePop.read(handle)
        return record.pop_list, record.loci_list

    # 1.3
    def test_hw_pop(self, pop_pos, test_type="probability"):
        """Perform Hardy-Weinberg test on the given position.

        test_type "deficiency" or "excess" selects the matching
        heterozygote test; any other value runs the probability test.

        Raises StopIteration if the result iterator has fewer entries than
        requested.
        """
        if test_type == "deficiency":
            hw_res = self._controller.test_pop_hz_deficiency(self._fname)
        elif test_type == "excess":
            hw_res = self._controller.test_pop_hz_excess(self._fname)
        else:
            _loci_res, hw_res, _fisher_full = self._controller.test_pop_hz_prob(
                self._fname, ".P"
            )
        # NOTE(review): pop_pos - 1 entries are skipped, so pop_pos 0 and 1
        # both return the first result, whereas the other methods of this
        # class index populations from 0 - confirm the intended convention.
        for _ in range(pop_pos - 1):
            next(hw_res)
        return next(hw_res)

    # 1.4
    def test_hw_global(
        self,
        test_type="deficiency",
        enum_test=True,
        dememorization=10000,
        batches=20,
        iterations=5000,
    ):
        """Perform Hardy-Weinberg global Heterozygote test.

        test_type "deficiency" runs the deficiency test; any other value
        runs the excess test.

        Returns a triple: list of population results, list of locus
        results, and the third element of the controller result unchanged.
        """
        if test_type == "deficiency":
            run_test = self._controller.test_global_hz_deficiency
        else:
            run_test = self._controller.test_global_hz_excess
        pop_res, loc_res, all_res = run_test(
            self._fname, enum_test, dememorization, batches, iterations
        )
        return list(pop_res), list(loc_res), all_res

    # 2.1
    def test_ld_all_pair(
        self, locus1, locus2, dememorization=10000, batches=20, iterations=5000
    ):
        """Test for linkage disequilibrium for each pair of loci in each population.

        Returns the result stored for the pair (locus1, locus2), in either
        order, or None when the pair is not among the results.
        """
        all_ld = self._controller.test_ld(
            self._fname, dememorization, batches, iterations
        )[1]
        for (l1, l2), result in all_ld:
            if (l1 == locus1 and l2 == locus2) or (l1 == locus2 and l2 == locus1):
                return result
        return None

    def estimate_nm(self):
        """Estimate Nm. Just a simple bridge."""
        return self._controller.estimate_nm(self._fname)

    def get_heterozygosity_info(self, pop_pos, locus_name):
        """Return the heterozygosity info for a certain locus on a population.

        Returns (Expected homozygotes, observed homozygotes,
        Expected heterozygotes, observed heterozygotes)
        """
        return self._pop_locus_info(pop_pos, locus_name)[1]

    def get_genotype_count(self, pop_pos, locus_name):
        """Return the genotype counts for a certain population and locus."""
        return self._pop_locus_info(pop_pos, locus_name)[0]

    def get_fis(self, pop_pos, locus_name):
        """Return the Fis for a certain population and locus.

        Below CW means Cockerham and Weir and RH means Robertson and Hill.

        Returns a pair:

        - dictionary [allele] = (repetition count, frequency, Fis CW )
          with information for each allele
        - a triple with total number of alleles, Fis CW, Fis RH

        """
        return self._pop_locus_info(pop_pos, locus_name)[2:]

    def get_alleles(self, pop_pos, locus_name):
        """Return the alleles for a certain population and locus."""
        return list(self._pop_locus_info(pop_pos, locus_name)[2].keys())

    def get_alleles_all_pops(self, locus_name):
        """Return the alleles for a certain locus across all populations.

        Returns None when locus_name is not among the loci reported by the
        controller.
        """
        _pop_iter, loc_iter = self._controller.calc_allele_genotype_freqs(
            self._fname
        )
        for locus_info in loc_iter:
            if locus_info[0] == locus_name:
                return locus_info[1]
        return None

    def get_allele_frequency(self, pop_pos, locus_name):
        """Calculate the allele frequency for a certain locus on a population.

        Returns a pair: the total stored for that population, and a
        dictionary [allele] = frequency.

        The per-locus data is fetched from the controller on the first
        call only and cached on the instance afterwards.

        Raises KeyError if locus_name is not a known locus.
        """
        if not self.__allele_frequency:
            _pop_iter, loc_iter = self._controller.calc_allele_genotype_freqs(
                self._fname
            )
            for locus_info in loc_iter:
                if locus_info[0] is None:
                    self.__allele_frequency[locus_info[0]] = None, None
                else:
                    self.__allele_frequency[locus_info[0]] = locus_info[1:]
        alleles, per_pop = self.__allele_frequency[locus_name]
        _pop_name, freqs, total = per_pop[pop_pos]
        # freqs is indexed in the same order as the locus' allele list.
        allele_freq = {allele: freqs[i] for i, allele in enumerate(alleles)}
        return total, allele_freq

    def get_multilocus_f_stats(self):
        """Return the multilocus F stats.

        Explain averaging.
        Returns Fis(CW), Fst, Fit
        """
        return self._controller.calc_fst_all(self._fname)[0]

    def get_f_stats(self, locus_name):
        """Return F stats for a locus.

        Returns Fis(CW), Fst, Fit, Qintra, Qinter, or None when locus_name
        is not among the loci reported by the controller.
        """
        loci_iter = self._controller.calc_fst_all(self._fname)[1]
        for name, fis, fst, fit, qintra, qinter in loci_iter:
            if name == locus_name:
                return fis, fst, fit, qintra, qinter
        return None

    def get_avg_fis(self):
        """Calculate identity-base average Fis."""
        return self._controller.calc_diversities_fis_with_identity(self._fname)[1]

    def get_avg_fst_pair(self):
        """Calculate Allele size-base average Fst for all population pairs."""
        return self._controller.calc_fst_pair(self._fname)[1]

    def get_avg_fst_pair_locus(self, locus):
        """Calculate Allele size-base average Fst for all population pairs of the given locus.

        The per-locus results are fetched from the controller on the first
        call only and cached on the instance afterwards.

        Raises KeyError if locus is not a known locus.
        """
        if not self.__fst_pair_locus:
            loci_iter = self._controller.calc_fst_pair(self._fname)[0]
            for locus_info in loci_iter:
                self.__fst_pair_locus[locus_info[0]] = locus_info[1]
        return self.__fst_pair_locus[locus]

    def calc_ibd(self, is_diplo=True, stat="a", scale="Log", min_dist=0.00001):
        """Calculate isolation by distance statistics for Diploid or Haploid."""
        if is_diplo:
            return self._controller.calc_ibd_diplo(self._fname, stat, scale, min_dist)
        return self._controller.calc_ibd_haplo(self._fname, stat, scale, min_dist)