File size: 7,789 Bytes
b7731cd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
# Copyright 2009 by Tiago Antao <[email protected]>.  All rights reserved.
#
# This file is part of the Biopython distribution and governed by your
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
# Please see the LICENSE file that should have been included as part of this
# package.

"""Control GenePop through an easier interface.

This interface is less efficient than the standard GenePopController.

"""

from .Controller import GenePopController
from Bio.PopGen import GenePop


class EasyController:
    """Define a class for an easier interface with the GenePop program.

    Each public method forwards the GenePop file name given at construction
    time to a GenePopController and picks the requested piece out of the
    value the controller returns.
    """

    def __init__(self, fname, genepop_dir=None):
        """Initialize the controller.

        fname is the name of the GenePop file every method operates on.

        genepop_dir is the directory where GenePop is.

        The binary should be called Genepop (capital G)
        """
        self._fname = fname
        self._controller = GenePopController(genepop_dir)
        # Per-instance caches, filled lazily by the methods that use them.
        self.__fst_pair_locus = {}  # More caches like this needed!
        self.__allele_frequency = {}  # More caches like this needed!

    def _pop_locus_info(self, pop_pos, locus_name):
        """Return the controller's record for one population and locus (PRIVATE).

        Runs calc_allele_genotype_freqs, materialises the population iterator
        and indexes it by population position (0-based) and locus name.
        """
        pop_iter, _loc_iter = self._controller.calc_allele_genotype_freqs(
            self._fname
        )
        # The whole iterator is consumed on purpose, exactly as a plain
        # list() call over it would do.
        pops = list(pop_iter)
        return pops[pop_pos][1][locus_name]

    def get_basic_info(self):
        """Obtain the population list and loci list from the file."""
        with open(self._fname) as handle:
            rec = GenePop.read(handle)
        return rec.pop_list, rec.loci_list

    # 1.3
    def test_hw_pop(self, pop_pos, test_type="probability"):
        """Perform Hardy-Weinberg test on the given position.

        test_type selects the controller call: "deficiency", "excess" or
        anything else for the probability test.

        Note that pop_pos behaves as a 1-based position here: the first
        ``pop_pos - 1`` results are skipped, so 0 and 1 both return the
        first result. StopIteration propagates if there are too few results.
        """
        if test_type == "deficiency":
            hw_res = self._controller.test_pop_hz_deficiency(self._fname)
        elif test_type == "excess":
            hw_res = self._controller.test_pop_hz_excess(self._fname)
        else:
            # Only the middle element of the triple is used here.
            _loci_res, hw_res, _fisher_full = self._controller.test_pop_hz_prob(
                self._fname, ".P"
            )
        for _ in range(pop_pos - 1):
            next(hw_res)
        return next(hw_res)

    # 1.4
    def test_hw_global(
        self,
        test_type="deficiency",
        enum_test=True,
        dememorization=10000,
        batches=20,
        iterations=5000,
    ):
        """Perform Hardy-Weinberg global Heterozygote test.

        test_type "deficiency" runs the deficiency test, any other value
        runs the excess test. The remaining arguments are passed to the
        controller unchanged.

        Returns a triple: list of population results, list of locus results
        and the controller's overall result.
        """
        if test_type == "deficiency":
            run_test = self._controller.test_global_hz_deficiency
        else:
            run_test = self._controller.test_global_hz_excess
        pop_res, loc_res, all_res = run_test(
            self._fname, enum_test, dememorization, batches, iterations
        )
        return list(pop_res), list(loc_res), all_res

    # 2.1
    def test_ld_all_pair(
        self, locus1, locus2, dememorization=10000, batches=20, iterations=5000
    ):
        """Test for linkage disequilibrium for a pair of loci.

        Scans the second element returned by the controller's test_ld and
        returns the result of the first entry whose locus pair matches
        locus1 and locus2 in either order, or None if no entry matches.
        """
        all_ld = self._controller.test_ld(
            self._fname, dememorization, batches, iterations
        )[1]
        wanted = ((locus1, locus2), (locus2, locus1))
        for (l1, l2), result in all_ld:
            if (l1, l2) in wanted:
                return result
        return None

    def estimate_nm(self):
        """Estimate Nm. Just a simple bridge."""
        return self._controller.estimate_nm(self._fname)

    def get_heterozygosity_info(self, pop_pos, locus_name):
        """Return the heterozygosity info for a certain locus on a population.

        Returns (Expected homozygotes, observed homozygotes,
        Expected heterozygotes, observed heterozygotes)
        """
        return self._pop_locus_info(pop_pos, locus_name)[1]

    def get_genotype_count(self, pop_pos, locus_name):
        """Return the genotype counts for a certain population and locus."""
        return self._pop_locus_info(pop_pos, locus_name)[0]

    def get_fis(self, pop_pos, locus_name):
        """Return the Fis for a certain population and locus.

        Below CW means Cockerham and Weir and RH means Robertson and Hill.

        Returns a pair:

        - dictionary [allele] = (repetition count, frequency, Fis CW )
          with information for each allele
        - a triple with total number of alleles, Fis CW, Fis RH

        """
        return self._pop_locus_info(pop_pos, locus_name)[2:]

    def get_alleles(self, pop_pos, locus_name):
        """Return the alleles for a certain population and locus."""
        return list(self._pop_locus_info(pop_pos, locus_name)[2].keys())

    def get_alleles_all_pops(self, locus_name):
        """Return the alleles found for a certain locus over all populations.

        Returns None if the controller reports no locus with that name.
        """
        _pop_iter, loc_iter = self._controller.calc_allele_genotype_freqs(
            self._fname
        )
        for locus_info in loc_iter:
            if locus_info[0] == locus_name:
                return locus_info[1]
        return None

    def get_allele_frequency(self, pop_pos, locus_name):
        """Calculate the allele frequency for a certain locus on a population.

        The per-locus data is read from the controller once and cached on
        the instance; later calls are answered from the cache.

        Returns a pair: the total reported for the population and a
        dictionary [allele] = frequency.
        """
        if not self.__allele_frequency:
            _pop_iter, loc_iter = self._controller.calc_allele_genotype_freqs(
                self._fname
            )
            for locus_info in loc_iter:
                name = locus_info[0]
                if name is None:
                    self.__allele_frequency[name] = None, None
                else:
                    self.__allele_frequency[name] = locus_info[1:]
        info = self.__allele_frequency[locus_name]
        _pop_name, freqs, total = info[1][pop_pos]
        # Frequencies are matched to alleles by position.
        allele_freq = {allele: freqs[i] for i, allele in enumerate(info[0])}
        return total, allele_freq

    def get_multilocus_f_stats(self):
        """Return the multilocus F stats.

        TODO: explain averaging.
        Returns Fis(CW), Fst, Fit
        """
        return self._controller.calc_fst_all(self._fname)[0]

    def get_f_stats(self, locus_name):
        """Return F stats for a locus.

        Returns Fis(CW), Fst, Fit, Qintra, Qinter, or None if the
        controller reports no locus with that name.
        """
        loci_iter = self._controller.calc_fst_all(self._fname)[1]
        for name, fis, fst, fit, qintra, qinter in loci_iter:
            if name == locus_name:
                return fis, fst, fit, qintra, qinter
        return None

    def get_avg_fis(self):
        """Calculate identity-base average Fis."""
        return self._controller.calc_diversities_fis_with_identity(self._fname)[1]

    def get_avg_fst_pair(self):
        """Calculate the average Fst for all population pairs.

        NOTE(review): earlier documentation called this "allele size-base";
        confirm against GenePopController.calc_fst_pair.
        """
        return self._controller.calc_fst_pair(self._fname)[1]

    def get_avg_fst_pair_locus(self, locus):
        """Calculate the Fst for all population pairs of the given locus.

        The per-locus results are read from the controller once and cached
        on the instance. Raises KeyError for an unknown locus.

        NOTE(review): earlier documentation called this "allele size-base";
        confirm against GenePopController.calc_fst_pair.
        """
        if not self.__fst_pair_locus:
            locus_results = self._controller.calc_fst_pair(self._fname)[0]
            for locus_info in locus_results:
                self.__fst_pair_locus[locus_info[0]] = locus_info[1]
        return self.__fst_pair_locus[locus]

    def calc_ibd(self, is_diplo=True, stat="a", scale="Log", min_dist=0.00001):
        """Calculate isolation by distance statistics for Diploid or Haploid."""
        if is_diplo:
            return self._controller.calc_ibd_diplo(self._fname, stat, scale, min_dist)
        return self._controller.calc_ibd_haplo(self._fname, stat, scale, min_dist)