Spaces:

aakash0017
/

DrVai-Rag-Testing

No application file

App Files Files Community

DrVai-Rag-Testing / myenv /lib /python3.10 /site-packages /Bio /codonalign /__init__.py

aakash0017

Upload folder using huggingface_hub

b7731cd over 1 year ago

raw

history blame contribute delete

30 kB

	# Copyright 2013 by Zheng Ruan ([email protected]).
	# All rights reserved.
	# This code is part of the Biopython distribution and governed by its
	# license. Please see the LICENSE file that should have been included
	# as part of this package.

	"""Code for dealing with Codon Alignments."""

	import copy
	from collections.abc import Mapping, Iterable

	from Bio import BiopythonWarning
	from Bio import BiopythonExperimentalWarning

	from Bio.SeqRecord import SeqRecord
	from Bio.Data import CodonTable

	from Bio.codonalign.codonseq import CodonSeq
	from Bio.codonalign.codonalignment import CodonAlignment, mktest

	import warnings

	# Module-level call: it runs when this module is imported and flags the
	# package as experimental through BiopythonExperimentalWarning.
	warnings.warn(
	    "Bio.codonalign is an experimental module which may undergo "
	    "significant changes prior to its future official release.",
	    BiopythonExperimentalWarning,
	)


	def build(
	    pro_align,
	    nucl_seqs,
	    corr_dict=None,
	    gap_char="-",
	    unknown="X",
	    codon_table=None,
	    complete_protein=False,
	    anchor_len=10,
	    max_score=10,
	):
	    """Build a codon alignment from protein alignment and corresponding nucleotides.

	    Arguments:
	     - pro_align - a protein MultipleSeqAlignment object
	     - nucl_seqs - an object returned by SeqIO.parse or SeqIO.index
	       or a collection of SeqRecord.
	     - corr_dict - a dict that maps protein id to nucleotide id
	     - gap_char - gap character used in the protein alignment; it is
	       forwarded to the matching helpers
	     - unknown - accepted for backward compatibility; this function
	       does not read it
	     - codon_table - a CodonTable; CodonTable.generic_by_id[1] is used
	       when None
	     - complete_protein - whether the sequence begins with a start
	       codon
	     - anchor_len - forwarded to _check_corr (anchor size in residues)
	     - max_score - forwarded to _get_codon_rec (mismatch tolerance)

	    Return a CodonAlignment object.

	    Raises TypeError for arguments of the wrong type, ValueError when
	    records cannot be paired (too few nucleotide records, an id without a
	    partner, a duplicated nucleotide id, or a protein id missing from
	    corr_dict) and RuntimeError when corr_dict has fewer entries than
	    pro_align has records.

	    The example below answers this Biostars question: https://www.biostars.org/p/89741/

	    >>> from Bio.Seq import Seq
	    >>> from Bio.SeqRecord import SeqRecord
	    >>> from Bio.Align import MultipleSeqAlignment
	    >>> from Bio.codonalign import build
	    >>> seq1 = SeqRecord(Seq('ATGTCTCGT'), id='pro1')
	    >>> seq2 = SeqRecord(Seq('ATGCGT'), id='pro2')
	    >>> pro1 = SeqRecord(Seq('MSR'), id='pro1')
	    >>> pro2 = SeqRecord(Seq('M-R'), id='pro2')
	    >>> aln = MultipleSeqAlignment([pro1, pro2])
	    >>> codon_aln = build(aln, [seq1, seq2])
	    >>> print(codon_aln)
	    CodonAlignment with 2 rows and 9 columns (3 codons)
	    ATGTCTCGT pro1
	    ATG---CGT pro2

	    """
	    # TODO
	    # add an option to allow the user to specify the returned object?

	    from Bio.Align import MultipleSeqAlignment

	    # check the type of object of pro_align
	    if not isinstance(pro_align, MultipleSeqAlignment):
	        raise TypeError("the first argument should be a MultipleSeqAlignment object")
	    # check whether the number of seqs in pro_align and nucl_seqs is
	    # the same
	    pro_num = len(pro_align)
	    if corr_dict is None:
	        try:
	            nucl_num = len(nucl_seqs)
	        except TypeError:
	            # nucl_seqs will be an iterator if returned by SeqIO.parse()
	            nucl_seqs = tuple(nucl_seqs)
	            nucl_num = len(nucl_seqs)
	        if pro_num > nucl_num:
	            raise ValueError(
	                f"Higher Number of SeqRecords in Protein Alignment ({pro_num}) "
	                f"than the Number of Nucleotide SeqRecords ({nucl_num}) are found!"
	            )

	        # Determine the protein sequences and nucl sequences
	        # correspondence. If nucl_seqs is a list, tuple or read by
	        # SeqIO.parse(), we assume the order of sequences in pro_align
	        # and nucl_seqs are the same. If nucl_seqs is a dict or read by
	        # SeqIO.index(), we match seqs in pro_align and those in
	        # nucl_seq by their id.
	        if isinstance(nucl_seqs, Mapping):
	            corr_method = 1
	        elif isinstance(nucl_seqs, Iterable):
	            corr_method = 0
	        else:
	            raise TypeError(
	                "Nucl Sequences Error, Unknown type to assign correspondence method"
	            )
	    else:
	        if not isinstance(corr_dict, dict):
	            raise TypeError(
	                "corr_dict should be a dict that corresponds "
	                "protein id to nucleotide id!"
	            )
	        if len(corr_dict) < pro_num:
	            raise RuntimeError(
	                f"Number of items in corr_dict ({len(corr_dict)}) "
	                f"is less than number of protein records ({pro_num})"
	            )
	        if not isinstance(nucl_seqs, Mapping):
	            # Index the records by id so corr_dict values can be looked up.
	            d = {}
	            for record in nucl_seqs:
	                key = record.id
	                if key in d:
	                    raise ValueError(f"Duplicate key '{key}'")
	                d[key] = record
	            nucl_seqs = d
	        corr_method = 2

	    # set up pro-nucl correspondence based on corr_method
	    # corr_method = 0, consecutive pairing
	    if corr_method == 0:
	        pro_nucl_pair = zip(pro_align, nucl_seqs)
	    # corr_method = 1, keyword pairing
	    elif corr_method == 1:
	        nucl_id = set(nucl_seqs.keys())
	        pro_id = {i.id for i in pro_align}
	        # check if there is pro_id that does not have a nucleotide match
	        diff = pro_id - nucl_id
	        if diff:
	            raise ValueError(
	                f"Protein Record {', '.join(diff)} cannot find a "
	                "nucleotide sequence match, please check the id"
	            )
	        pro_nucl_pair = [(pro_rec, nucl_seqs[pro_rec.id]) for pro_rec in pro_align]
	    # corr_method = 2, dict pairing
	    elif corr_method == 2:
	        pro_nucl_pair = []
	        for pro_rec in pro_align:
	            try:
	                nucl_id = corr_dict[pro_rec.id]
	            except KeyError:
	                # A library function must not print and terminate the
	                # interpreter; report the problem like the id-pairing
	                # branch above does.
	                raise ValueError(
	                    f"Protein record ({pro_rec.id}) is not in corr_dict!"
	                ) from None
	            pro_nucl_pair.append((pro_rec, nucl_seqs[nucl_id]))

	    if codon_table is None:
	        codon_table = CodonTable.generic_by_id[1]

	    codon_aln = []
	    shift = False
	    for pair in pro_nucl_pair:
	        # Beware that the following span corresponds to an ungapped
	        # nucleotide sequence.
	        corr_span = _check_corr(
	            pair[0],
	            pair[1],
	            gap_char=gap_char,
	            codon_table=codon_table,
	            complete_protein=complete_protein,
	            anchor_len=anchor_len,
	        )
	        if not corr_span:
	            raise ValueError(
	                f"Protein Record {pair[0].id} and "
	                f"Nucleotide Record {pair[1].id} do not match!"
	            )
	        codon_rec = _get_codon_rec(
	            pair[0],
	            pair[1],
	            corr_span,
	            gap_char=gap_char,
	            complete_protein=complete_protein,
	            codon_table=codon_table,
	            max_score=max_score,
	        )
	        codon_aln.append(codon_rec)
	        # mode 2 means a frameshift was detected for this pair
	        if corr_span[1] == 2:
	            shift = True
	    if shift:
	        return CodonAlignment(_align_shift_recs(codon_aln))
	    return CodonAlignment(codon_aln)


	def _codons2re(codons):
	"""Generate regular expression based on a given list of codons (PRIVATE)."""
	reg = ""
	for i in zip(*codons):
	if len(set(i)) == 1:
	reg += "".join(set(i))
	else:
	reg += "[" + "".join(set(i)) + "]"
	return reg


	def _get_aa_regex(codon_table, stop="*", unknown="X"):
	    """Map each amino acid of a CodonTable to a codon regular expression (PRIVATE).

	    The codons of codon_table.forward_table are grouped by the amino acid
	    they encode and each group is collapsed with _codons2re. The entry
	    for ``stop`` is built from codon_table.stop_codons, and ``unknown``
	    maps to ``...`` (any three characters).

	    Raises TypeError when codon_table is not a CodonTable instance.

	    >>> from Bio.Data.CodonTable import generic_by_id
	    >>> p = generic_by_id[1]
	    >>> t = _get_aa_regex(p)
	    >>> print(t['A'][0])
	    G
	    >>> print(t['A'][1])
	    C
	    >>> print(sorted(list(t['A'][2:])))
	    ['A', 'C', 'G', 'T', 'U', '[', ']']
	    >>> print(sorted(list(t['L'][:5])))
	    ['C', 'T', 'U', '[', ']']
	    >>> print(sorted(list(t['L'][5:9])))
	    ['T', 'U', '[', ']']
	    >>> print(sorted(list(t['L'][9:])))
	    ['A', 'C', 'G', 'T', 'U', '[', ']']

	    """
	    from Bio.Data.CodonTable import CodonTable

	    if not isinstance(codon_table, CodonTable):
	        raise TypeError("Input table is not a instance of Bio.Data.CodonTable object")

	    # Group the codons by the amino acid they encode.
	    codons_by_aa = {}
	    for codon, residue in codon_table.forward_table.items():
	        if residue not in codons_by_aa:
	            codons_by_aa[residue] = []
	        codons_by_aa[residue].append(codon)

	    # Collapse every group into one pattern, keeping the insertion order.
	    regex_by_aa = {
	        residue: _codons2re(group) for residue, group in codons_by_aa.items()
	    }
	    regex_by_aa[stop] = _codons2re(codon_table.stop_codons)
	    regex_by_aa[unknown] = "..."
	    return regex_by_aa


	def _check_corr(
	    pro, nucl, gap_char, codon_table, complete_protein=False, anchor_len=10
	):
	    """Check if the nucleotide can be translated into the protein (PRIVATE).

	    Expects two SeqRecord objects.

	    Three strategies are tried in turn on the ungapped, upper-cased
	    nucleotide sequence:

	     - mode 0: the whole protein, written as one codon regular
	       expression, matches directly;
	     - mode 1: the protein is cut into anchors of anchor_len residues;
	       anchors that cannot be found are replaced by wildcards and the
	       combined expression matches (mismatches, no indels);
	     - mode 2: as mode 1, after the expressions around anchors whose
	       spacing is out of frame have been replaced by frameshift-aware
	       expressions from _get_shift_anchor_re.

	    Returns ``(span, 0)``, ``(span, 1)`` or ``(span, 2, match)`` where
	    span is ``match.span()`` of the successful search.

	    Raises TypeError if either argument is not a SeqRecord and
	    RuntimeError if none of the strategies produces a match.
	    """
	    import re

	    if not isinstance(pro, SeqRecord) or not isinstance(nucl, SeqRecord):
	        raise TypeError(
	            "_check_corr accepts two SeqRecord object. Please check your input."
	        )

	    aa2re = _get_aa_regex(codon_table)
	    pro_re = "".join(aa2re[aa] for aa in pro.seq if aa != gap_char)

	    nucl_seq = str(nucl.seq.upper().replace(gap_char, ""))
	    match = re.search(pro_re, nucl_seq)
	    if match:
	        # mode = 0, direct match
	        return (match.span(), 0)

	    no_match_msg = (
	        f"Protein SeqRecord ({pro.id}) and "
	        f"Nucleotide SeqRecord ({nucl.id}) do not match!"
	    )

	    # Might be caused by mismatches or frameshift, using anchors to
	    # have a try
	    pro_seq = str(pro.seq).replace(gap_char, "")
	    anchors = [
	        pro_seq[i : (i + anchor_len)] for i in range(0, len(pro_seq), anchor_len)
	    ]
	    # if the last anchor is less than the specified anchor
	    # size, we combine the penultimate and the last anchor
	    # together as the last one. A protein shorter than anchor_len has a
	    # single short anchor and nothing to merge it with, so it is kept
	    # as is (previously this raised IndexError).
	    if len(anchors) > 1 and len(anchors[-1]) < anchor_len:
	        anchors[-1] = anchors[-2] + anchors[-1]

	    pro_re = []
	    anchor_pos = []
	    for i, anchor in enumerate(anchors):
	        qcodon = ""  # exact expression for this anchor
	        fncodon = ""  # wildcard expression of the same length
	        if len(anchor) <= anchor_len:
	            for k, aa in enumerate(anchor):
	                # Only the very first residue of a complete protein is
	                # matched against the start codons.
	                if complete_protein and i == 0 and k == 0:
	                    qcodon += _codons2re(codon_table.start_codons)
	                else:
	                    qcodon += aa2re[aa]
	                fncodon += aa2re["X"]
	        else:
	            # The merged last anchor repeats the penultimate anchor in
	            # its first anchor_len residues; only the true tail is added.
	            for aa in anchor[anchor_len:]:
	                qcodon += aa2re[aa]
	                fncodon += aa2re["X"]
	        match = re.search(qcodon, nucl_seq)
	        # build full_pro_re from anchors
	        if match:
	            anchor_pos.append((match.start(), match.end(), i))
	            pro_re.append(qcodon)
	        else:
	            pro_re.append(fncodon)

	    match = re.search("".join(pro_re), nucl_seq)
	    if match:
	        # mode = 1, mismatch
	        return (match.span(), 1)

	    if not anchor_pos:
	        # Frameshift detection works from matched anchors; without any
	        # there is nothing left to try.
	        raise RuntimeError(no_match_msg)

	    # check frames of anchors
	    # ten frameshift events are allowed in a sequence
	    shift_id_pos = 0
	    # check the first anchor
	    if anchor_pos[0][2] != 0:
	        shift_val_lst = [1, 2, 3 * anchor_len - 2, 3 * anchor_len - 1, 0]
	        sh_anc = anchors[0]
	        for shift_val in shift_val_lst:
	            if shift_val == 0:
	                qcodon = None
	                break
	            if shift_val in (1, 2):
	                sh_nuc_len = anchor_len * 3 + shift_val
	            elif shift_val in (3 * anchor_len - 2, 3 * anchor_len - 1):
	                sh_nuc_len = anchor_len * 3 - (3 * anchor_len - shift_val)
	            if anchor_pos[0][0] >= sh_nuc_len:
	                sh_nuc = nucl_seq[anchor_pos[0][0] - sh_nuc_len : anchor_pos[0][0]]
	            else:
	                # this is unlikely to produce the correct output
	                sh_nuc = nucl_seq[: anchor_pos[0][0]]
	            qcodon, shift_id_pos = _get_shift_anchor_re(
	                sh_anc, sh_nuc, shift_val, aa2re, anchor_len, shift_id_pos
	            )
	            if qcodon is not None and qcodon != -1:
	                # pro_re[0] should be '.'*anchor_len, therefore I
	                # replace it.
	                pro_re[0] = qcodon
	                break
	        if qcodon == -1:
	            warnings.warn(
	                f"first frameshift detection failed for {nucl.id}",
	                BiopythonWarning,
	            )
	    # check anchors in the middle
	    for i in range(len(anchor_pos) - 1):
	        shift_val = (anchor_pos[i + 1][0] - anchor_pos[i][0]) % (3 * anchor_len)
	        sh_anc = "".join(anchors[anchor_pos[i][2] : anchor_pos[i + 1][2]])
	        sh_nuc = nucl_seq[anchor_pos[i][0] : anchor_pos[i + 1][0]]
	        qcodon = None
	        if shift_val != 0:
	            qcodon, shift_id_pos = _get_shift_anchor_re(
	                sh_anc, sh_nuc, shift_val, aa2re, anchor_len, shift_id_pos
	            )
	        if qcodon is not None and qcodon != -1:
	            pro_re[anchor_pos[i][2] : anchor_pos[i + 1][2]] = [qcodon]
	            qcodon = None
	        elif qcodon == -1:
	            warnings.warn(
	                f"middle frameshift detection failed for {nucl.id}",
	                BiopythonWarning,
	            )
	    # check the last anchor
	    if anchor_pos[-1][2] + 1 == len(anchors) - 1:
	        sh_anc = anchors[-1]
	        this_anchor_len = len(sh_anc)
	        shift_val_lst = [
	            1,
	            2,
	            3 * this_anchor_len - 2,
	            3 * this_anchor_len - 1,
	            0,
	        ]
	        for shift_val in shift_val_lst:
	            if shift_val == 0:
	                qcodon = None
	                break
	            if shift_val in (1, 2):
	                sh_nuc_len = this_anchor_len * 3 + shift_val
	            elif shift_val in (
	                3 * this_anchor_len - 2,
	                3 * this_anchor_len - 1,
	            ):
	                sh_nuc_len = this_anchor_len * 3 - (3 * this_anchor_len - shift_val)
	            if len(nucl_seq) - anchor_pos[-1][0] >= sh_nuc_len:
	                sh_nuc = nucl_seq[anchor_pos[-1][0] : anchor_pos[-1][0] + sh_nuc_len]
	            else:
	                # this is unlikely to produce the correct output
	                sh_nuc = nucl_seq[anchor_pos[-1][0] :]
	            qcodon, shift_id_pos = _get_shift_anchor_re(
	                sh_anc, sh_nuc, shift_val, aa2re, this_anchor_len, shift_id_pos
	            )
	            if qcodon is not None and qcodon != -1:
	                pro_re.pop()
	                pro_re[-1] = qcodon
	                break
	        if qcodon == -1:
	            warnings.warn(
	                f"last frameshift detection failed for {nucl.id}",
	                BiopythonWarning,
	            )
	    # try global match
	    match = re.search("".join(pro_re), nucl_seq)
	    if match:
	        # mode = 2, frameshift; the match object carries the named groups
	        return (match.span(), 2, match)
	    raise RuntimeError(no_match_msg)


	def _get_shift_anchor_re(sh_anc, sh_nuc, shift_val, aa2re, anchor_len, shift_id_pos):
	"""Find a regular expression matching a potentially shifted anchor (PRIVATE).

	Arguments:
	- sh_anc - shifted anchor sequence
	- sh_nuc - potentially corresponding nucleotide sequence
	of sh_anc
	- shift_val - 1 or 2 indicates forward frame shift, whereas
	3anchor_len-1 or 3anchor_len-2 indicates
	backward shift
	- aa2re - aa to codon re dict
	- anchor_len - length of the anchor
	- shift_id_pos - specify current shift name we are at

	"""
	import re

	shift_id = [chr(i) for i in range(97, 107)]
	if 0 < shift_val < 3 * anchor_len - 2:
	# if shift_val in (1, 2):
	for j in range(len(sh_anc)):
	qcodon = "^"
	for k, aa in enumerate(sh_anc):
	if k == j:
	qcodon += aa2re[aa] + "(?P<" + shift_id[shift_id_pos] + ">..*)"
	else:
	qcodon += aa2re[aa]
	qcodon += "$"
	match = re.search(qcodon, sh_nuc)
	if match:
	qcodon = qcodon.replace("^", "").replace("$", "")
	shift_id_pos += 1
	return qcodon, shift_id_pos
	if not match:
	# failed to find a match (frameshift)
	return -1, shift_id_pos
	elif shift_val in (3 * anchor_len - 1, 3 * anchor_len - 2):
	shift_val = 3 * anchor_len - shift_val
	# obtain shifted anchor and corresponding nucl
	# first check if the shifted pos is just at the end of the
	# previous anchor.
	for j in range(1, len(sh_anc)):
	qcodon = "^"
	for k, aa in enumerate(sh_anc):
	if k == j - 1:
	# will be considered in the next step
	pass
	elif k == j:
	qcodon += _merge_aa2re(
	sh_anc[j - 1],
	sh_anc[j],
	shift_val,
	aa2re,
	shift_id[shift_id_pos].upper(),
	)
	else:
	qcodon += aa2re[aa]
	qcodon += "$"
	match = re.search(qcodon, sh_nuc)
	if match:
	qcodon = qcodon.replace("^", "").replace("$", "")
	shift_id_pos += 1
	return qcodon, shift_id_pos
	if not match:
	# failed to find a match (frameshift)
	return -1, shift_id_pos


	def _merge_aa2re(aa1, aa2, shift_val, aa2re, reid):
	"""Merge two amino acids based on detected frame shift value (PRIVATE)."""

	def get_aa_from_codonre(re_aa):
	aas = []
	m = 0
	for i in re_aa:
	if i == "[":
	m = -1
	aas.append("")
	elif i == "]":
	m = 0
	continue
	elif m == -1:
	aas[-1] = aas[-1] + i
	elif m == 0:
	aas.append(i)
	return aas

	scodon = list(map(get_aa_from_codonre, (aa2re[aa1], aa2re[aa2])))
	if shift_val == 1:
	intersect = "".join(set(scodon[0][2]) & set(scodon[1][0]))
	scodonre = "(?P<" + reid + ">"
	scodonre += (
	"["
	+ scodon[0][0]
	+ "]"
	+ "["
	+ scodon[0][1]
	+ "]"
	+ "["
	+ intersect
	+ "]"
	+ "["
	+ scodon[1][1]
	+ "]"
	+ "["
	+ scodon[1][2]
	+ "]"
	)
	elif shift_val == 2:
	intersect1 = "".join(set(scodon[0][1]) & set(scodon[1][0]))
	intersect2 = "".join(set(scodon[0][2]) & set(scodon[1][1]))
	scodonre = "(?P<" + reid + ">"
	scodonre += (
	"["
	+ scodon[0][0]
	+ "]"
	+ "["
	+ intersect1
	+ "]"
	+ "["
	+ intersect2
	+ "]"
	+ "["
	+ scodon[1][2]
	+ "]"
	)
	scodonre += ")"
	return scodonre


	def _get_codon_rec(
	    pro, nucl, span_mode, gap_char, codon_table, complete_protein=False, max_score=10
	):
	    """Generate codon alignment based on regular re match (PRIVATE).

	    span_mode is a tuple returned by _check_corr. The first element
	    is the span of a re search, and the second element is the mode
	    for the match. In mode 2 a third element holds the match object
	    whose named groups mark the frameshifts.

	    mode
	     - 0: direct match
	     - 1: mismatch (no indels)
	     - 2: frameshift

	    Arguments:
	     - pro, nucl - protein and nucleotide SeqRecord
	     - gap_char - gap character of the protein; a gap in the protein
	       becomes ``---`` in the codon sequence
	     - codon_table - CodonTable used to check each codon
	     - complete_protein - when true the first codon is checked against
	       the start codons of codon_table instead of being translated
	     - max_score - decremented for every mismatch or frameshift; a
	       RuntimeError is raised when it reaches 0

	    Returns a SeqRecord holding a CodonSeq, with the id of nucl. Returns
	    None for a mode other than 0, 1 or 2.

	    Raises ValueError in mode 0/1 when the span length is not three
	    times the ungapped protein length.
	    """
	    import re

	    nucl_seq = nucl.seq.replace(gap_char, "")
	    span = span_mode[0]
	    mode = span_mode[1]
	    # The mapping is not used here; the call is kept because it rejects
	    # a codon_table of the wrong type with TypeError.
	    _get_aa_regex(codon_table)
	    ungapped_len = len(pro.seq.replace(gap_char, ""))
	    exhausted_msg = (
	        f"max_score reached for {nucl.id}! Please raise up "
	        "the tolerance to get an alignment in anyway"
	    )
	    if mode in (0, 1):
	        if ungapped_len * 3 != (span[1] - span[0]):
	            raise ValueError(
	                f"Protein Record {pro.id} and "
	                f"Nucleotide Record {nucl.id} do not match!"
	            )
	        aa_num = 0
	        codon_seq = CodonSeq()
	        for aa in pro.seq:
	            # Compare with gap_char (not a literal "-") so that gap
	            # handling agrees with the length check above.
	            if aa == gap_char:
	                codon_seq += "---"
	            elif complete_protein and aa_num == 0:
	                this_codon = nucl_seq[span[0] : span[0] + 3]
	                if not re.search(
	                    _codons2re(codon_table.start_codons), str(this_codon.upper())
	                ):
	                    max_score -= 1
	                    warnings.warn(
	                        f"start codon of {pro.id} ({aa} {aa_num}) does not "
	                        f"correspond to {nucl.id} ({this_codon})",
	                        BiopythonWarning,
	                    )
	                if max_score == 0:
	                    raise RuntimeError(exhausted_msg)
	                codon_seq += this_codon
	                aa_num += 1
	            else:
	                this_codon = nucl_seq[span[0] + 3 * aa_num : span[0] + 3 * (aa_num + 1)]
	                if this_codon.upper().translate(table=codon_table) != aa:
	                    max_score -= 1
	                    warnings.warn(
	                        "%s(%s %d) does not correspond to %s(%s)"
	                        % (pro.id, aa, aa_num, nucl.id, this_codon),
	                        BiopythonWarning,
	                    )
	                if max_score == 0:
	                    raise RuntimeError(exhausted_msg)
	                codon_seq += this_codon
	                aa_num += 1
	        return SeqRecord(codon_seq, id=nucl.id)
	    elif mode == 2:
	        from collections import deque

	        shift_pos = deque([])
	        shift_start = []
	        match = span_mode[2]
	        m_groupdict = list(match.groupdict().keys())
	        # Collect span and start of every frameshift group. Upper-case
	        # group names mark backward shifts, lower-case names forward
	        # shifts (see _get_shift_anchor_re).
	        for i in m_groupdict:
	            shift_pos.append(match.span(i))
	            shift_start.append(match.start(i))
	        # Build the reading-frame table: start position of every codon.
	        rf_table = []
	        i = match.start()
	        while True:
	            rf_table.append(i)
	            i += 3
	            if i in shift_start and m_groupdict[shift_start.index(i)].isupper():
	                shift_index = shift_start.index(i)
	                shift_val = 6 - (shift_pos[shift_index][1] - shift_pos[shift_index][0])
	                rf_table.append(i)
	                rf_table.append(i + 3 - shift_val)
	                i = shift_pos[shift_index][1]
	            elif i in shift_start and m_groupdict[shift_start.index(i)].islower():
	                i = shift_pos[shift_start.index(i)][1]
	            if i >= match.end():
	                break
	        codon_seq = CodonSeq()
	        aa_num = 0
	        for aa in pro.seq:
	            if aa == gap_char:
	                codon_seq += "---"
	            elif complete_protein and aa_num == 0:
	                this_codon = nucl_seq[rf_table[0] : rf_table[0] + 3]
	                if not re.search(
	                    _codons2re(codon_table.start_codons), str(this_codon.upper())
	                ):
	                    max_score -= 1
	                    warnings.warn(
	                        f"start codon of {pro.id}({aa} {aa_num}) does not "
	                        f"correspond to {nucl.id}({this_codon})",
	                        BiopythonWarning,
	                    )
	                # Same exhaustion check as in mode 0/1, so that a budget
	                # used up on the start codon is not stepped over.
	                if max_score == 0:
	                    raise RuntimeError(exhausted_msg)
	                codon_seq += this_codon
	                aa_num += 1
	            else:
	                if (
	                    aa_num < ungapped_len - 1
	                    and rf_table[aa_num + 1] - rf_table[aa_num] - 3 < 0
	                ):
	                    # Next codon starts inside this one: backward shift.
	                    # NOTE(review): shift_val is the value left over from
	                    # the last backward shift seen while building
	                    # rf_table - confirm this is intended when several
	                    # backward shifts of different size occur.
	                    max_score -= 1
	                    start = rf_table[aa_num]
	                    end = start + (3 - shift_val)
	                    ngap = shift_val
	                    this_codon = nucl_seq[start:end] + "-" * ngap
	                elif rf_table[aa_num] - rf_table[aa_num - 1] - 3 > 0:
	                    # Extra nucleotides before this codon: forward shift.
	                    max_score -= 1
	                    start = rf_table[aa_num - 1] + 3
	                    end = rf_table[aa_num]
	                    ngap = 3 - (rf_table[aa_num] - rf_table[aa_num - 1] - 3)
	                    this_codon = (
	                        nucl_seq[start:end]
	                        + "-" * ngap
	                        + nucl_seq[rf_table[aa_num] : rf_table[aa_num] + 3]
	                    )
	                else:
	                    start = rf_table[aa_num]
	                    end = start + 3
	                    this_codon = nucl_seq[start:end]
	                    if this_codon.upper().translate(table=codon_table) != aa:
	                        max_score -= 1
	                        warnings.warn(
	                            f"Codon of {pro.id}({aa} {aa_num}) does not "
	                            f"correspond to {nucl.id}({this_codon})",
	                            BiopythonWarning,
	                        )
	                if max_score == 0:
	                    raise RuntimeError(exhausted_msg)
	                codon_seq += this_codon
	                aa_num += 1
	        codon_seq.rf_table = rf_table
	        return SeqRecord(codon_seq, id=nucl.id)


	def _align_shift_recs(recs):
	    """Build alignment according to the frameshift detected by _check_corr (PRIVATE).

	    Argument:
	     - recs - a list of SeqRecords containing a CodonSeq dictated
	       by a rf_table (with frameshift in some of them).

	    The records are modified in place (their ``seq`` is replaced by a
	    gap-padded CodonSeq) and the same list is returned.

	    Raises RuntimeError when the records do not all have the same number
	    of alignable codons.
	    """

	    def find_next_int(k, lst):
	        # Starting at the position of k in lst, return the first int
	        # entry and its offset from k.
	        idx = lst.index(k)
	        p = 0
	        while True:
	            if isinstance(lst[idx + p], int):
	                return lst[idx + p], p
	            p += 1

	    # NOTE(review): the entries of get_full_rf_table() are treated as int
	    # for ordinary codons and float otherwise - confirm against
	    # CodonSeq.get_full_rf_table.
	    full_rf_table_lst = [rec.seq.get_full_rf_table() for rec in recs]
	    # Count the alignable codons per record: int entries plus float
	    # entries that point at a "---" gap codon.
	    rf_num = [0] * len(recs)
	    for k, rec in enumerate(recs):
	        for i in rec.seq.get_full_rf_table():
	            if isinstance(i, int):
	                rf_num[k] += 1
	            # isinstance(i, float) should be True
	            elif rec.seq[int(i) : int(i) + 3] == "---":
	                rf_num[k] += 1
	    if len(set(rf_num)) != 1:
	        raise RuntimeError("Number of alignable codons unequal in given records")
	    # Walk the tables column by column; i is the current column index.
	    i = 0
	    rec_num = len(recs)
	    while True:
	        # (record index, position) pairs that need gaps in this column
	        add_lst = []
	        try:
	            col_rf_lst = [k[i] for k in full_rf_table_lst]
	        except IndexError:
	            # we probably reached the last codon
	            break
	        for j, k in enumerate(col_rf_lst):
	            add_lst.append((j, int(k)))
	            # A float entry that is not a gap codon marks a frameshift in
	            # record j at this column.
	            if isinstance(k, float) and recs[j].seq[int(k) : int(k) + 3] != "---":
	                m, p = find_next_int(k, full_rf_table_lst[j])
	                # Gaps needed to bring the shifted stretch to a multiple
	                # of three.
	                if (m - k) % 3 != 0:
	                    gap_num = 3 - (m - k) % 3
	                else:
	                    gap_num = 0
	                if gap_num != 0:
	                    gaps = "-" * int(gap_num)
	                    seq = CodonSeq(rf_table=recs[j].seq.rf_table)
	                    seq += recs[j].seq[: int(k)] + gaps + recs[j].seq[int(k) :]
	                    full_rf_table = full_rf_table_lst[j]
	                    bp = full_rf_table.index(k)
	                    # Drop the entry for k and move later entries by the
	                    # number of inserted gaps.
	                    full_rf_table = full_rf_table[:bp] + [
	                        v + int(gap_num) for v in full_rf_table[bp + 1 :]
	                    ]
	                    full_rf_table_lst[j] = full_rf_table
	                    recs[j].seq = seq
	                # The shifted record is removed from add_lst again, so it
	                # is not padded in the loop below.
	                add_lst.pop()
	                gap_num += m - k
	                i += p - 1
	        # If any record was removed above, pad the remaining ones with
	        # gap_num gaps at this column.
	        if len(add_lst) != rec_num:
	            for j, k in add_lst:
	                seq = CodonSeq(rf_table=recs[j].seq.rf_table)
	                gaps = "-" * int(gap_num)
	                seq += recs[j].seq[: int(k)] + gaps + recs[j].seq[int(k) :]
	                full_rf_table = full_rf_table_lst[j]
	                bp = full_rf_table.index(k)
	                # One float entry per inserted gap codon.
	                inter_rf = []
	                for t in range(0, len(gaps), 3):
	                    inter_rf.append(k + t + 3.0)
	                full_rf_table = (
	                    full_rf_table[:bp]
	                    + inter_rf
	                    + [v + int(gap_num) for v in full_rf_table[bp:]]
	                )
	                full_rf_table_lst[j] = full_rf_table
	                recs[j].seq = seq
	        i += 1
	    return recs


	# Script entry point: calls Biopython's run_doctest helper, presumably to
	# run the doctests embedded in this module's docstrings.
	if __name__ == "__main__":
	    from Bio._utils import run_doctest

	    run_doctest()