Spaces:
No application file
No application file
DrVai-Rag-Testing
/
myenv
/lib
/python3.10
/site-packages
/Bio
/Sequencing
/Applications
/_samtools.py
# Copyright 2014 Saket Choudhary. All rights reserved. | |
# | |
# This file is part of the Biopython distribution and governed by your | |
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License". | |
# Please see the LICENSE file that should have been included as part of this | |
# package. | |
"""Command line wrapper for samtools.""" | |
# Last Checked with samtools [0.1.20 and 1.2] | |
# TODO samtools 1.x has additional options over 0.x which | |
# are missing from this wrapper | |
from Bio.Application import _Option, _Argument, _Switch | |
from Bio.Application import AbstractCommandline, _ArgumentList | |
from Bio.Application import _StaticArgument | |
class SamtoolsViewCommandline(AbstractCommandline):
    """Command line wrapper for samtools view.

    Extract/print all or sub alignments in SAM or BAM format, equivalent to::

        $ samtools view [-bchuHS] [-t in.refList] [-o output] [-f reqFlag]
                        [-F skipFlag] [-q minMapQ] [-l library] [-r readGroup]
                        [-R rgFile] <in.bam>|<in.sam> [region1 [...]]

    See http://samtools.sourceforge.net/samtools.shtml for more details

    Examples
    --------
    >>> from Bio.Sequencing.Applications import SamtoolsViewCommandline
    >>> input_file = "/path/to/sam_or_bam_file"
    >>> samtools_view_cmd = SamtoolsViewCommandline(input_file=input_file)
    >>> print(samtools_view_cmd)
    samtools view /path/to/sam_or_bam_file

    """

    def __init__(self, cmd="samtools", **kwargs):
        """Declare the ``view`` parameters and hand over to the base class.

        ``cmd`` is stored as the program name; all remaining keyword
        arguments are forwarded unchanged to ``AbstractCommandline.__init__``.
        """

        def is_text(value):
            # Validator for options whose value must be a string.
            return isinstance(value, str)

        def is_integer(value):
            # Validator for options whose value must be an integer.
            return isinstance(value, int)

        self.program_name = cmd
        self.parameters = [
            # The sub-command name always comes first.
            _StaticArgument("view"),
            # --- flags -----------------------------------------------------
            _Switch(["-b", "b"], "Output in the BAM format"),
            _Switch(
                ["-c", "c"],
                """Instead of printing the alignments, only count them and
                print the total number.
                All filter options, such as '-f', '-F' and '-q',
                are taken into account""",
            ),
            _Switch(["-h", "h"], "Include the header in the output"),
            _Switch(
                ["-u", "u"],
                """Output uncompressed BAM.
                This option saves time spent on compression/decompression
                and is thus preferred when the output is piped to
                another samtools command""",
            ),
            _Switch(["-H", "H"], "Output the header only"),
            _Switch(
                ["-S", "S"],
                """Input is in SAM.
                If @SQ header lines are absent,
                the '-t' option is required.""",
            ),
            # --- options taking a value --------------------------------------
            _Option(
                ["-t", "t"],
                """This file is TAB-delimited.
                Each line must contain the reference name and the
                length of the reference, one line for each
                distinct reference; additional fields are ignored.
                This file also defines the order of the reference
                sequences in sorting.
                If you run 'samtools faidx <ref.fa>',
                the resultant index file <ref.fa>.fai can be used
                as this <in.ref_list> file.""",
                filename=True,
                equate=False,
                checker_function=is_text,
            ),
            _Option(
                ["-o", "o"],
                "Output file",
                filename=True,
                equate=False,
                checker_function=is_text,
            ),
            _Option(
                ["-f", "f"],
                """Only output alignments with all bits in
                INT present in the FLAG field""",
                equate=False,
                checker_function=is_integer,
            ),
            _Option(
                ["-F", "F"],
                "Skip alignments with bits present in INT",
                equate=False,
                checker_function=is_integer,
            ),
            _Option(
                ["-q", "q"],
                "Skip alignments with MAPQ smaller than INT",
                equate=False,
                checker_function=is_integer,
            ),
            _Option(
                ["-r", "r"],
                "Only output reads in read group STR",
                equate=False,
                checker_function=is_text,
            ),
            _Option(
                ["-R", "R"],
                "Output reads in read groups listed in FILE",
                filename=True,
                equate=False,
                checker_function=is_text,
            ),
            _Option(
                ["-l", "l"],
                "Only output reads in library STR",
                equate=False,
                checker_function=is_text,
            ),
            _Switch(
                ["-1", "fast_bam"],
                "Use zlib compression level 1 to compress the output",
            ),
            # --- positional arguments ----------------------------------------
            _Argument(
                ["input", "input_file"],
                "Input File Name",
                filename=True,
                is_required=True,
            ),
            _Argument(["region"], "Region", is_required=False),
        ]
        super().__init__(cmd, **kwargs)
class SamtoolsMpileupCommandline(AbstractCommandline):
    """Command line wrapper for samtools mpileup.

    Generate BCF or pileup for one or multiple BAM files, equivalent to::

        $ samtools mpileup [-EBug] [-C capQcoef] [-r reg] [-f in.fa]
                           [-l list] [-M capMapQ] [-Q minBaseQ]
                           [-q minMapQ] in.bam [in2.bam [...]]

    See http://samtools.sourceforge.net/samtools.shtml for more details

    Examples
    --------
    >>> from Bio.Sequencing.Applications import SamtoolsMpileupCommandline
    >>> input = ["/path/to/sam_or_bam_file"]
    >>> samtools_mpileup_cmd = SamtoolsMpileupCommandline(input_file=input)
    >>> print(samtools_mpileup_cmd)
    samtools mpileup /path/to/sam_or_bam_file

    """

    def __init__(self, cmd="samtools", **kwargs):
        """Declare the ``mpileup`` parameters and hand over to the base class.

        ``cmd`` is stored as the program name; all remaining keyword
        arguments are forwarded unchanged to ``AbstractCommandline.__init__``.
        """

        def is_text(value):
            # Validator for options whose value must be a string.
            return isinstance(value, str)

        def is_integer(value):
            # Validator for options whose value must be an integer.
            return isinstance(value, int)

        self.program_name = cmd
        self.parameters = [
            _StaticArgument("mpileup"),
            # --- BAQ handling and output format flags ------------------------
            _Switch(
                ["-E", "E"],
                """Extended BAQ computation.
                This option helps sensitivity especially
                for MNPs, but may hurt specificity a little bit""",
            ),
            _Switch(
                ["-B", "B"],
                """Disable probabilistic realignment for the
                computation of base alignment quality (BAQ).
                BAQ is the Phred-scaled probability of a read base being
                misaligned.
                Applying this option greatly helps to reduce false SNPs
                caused by misalignments""",
            ),
            _Switch(
                ["-g", "g"],
                """Compute genotype likelihoods and output them in the
                binary call format (BCF)""",
            ),
            _Switch(
                ["-u", "u"],
                """Similar to -g except that the output is
                uncompressed BCF, which is preferred for piping""",
            ),
            # --- input selection and quality thresholds ----------------------
            _Option(
                ["-C", "C"],
                """Coefficient for downgrading mapping quality for
                reads containing excessive mismatches.
                Given a read with a phred-scaled probability q of
                being generated from the mapped position,
                the new mapping quality is about sqrt((INT-q)/INT)*INT.
                A zero value disables this functionality;
                if enabled, the recommended value for BWA is 50""",
                equate=False,
                checker_function=is_integer,
            ),
            _Option(
                ["-r", "r"],
                "Only generate pileup in region STR",
                equate=False,
                checker_function=is_text,
            ),
            _Option(
                ["-f", "f"],
                """The faidx-indexed reference file in the FASTA format.
                The file can be optionally compressed by razip""",
                filename=True,
                equate=False,
                checker_function=is_text,
            ),
            _Option(
                ["-l", "l"],
                """BED or position list file containing a list of regions
                or sites where pileup or BCF should be generated""",
                filename=True,
                equate=False,
                checker_function=is_text,
            ),
            _Option(
                ["-M", "M"],
                "Cap Mapping Quality at M",
                equate=False,
                checker_function=is_integer,
            ),
            _Option(
                ["-q", "q"],
                "Minimum mapping quality for an alignment to be used",
                equate=False,
                checker_function=is_integer,
            ),
            _Option(
                ["-Q", "Q"],
                "Minimum base quality for a base to be considered",
                equate=False,
                checker_function=is_integer,
            ),
            _Switch(
                ["-6", "illumina_13"],
                "Assume the quality is in the Illumina 1.3+ encoding",
            ),
            _Switch(
                ["-A", "A"], "Do not skip anomalous read pairs in variant calling."
            ),
            _Option(
                ["-b", "b"],
                "List of input BAM files, one file per line",
                filename=True,
                equate=False,
                checker_function=is_text,
            ),
            _Option(
                ["-d", "d"],
                "At a position, read maximally INT reads per input BAM",
                equate=False,
                checker_function=is_integer,
            ),
            # --- per-sample output and indel calling -------------------------
            _Switch(["-D", "D"], "Output per-sample read depth"),
            _Switch(
                ["-S", "S"],
                """Output per-sample Phred-scaled
                strand bias P-value""",
            ),
            _Option(
                ["-e", "e"],
                """Phred-scaled gap extension sequencing error probability.
                Reducing INT leads to longer indels""",
                equate=False,
                checker_function=is_integer,
            ),
            _Option(
                ["-h", "h"],
                """Coefficient for modeling homopolymer errors.
                Given an l-long homopolymer run, the sequencing error
                of an indel of size s is modeled as INT*s/l""",
                equate=False,
                checker_function=is_integer,
            ),
            _Switch(["-I", "I"], "Do not perform INDEL calling"),
            _Option(
                ["-L", "L"],
                """Skip INDEL calling if the average per-sample
                depth is above INT""",
                equate=False,
                checker_function=is_integer,
            ),
            _Option(
                ["-o", "o"],
                """Phred-scaled gap open sequencing error probability.
                Reducing INT leads to more indel calls.""",
                equate=False,
                checker_function=is_integer,
            ),
            _Option(
                ["-p", "p"],
                """Comma delimited list of platforms (determined by @RG-PL)
                from which indel candidates are obtained.
                It is recommended to collect indel candidates from
                sequencing technologies that have low indel error rate
                such as ILLUMINA""",
                equate=False,
                checker_function=is_text,
            ),
            # --- positional arguments ----------------------------------------
            _ArgumentList(
                ["input_file"],
                "Input File for generating mpileup",
                filename=True,
                is_required=True,
            ),
        ]
        super().__init__(cmd, **kwargs)
class SamtoolsReheaderCommandline(AbstractCommandline):
    """Command line wrapper for samtools reheader.

    Replace the header in in.bam with the header
    in in.header.sam, equivalent to::

        $ samtools reheader <in.header.sam> <in.bam>

    See http://samtools.sourceforge.net/samtools.shtml for more details

    Examples
    --------
    >>> from Bio.Sequencing.Applications import SamtoolsReheaderCommandline
    >>> input_header = "/path/to/header_sam_file"
    >>> input_bam = "/path/to/input_bam_file"
    >>> reheader_cmd = SamtoolsReheaderCommandline(input_header=input_header,
    ...                                            input_bam=input_bam)
    >>> print(reheader_cmd)
    samtools reheader /path/to/header_sam_file /path/to/input_bam_file

    """

    def __init__(self, cmd="samtools", **kwargs):
        """Declare the ``reheader`` parameters and hand over to the base class.

        ``cmd`` is stored as the program name; all remaining keyword
        arguments are forwarded unchanged to ``AbstractCommandline.__init__``.
        """
        # First positional argument: the SAM file supplying the new header.
        header_source = _Argument(
            ["input_header", "header_sam", "sam_file"],
            "Sam file with header",
            filename=True,
            is_required=True,
        )
        # Second positional argument: the BAM file that receives the header.
        header_target = _Argument(
            ["input_bam", "input_file", "bam_file"],
            "BAM file for writing header to",
            filename=True,
            is_required=True,
        )
        self.program_name = cmd
        self.parameters = [_StaticArgument("reheader"), header_source, header_target]
        super().__init__(cmd, **kwargs)
class SamtoolsCatCommandline(AbstractCommandline):
    """Command line wrapper for samtools cat.

    Concatenate BAMs, equivalent to::

        $ samtools cat [-h header.sam] [-o out.bam] <in1.bam> <in2.bam> [ ... ]

    See http://samtools.sourceforge.net/samtools.shtml for more details

    Examples
    --------
    >>> from Bio.Sequencing.Applications import SamtoolsCatCommandline
    >>> input_bam1 = "/path/to/input_bam1"
    >>> input_bam2 = "/path/to/input_bam2"
    >>> input_bams = [input_bam1, input_bam2]
    >>> samtools_cat_cmd = SamtoolsCatCommandline(input_bam=input_bams)
    >>> print(samtools_cat_cmd)
    samtools cat /path/to/input_bam1 /path/to/input_bam2

    """

    def __init__(self, cmd="samtools", **kwargs):
        """Declare the ``cat`` parameters and hand over to the base class.

        ``cmd`` is stored as the program name; all remaining keyword
        arguments are forwarded unchanged to ``AbstractCommandline.__init__``.
        """

        def is_text(value):
            # Both file options require their value to be a string.
            return isinstance(value, str)

        header_option = _Option(
            ["-h", "h"],
            "Header SAM file",
            filename=True,
            equate=False,
            checker_function=is_text,
        )
        output_option = _Option(
            ["-o", "o"],
            "Output SAM file",
            filename=True,
            equate=False,
            checker_function=is_text,
        )
        # The BAM files to concatenate are passed together as one list.
        bam_inputs = _ArgumentList(
            ["input", "input_bam", "bams"],
            "Input BAM files",
            filename=True,
            is_required=True,
        )
        self.program_name = cmd
        self.parameters = [
            _StaticArgument("cat"),
            header_option,
            output_option,
            bam_inputs,
        ]
        super().__init__(cmd, **kwargs)
class SamtoolsVersion0xSortCommandline(AbstractCommandline):
    """Command line wrapper for samtools version 0.1.x sort.

    Sort alignments, equivalent to::

        $ samtools sort [-no] [-m maxMem] <in.bam> <out.prefix>

    See http://samtools.sourceforge.net/samtools.shtml for more details

    Examples
    --------
    >>> from Bio.Sequencing.Applications import SamtoolsVersion0xSortCommandline
    >>> input_bam = "/path/to/input_bam"
    >>> out_prefix = "/path/to/out_prefix"
    >>> samtools_sort_cmd = SamtoolsVersion0xSortCommandline(input=input_bam, out_prefix=out_prefix)
    >>> print(samtools_sort_cmd)
    samtools sort /path/to/input_bam /path/to/out_prefix

    """

    def __init__(self, cmd="samtools", **kwargs):
        """Declare the 0.1.x ``sort`` parameters and hand over to the base class.

        ``cmd`` is stored as the program name; all remaining keyword
        arguments are forwarded unchanged to ``AbstractCommandline.__init__``.
        """

        def is_integer(value):
            # The memory limit must be given as an integer.
            return isinstance(value, int)

        # Options of the samtools 0.1.x ``sort`` sub-command.
        flags = [
            _Switch(
                ["-o", "o"],
                """Output the final alignment
                to the standard output""",
            ),
            _Switch(
                ["-n", "n"],
                """Sort by read names rather
                than by chromosomal coordinates""",
            ),
        ]
        memory_limit = _Option(
            ["-m", "m"],
            "Approximately the maximum required memory",
            equate=False,
            checker_function=is_integer,
        )
        # Positional arguments: input BAM first, then the output prefix.
        positionals = [
            _Argument(["input"], "Input BAM file", filename=True, is_required=True),
            _Argument(["out_prefix"], "Output prefix", filename=True, is_required=True),
        ]
        self.program_name = cmd
        self.parameters = [_StaticArgument("sort"), *flags, memory_limit, *positionals]
        super().__init__(cmd, **kwargs)
class SamtoolsVersion1xSortCommandline(AbstractCommandline):
    """Command line wrapper for samtools version 1.3.x sort.

    Sort alignments, equivalent to::

        $ samtools sort [-n] [-T PREFIX] [-o file] [-l INT] [-m maxMem] <in.bam>

    See http://samtools.sourceforge.net/samtools.shtml for more details

    The compression level is emitted as ``-l`` (lowercase L), the flag
    samtools sort accepts. It can be set through either the ``l`` keyword or
    the historical ``I`` keyword of this wrapper.

    Examples
    --------
    >>> from Bio.Sequencing.Applications import SamtoolsVersion1xSortCommandline
    >>> input_bam = "/path/to/input_bam"
    >>> PREFIX = "/path/to/out_prefix"
    >>> file_name = "/path/to/out_file"
    >>> samtools_sort_cmd = SamtoolsVersion1xSortCommandline(input=input_bam, T=PREFIX, o=file_name)
    >>> print(samtools_sort_cmd)
    samtools sort -o /path/to/out_file -T /path/to/out_prefix /path/to/input_bam

    """

    def __init__(self, cmd="samtools", **kwargs):
        """Declare the 1.x ``sort`` parameters and hand over to the base class.

        ``cmd`` is stored as the program name; all remaining keyword
        arguments are forwarded unchanged to ``AbstractCommandline.__init__``.
        """
        self.program_name = cmd
        # options for version samtools 1.3.1
        self.parameters = [
            _StaticArgument("sort"),
            _Switch(
                ["-n", "n"],
                """Sort by read names rather
                than by chromosomal coordinates""",
            ),
            _Option(
                ["-o", "o"],
                """(file) Write the final sorted output to FILE,
                rather than to standard output""",
                # A path: marked as a filename like the other wrappers' file
                # options, so it receives the base class's filename handling.
                filename=True,
                equate=False,
                checker_function=lambda x: isinstance(x, str),
            ),
            _Option(
                ["-O", "O"],
                """(FORMAT) Write the final output as sam, bam, or cram""",
                equate=False,
                checker_function=lambda x: isinstance(x, str),
            ),
            _Option(
                ["-T", "T"],
                """(PREFIX) Write temporary files to PREFIX.nnnn.bam, or if the specified PREFIX
                is an existing directory, to PREFIX/samtools.mmm.mmm.tmp.nnnn.bam,
                where mmm is unique to this invocation of the sort command""",
                # PREFIX is a file-system path as well.
                filename=True,
                equate=False,
                checker_function=lambda x: isinstance(x, str),
            ),
            _Option(
                # The first name is the flag written to the command line:
                # samtools sort takes the compression level as "-l", not
                # "-I". "I" is kept as the last name so existing callers
                # using the ``I`` keyword keep working.
                ["-l", "l", "I"],
                """(INT) Set the desired compression level for the final output file,
                ranging from 0 (uncompressed) or 1 (fastest but minimal compression)
                to 9 (best compression but slowest to write), similarly to gzip(1)'s compression level setting.""",
                equate=False,
                # The value is an INT; strings are still accepted because the
                # previous checker only allowed str.
                checker_function=lambda x: isinstance(x, (int, str)),
            ),
            _Option(
                ["-m", "m"],
                "Approximately the maximum required memory",
                equate=False,
                checker_function=lambda x: isinstance(x, int),
            ),
            _Argument(
                ["input"], "Input SAM/BAM/CRAM file", filename=True, is_required=True
            ),
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
class SamtoolsMergeCommandline(AbstractCommandline):
    """Command line wrapper for samtools merge.

    Merge multiple sorted alignments, equivalent to::

        $ samtools merge [-nur1f] [-h inh.sam] [-R reg]
                         <out.bam> <in1.bam> <in2.bam> [...]

    See http://samtools.sourceforge.net/samtools.shtml for more details

    Examples
    --------
    >>> from Bio.Sequencing.Applications import SamtoolsMergeCommandline
    >>> out_bam = "/path/to/out_bam"
    >>> in_bam = ["/path/to/input_bam1", "/path/to/input_bam2"]
    >>> merge_cmd = SamtoolsMergeCommandline(out_bam=out_bam,
    ...                                      input_bam=in_bam)
    >>> print(merge_cmd)
    samtools merge /path/to/out_bam /path/to/input_bam1 /path/to/input_bam2

    """

    def __init__(self, cmd="samtools", **kwargs):
        """Declare the ``merge`` parameters and hand over to the base class.

        ``cmd`` is stored as the program name; all remaining keyword
        arguments are forwarded unchanged to ``AbstractCommandline.__init__``.
        """

        def is_text(value):
            # Validator for options whose value must be a string.
            return isinstance(value, str)

        flags = [
            _Switch(
                ["-n", "n"],
                """The input alignments are sorted by read names
                rather than by chromosomal coordinates""",
            ),
            _Switch(
                ["-r", "r"],
                """Attach an RG tag to each alignment.
                The tag value is inferred from file names""",
            ),
            _Switch(["-u", "u"], "Uncompressed BAM output"),
            _Switch(
                ["-1", "fast_bam"],
                """Use zlib compression level 1
                to compress the output""",
            ),
            _Switch(
                ["-f", "f"],
                """Force to overwrite the
                output file if present""",
            ),
        ]
        valued_options = [
            _Option(
                ["-h", "h"],
                """Use the lines of FILE as '@'
                headers to be copied to out.bam""",
                filename=True,
                equate=False,
                checker_function=is_text,
            ),
            _Option(
                ["-R", "R"],
                "Merge files in the specified region indicated by STR",
                equate=False,
                checker_function=is_text,
            ),
        ]
        # Positional arguments: the output BAM, then the list of inputs.
        positionals = [
            _Argument(
                ["output_bam", "out_bam", "out", "output"],
                "Output BAM file",
                filename=True,
                is_required=True,
            ),
            _ArgumentList(
                ["input_bam", "in_bam", "input", "bam"],
                "Input BAM",
                filename=True,
                is_required=True,
            ),
        ]
        self.program_name = cmd
        self.parameters = [
            _StaticArgument("merge"),
            *flags,
            *valued_options,
            *positionals,
        ]
        super().__init__(cmd, **kwargs)
class SamtoolsIndexCommandline(AbstractCommandline):
    """Command line wrapper for samtools index.

    Index sorted alignment for fast random access, equivalent to::

        $ samtools index <aln.bam>

    See http://samtools.sourceforge.net/samtools.shtml for more details

    Examples
    --------
    >>> from Bio.Sequencing.Applications import SamtoolsIndexCommandline
    >>> input = "/path/to/aln_bam"
    >>> samtools_index_cmd = SamtoolsIndexCommandline(input_bam=input)
    >>> print(samtools_index_cmd)
    samtools index /path/to/aln_bam

    """

    def __init__(self, cmd="samtools", **kwargs):
        """Declare the ``index`` parameters and hand over to the base class.

        ``cmd`` is stored as the program name; all remaining keyword
        arguments are forwarded unchanged to ``AbstractCommandline.__init__``.
        """
        self.program_name = cmd
        self.parameters = [
            _StaticArgument("index"),
            _Argument(
                ["input", "in_bam", "input_bam"],
                "BAM file to be indexed",
                # The value is a path, so mark it as a filename like every
                # other file argument in this module; it then gets the base
                # class's filename handling (e.g. paths containing spaces).
                filename=True,
                # NOTE: is_required is deliberately left at its default so
                # that existing callers see no new validation error.
            ),
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
class SamtoolsIdxstatsCommandline(AbstractCommandline):
    """Command line wrapper for samtools idxstats.

    Retrieve and print stats in the index file, equivalent to::

        $ samtools idxstats <aln.bam>

    See http://samtools.sourceforge.net/samtools.shtml for more details

    Examples
    --------
    >>> from Bio.Sequencing.Applications import SamtoolsIdxstatsCommandline
    >>> input = "/path/to/aln_bam"
    >>> samtools_idxstats_cmd = SamtoolsIdxstatsCommandline(input_bam=input)
    >>> print(samtools_idxstats_cmd)
    samtools idxstats /path/to/aln_bam

    """

    def __init__(self, cmd="samtools", **kwargs):
        """Declare the ``idxstats`` parameters and hand over to the base class.

        ``cmd`` is stored as the program name; all remaining keyword
        arguments are forwarded unchanged to ``AbstractCommandline.__init__``.
        """
        self.program_name = cmd
        self.parameters = [
            _StaticArgument("idxstats"),
            _Argument(
                ["input", "in_bam", "input_bam"],
                # idxstats reads an existing index rather than building one,
                # so the help text no longer says "to be indexed".
                "Indexed BAM file to report index statistics for",
                # The value is a path, so mark it as a filename like every
                # other file argument in this module; it then gets the base
                # class's filename handling (e.g. paths containing spaces).
                filename=True,
                # NOTE: is_required is deliberately left at its default so
                # that existing callers see no new validation error.
            ),
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
class SamtoolsFaidxCommandline(AbstractCommandline):
    """Command line wrapper for samtools faidx.

    Index a reference sequence in FASTA format, equivalent to::

        $ samtools faidx <ref.fasta> [region1 [...]]

    This wrapper only declares the reference argument; no region
    parameter is available.

    See http://samtools.sourceforge.net/samtools.shtml for more details

    Examples
    --------
    >>> from Bio.Sequencing.Applications import SamtoolsFaidxCommandline
    >>> reference = "/path/to/reference.fasta"
    >>> samtools_faidx_cmd = SamtoolsFaidxCommandline(reference=reference)
    >>> print(samtools_faidx_cmd)
    samtools faidx /path/to/reference.fasta

    """

    def __init__(self, cmd="samtools", **kwargs):
        """Declare the ``faidx`` parameters and hand over to the base class.

        ``cmd`` is stored as the program name; all remaining keyword
        arguments are forwarded unchanged to ``AbstractCommandline.__init__``.
        """
        # The only positional argument: the FASTA file to index.
        reference_fasta = _Argument(
            ["reference", "reference_fasta", "ref"],
            "Reference FASTA to be indexed",
            filename=True,
            is_required=True,
        )
        self.program_name = cmd
        self.parameters = [_StaticArgument("faidx"), reference_fasta]
        super().__init__(cmd, **kwargs)
class SamtoolsFixmateCommandline(AbstractCommandline):
    """Command line wrapper for samtools fixmate.

    Fill in mate coordinates, ISIZE and mate related
    flags from a name-sorted alignment, equivalent to::

        $ samtools fixmate <in.nameSrt.bam> <out.bam>

    See http://samtools.sourceforge.net/samtools.shtml for more details

    Examples
    --------
    >>> from Bio.Sequencing.Applications import SamtoolsFixmateCommandline
    >>> in_bam = "/path/to/in.nameSrt.bam"
    >>> out_bam = "/path/to/out.bam"
    >>> fixmate_cmd = SamtoolsFixmateCommandline(input_bam=in_bam,
    ...                                          out_bam=out_bam)
    >>> print(fixmate_cmd)
    samtools fixmate /path/to/in.nameSrt.bam /path/to/out.bam

    """

    def __init__(self, cmd="samtools", **kwargs):
        """Declare the ``fixmate`` parameters and hand over to the base class.

        ``cmd`` is stored as the program name; all remaining keyword
        arguments are forwarded unchanged to ``AbstractCommandline.__init__``.
        """
        # First positional argument: the name-sorted input alignment.
        source_bam = _Argument(
            ["in_bam", "sorted_bam", "input_bam", "input", "input_file"],
            "Name Sorted Alignment File ",
            filename=True,
            is_required=True,
        )
        # Second positional argument: where the fixed alignment is written.
        destination_bam = _Argument(
            ["out_bam", "output_bam", "output", "output_file"],
            "Output file",
            filename=True,
            is_required=True,
        )
        self.program_name = cmd
        self.parameters = [_StaticArgument("fixmate"), source_bam, destination_bam]
        super().__init__(cmd, **kwargs)
class SamtoolsRmdupCommandline(AbstractCommandline):
    """Command line wrapper for samtools rmdup.

    Remove potential PCR duplicates, equivalent to::

        $ samtools rmdup [-sS] <input.srt.bam> <out.bam>

    See http://samtools.sourceforge.net/samtools.shtml for more details

    Examples
    --------
    >>> from Bio.Sequencing.Applications import SamtoolsRmdupCommandline
    >>> input_sorted_bam = "/path/to/input.srt.bam"
    >>> out_bam = "/path/to/out.bam"
    >>> rmdup_cmd = SamtoolsRmdupCommandline(input_bam=input_sorted_bam,
    ...                                      out_bam=out_bam)
    >>> print(rmdup_cmd)
    samtools rmdup /path/to/input.srt.bam /path/to/out.bam

    """

    def __init__(self, cmd="samtools", **kwargs):
        """Declare the ``rmdup`` parameters and hand over to the base class.

        ``cmd`` is stored as the program name; all remaining keyword
        arguments are forwarded unchanged to ``AbstractCommandline.__init__``.
        """
        # Flags selecting how single-end / paired-end reads are treated.
        read_mode_flags = [
            _Switch(
                ["-s", "s"],
                """Remove duplicates for single-end reads.
                By default, the command works for paired-end
                reads only""",
            ),
            _Switch(
                ["-S", "S"],
                """Treat paired-end reads
                as single-end reads""",
            ),
        ]
        # Positional arguments: the sorted input, then the output file.
        positionals = [
            _Argument(
                ["in_bam", "sorted_bam", "input_bam", "input", "input_file"],
                "Name Sorted Alignment File ",
                filename=True,
                is_required=True,
            ),
            _Argument(
                ["out_bam", "output_bam", "output", "output_file"],
                "Output file",
                filename=True,
                is_required=True,
            ),
        ]
        self.program_name = cmd
        self.parameters = [_StaticArgument("rmdup"), *read_mode_flags, *positionals]
        super().__init__(cmd, **kwargs)
class SamtoolsCalmdCommandline(AbstractCommandline):
    """Command line wrapper for samtools calmd.

    Generate the MD tag, equivalent to::

        $ samtools calmd [-EeubSr] [-C capQcoef] <aln.bam> <ref.fasta>

    See http://samtools.sourceforge.net/samtools.shtml for more details

    Examples
    --------
    >>> from Bio.Sequencing.Applications import SamtoolsCalmdCommandline
    >>> input_bam = "/path/to/aln.bam"
    >>> reference_fasta = "/path/to/reference.fasta"
    >>> calmd_cmd = SamtoolsCalmdCommandline(input_bam=input_bam,
    ...                                      reference=reference_fasta)
    >>> print(calmd_cmd)
    samtools calmd /path/to/aln.bam /path/to/reference.fasta

    """

    def __init__(self, cmd="samtools", **kwargs):
        """Declare the ``calmd`` parameters and hand over to the base class.

        ``cmd`` is stored as the program name; all remaining keyword
        arguments are forwarded unchanged to ``AbstractCommandline.__init__``.
        """

        def is_integer(value):
            # The capping coefficient must be given as an integer.
            return isinstance(value, int)

        flags = [
            _Switch(
                ["-E", "E"],
                """Extended BAQ calculation.
                This option trades specificity for sensitivity,
                though the effect is minor.""",
            ),
            _Switch(
                ["-e", "e"],
                """Convert the read base to = if it is
                identical to the aligned reference base.
                Indel caller does not support the = bases
                at the moment.""",
            ),
            _Switch(["-u", "u"], "Output uncompressed BAM"),
            _Switch(["-b", "b"], "Output compressed BAM "),
            _Switch(["-S", "S"], "The input is SAM with header lines "),
            _Switch(
                ["-r", "r"],
                """Compute the BQ tag (without -A)
                or cap base quality by BAQ (with -A).""",
            ),
            _Switch(
                ["-A", "A"],
                """When used jointly with -r this option overwrites
                the original base quality""",
            ),
        ]
        cap_coefficient = _Option(
            ["-C", "C"],
            """Coefficient to cap mapping quality
            of poorly mapped reads.
            See the pileup command for details.""",
            equate=False,
            checker_function=is_integer,
        )
        # Positional arguments: the alignment first, then the reference.
        positionals = [
            _Argument(
                ["input", "input_file", "in_bam", "infile", "input_bam"],
                "Input BAM",
                filename=True,
                is_required=True,
            ),
            _Argument(
                ["reference", "reference_fasta", "ref"],
                "Reference FASTA to be indexed",
                filename=True,
                is_required=True,
            ),
        ]
        self.program_name = cmd
        self.parameters = [
            _StaticArgument("calmd"),
            *flags,
            cap_coefficient,
            *positionals,
        ]
        super().__init__(cmd, **kwargs)
class SamtoolsTargetcutCommandline(AbstractCommandline):
    """Command line wrapper for samtools targetcut.

    This command identifies target regions by examining the continuity
    of read depth, computes haploid consensus sequences of targets
    and outputs a SAM with each sequence corresponding to a target,
    equivalent to::

        $ samtools targetcut [-Q minBaseQ] [-i inPenalty] [-0 em0]
                             [-1 em1] [-2 em2] [-f ref] <in.bam>

    See http://samtools.sourceforge.net/samtools.shtml for more details

    Examples
    --------
    >>> from Bio.Sequencing.Applications import SamtoolsTargetcutCommandline
    >>> input_bam = "/path/to/aln.bam"
    >>> samtools_targetcut_cmd = SamtoolsTargetcutCommandline(input_bam=input_bam)
    >>> print(samtools_targetcut_cmd)
    samtools targetcut /path/to/aln.bam

    """

    def __init__(self, cmd="samtools", **kwargs):
        """Declare the ``targetcut`` parameters and hand over to the base class.

        ``cmd`` is stored as the program name; all remaining keyword
        arguments are forwarded unchanged to ``AbstractCommandline.__init__``.
        """

        def is_text(value):
            # Validator for options whose value must be a string.
            return isinstance(value, str)

        def is_integer(value):
            # Validator for options whose value must be an integer.
            return isinstance(value, int)

        self.program_name = cmd
        self.parameters = [
            _StaticArgument("targetcut"),
            _Option(
                ["-Q", "Q"],
                "Minimum Base Quality ",
                equate=False,
                checker_function=is_integer,
            ),
            _Option(
                ["-i", "i"],
                "Insertion Penalty",
                equate=False,
                checker_function=is_integer,
            ),
            _Option(
                ["-f", "f"],
                "Reference Filename",
                filename=True,
                equate=False,
                checker_function=is_text,
            ),
            # The three em* options are validated as strings.
            _Option(
                ["-0", "em0"],
                "em0",
                equate=False,
                checker_function=is_text,
            ),
            _Option(
                ["-1", "em1"],
                "em1",
                equate=False,
                checker_function=is_text,
            ),
            _Option(
                ["-2", "em2"],
                "em2",
                equate=False,
                checker_function=is_text,
            ),
            _Argument(
                ["input", "input_bam", "in_bam"],
                "Input file",
                filename=True,
                is_required=True,
            ),
        ]
        super().__init__(cmd, **kwargs)
class SamtoolsPhaseCommandline(AbstractCommandline):
    """Command line wrapper for samtools phase.

    Call and phase heterozygous SNPs, equivalent to::

        $ samtools phase [-AF] [-k len] [-b prefix]
                         [-q minLOD] [-Q minBaseQ] <in.bam>

    See http://samtools.sourceforge.net/samtools.shtml for more details

    Examples
    --------
    >>> from Bio.Sequencing.Applications import SamtoolsPhaseCommandline
    >>> input_bam = "/path/to/in.bam"
    >>> samtools_phase_cmd = SamtoolsPhaseCommandline(input_bam=input_bam)
    >>> print(samtools_phase_cmd)
    samtools phase /path/to/in.bam

    """

    def __init__(self, cmd="samtools", **kwargs):
        """Declare the ``phase`` parameters and hand over to the base class.

        ``cmd`` is stored as the program name; all remaining keyword
        arguments are forwarded unchanged to ``AbstractCommandline.__init__``.
        """
        self.program_name = cmd
        self.parameters = [
            _StaticArgument("phase"),
            _Switch(["-A", "A"], "Drop reads with ambiguous phase"),
            _Option(
                ["-b", "b"],
                "Prefix of BAM output",
                filename=True,
                equate=False,
                checker_function=lambda x: isinstance(x, str),
            ),
            _Switch(["-F", "F"], "Do not attempt to fix chimeric reads"),
            _Option(
                ["-k", "k"],
                "Maximum length for local phasing",
                equate=False,
                checker_function=lambda x: isinstance(x, int),
            ),
            _Option(
                ["-q", "q"],
                """Minimum Phred-scaled LOD to
                call a heterozygote""",
                equate=False,
                checker_function=lambda x: isinstance(x, int),
            ),
            _Option(
                ["-Q", "Q"],
                """Minimum base quality to be
                used in het calling""",
                equate=False,
                checker_function=lambda x: isinstance(x, int),
            ),
            # The positional input is declared last so that it follows all
            # options on the generated command line, matching the documented
            # usage "samtools phase [options] <in.bam>" and the other
            # wrappers in this module. Previously it was declared first,
            # which put the options after the file name.
            _Argument(
                ["input", "input_bam", "in_bam"],
                "Input file",
                filename=True,
                is_required=True,
            ),
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
if __name__ == "__main__":
    # Executed only when this module is run as a script; run_doctest
    # presumably runs the doctest examples in the docstrings above.
    from Bio import _utils

    _utils.run_doctest()