# (Web-page extraction residue, not part of this module: "Spaces: No application file")
# Copyright 2003-2008 by Leighton Pritchard.  All rights reserved.
# Revisions copyright 2008-2009 by Peter Cock.
#
# This file is part of the Biopython distribution and governed by your
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
# Please see the LICENSE file that should have been included as part of this
# package.
#
# Contact:       Leighton Pritchard, The James Hutton Institute,
#                Invergowrie, Dundee, Scotland, DD2 5DA, UK
#                [email protected]
################################################################################
"""Graph module.

Provides:
 - GraphData - Contains data from which a graph will be drawn, and
   information about its presentation

For drawing capabilities, this module uses reportlab to draw and write
the diagram: http://www.reportlab.com
"""
from math import sqrt

# ReportLab imports
from reportlab.lib import colors
class GraphData:
    """Graph Data.

    Holds a set of (position, value) data points together with the
    presentation settings used when the graph is drawn.

    Attributes:
     - id        Unique identifier for the data
     - data      Dictionary describing the data, keyed by position
     - name      String describing the data
     - style     String ('bar', 'heat', 'line') describing how to draw the data
     - poscolor  colors.Color for drawing high (some styles) or all
       values
     - negcolor  colors.Color for drawing low values (some styles)
     - linewidth     Int, thickness to draw the line in 'line' styles
     - center    Value at which x-axis crosses y-axis

    """

    def __init__(
        self,
        id=None,
        data=None,
        name=None,
        style="bar",
        color=colors.lightgreen,
        altcolor=colors.darkseagreen,
        center=None,
        colour=None,
        altcolour=None,
    ):
        """Initialize.

        Arguments:
         - id        Unique ID for the graph
         - data      List of (position, value) tuples
         - name      String describing the graph
         - style     String describing the presentation style ('bar', 'line',
           'heat')
         - color     colors.Color describing the color to draw all or the
           'high' (some styles) values (overridden by backwards
           compatible argument with UK spelling, colour).
         - altcolor  colors.Color describing the color to draw the 'low'
           values (some styles only) (overridden by backwards
           compatible argument with UK spelling, altcolour).
         - center    Value at which x-axis crosses y-axis.
         - colour    Backwards compatible UK spelling of color; if not None
           it takes precedence over color.
         - altcolour Backwards compatible UK spelling of altcolor; if not
           None it takes precedence over altcolor.

        """
        # Let the UK spelling (colour) override the USA spelling (color)
        if colour is not None:
            color = colour
        if altcolour is not None:
            altcolor = altcolour

        self.id = id  # Unique identifier for the graph
        self.data = {}  # holds values, keyed by sequence position
        if data is not None:
            self.set_data(data)
        self.name = name  # Descriptive string

        # Attributes describing how the graph will be drawn
        self.style = style  # One of 'bar', 'heat' or 'line'
        self.poscolor = color  # Color to draw all, or 'high' values
        self.negcolor = altcolor  # Color to draw 'low' values
        self.linewidth = 2  # linewidth to use in line graphs
        self.center = center  # value at which x-axis crosses y-axis

    def set_data(self, data):
        """Add data as a list of (position, value) tuples.

        Existing points are kept; a value given for a position that is
        already present replaces the stored value.
        """
        for pos, val in data:  # Fill data dictionary
            self.data[pos] = val

    def get_data(self):
        """Return data as a list of sorted (position, value) tuples."""
        return sorted(self.data.items())

    def add_point(self, point):
        """Add a single point to the set of data as a (position, value) tuple."""
        pos, val = point
        self.data[pos] = val

    def quartiles(self):
        """Return (minimum, lowerQ, medianQ, upperQ, maximum) values as tuple.

        The quartiles are picked by integer index into the sorted values
        (no interpolation).  Raises IndexError if there is no data.
        """
        data = sorted(self.data.values())
        datalen = len(data)
        return (
            data[0],
            data[datalen // 4],
            data[datalen // 2],
            data[3 * datalen // 4],
            data[-1],
        )

    def range(self):
        """Return range of data as (start, end) tuple.

        Returns the range of the data, i.e. its start and end points on
        the genome as a (start, end) tuple.  Raises IndexError if there
        is no data.
        """
        positions = sorted(self.data)  # i.e. dict keys
        # Return first and last positions in graph
        return (positions[0], positions[-1])

    def mean(self):
        """Return the mean value for the data points (float).

        Raises ZeroDivisionError if there is no data.
        """
        return sum(self.data.values()) / len(self.data)

    def stdev(self):
        """Return the sample standard deviation for the data (float).

        Raises ZeroDivisionError if there are fewer than two data points.
        """
        data = list(self.data.values())
        m = self.mean()
        runtotal = 0.0
        for entry in data:
            runtotal += (entry - m) ** 2
        # This is sample standard deviation; population stdev would involve
        # division by len(data), rather than len(data)-1
        return sqrt(runtotal / (len(data) - 1))

    def __len__(self):
        """Return the number of points in the data set."""
        return len(self.data)

    def __getitem__(self, index):
        """Return data value(s) at the given position.

        Given an integer representing position on the sequence
        returns a float - the data value at the passed position.

        If a slice, returns graph data from the region as a list of
        (position, value) tuples, sorted by position.  Both end points of
        the slice are treated as inclusive (unlike normal Python slicing);
        an omitted start or stop leaves that side unbounded.  Slices with
        a step other than 1 are not supported and raise ValueError.

        Raises KeyError for an integer position with no data, and
        TypeError if index is neither an integer nor a slice.
        """
        if isinstance(index, int):
            return self.data[index]
        if isinstance(index, slice):
            if index.step is not None and index.step != 1:
                raise ValueError("Slices with a step other than 1 are not supported")
            low = index.start
            high = index.stop
            # A bound of None (e.g. graph[:100]) means "no limit" on that
            # side; comparing a position against None would raise TypeError.
            return [
                (pos, self.data[pos])
                for pos in sorted(self.data)
                if (low is None or pos >= low) and (high is None or pos <= high)
            ]
        raise TypeError("Need an integer or a slice")

    def __str__(self):
        """Return a string describing the graph data.

        Statistics that are undefined for the current number of points
        (everything for an empty graph, the sample SD for a single point)
        are left out or marked n/a rather than raising an exception.
        """
        npoints = len(self.data)
        outstr = [f"\nGraphData: {self.name}, ID: {self.id}"]
        outstr.append("Number of points: %d" % npoints)
        if not npoints:
            # No data: mean, quartiles and range are all undefined
            return "\n".join(outstr)
        outstr.append(f"Mean data value: {self.mean()}")
        if npoints > 1:
            outstr.append(f"Sample SD: {self.stdev():.3f}")
        else:
            # Sample SD divides by (n - 1), so it needs at least two points
            outstr.append("Sample SD: n/a")
        outstr.append(
            "Minimum: %s\n1Q: %s\n2Q: %s\n3Q: %s\nMaximum: %s" % self.quartiles()
        )
        outstr.append("Sequence Range: %s..%s" % self.range())
        return "\n".join(outstr)