Spaces:
Sleeping
Sleeping
"""Provides the standard data processing functions performed on CNMFe and annotation data""" | |
import numpy as np | |
from scipy.signal import correlate | |
from scipy.stats import zscore | |
def smooth(data: np.ndarray, window_size=5):
    """
    Returns a smoothed version of response data using a moving average filter.

    Interior samples are averaged over a centred window of ``window_size`` points.
    Near both edges, where a full window does not fit, the window shrinks
    symmetrically (1, 3, 5, ... points) so that the output keeps the input length.

    Parameters:
    -----------
    data : np.ndarray
        A numpy 1-D array containing data to be smoothed.
    window_size : int
        Number of data points for calculating the smoothed value. Values larger than
        ``data.size`` are clipped to ``data.size``. If the resulting number is even,
        window_size is automatically reduced by 1.

    Returns:
    --------
    smooth_data : np.ndarray
        Smoothed data, returned as a 1-D array of the same size as ``data``. When
        ``window_size`` is 1 or ``data`` is empty, ``data`` itself is returned.

    Raises:
    -------
    ValueError
        If ``window_size`` is smaller than 1.

    Notes:
    ------
    Implements MATLAB's smooth function.
    """
    if window_size < 1:
        raise ValueError('window_size must be a positive integer.')
    # Nothing to average: a one-sample window or no samples at all.
    if window_size == 1 or data.size == 0:
        return data

    window_size = min(window_size, data.size)
    if window_size % 2 == 0:
        window_size -= 1

    # Number of samples on each side for which the full window does not fit.
    half_width = (window_size - 1) // 2
    head = np.array([data[:2 * k + 1].mean() for k in range(half_width)])
    # Built from the last sample inwards, then reversed into chronological order.
    tail = np.array([data[-(2 * k + 1):].mean() for k in range(half_width)])[::-1]
    core = np.convolve(data, np.ones(window_size, dtype=int), 'valid') / window_size
    return np.hstack((head, core, tail))
def corr(x: np.ndarray, y: np.ndarray):
    """
    Returns the pairwise linear correlation coefficient between each pair of columns
    in the input matrices x and y.

    Parameters:
    -----------
    x : np.ndarray
        Input matrix, specified as an n x k_1 matrix. Its rows correspond to
        observations, and the columns correspond to variables. A 1-D array is
        treated as a single variable.
    y : np.ndarray
        Input matrix, specified as an n x k_2 matrix. Its rows correspond to
        observations, and the columns correspond to variables. A 1-D array is
        treated as a single variable.

    Returns:
    --------
    rho : float or np.ndarray
        When both inputs are 1-D, the scalar correlation coefficient between them.
        Otherwise a k_1 x k_2 matrix where ``rho[i, j]`` is the correlation between
        column ``i`` of ``x`` and column ``j`` of ``y``.

    Raises:
    -------
    ValueError
        If an input has more than two dimensions, or the inputs do not have the same
        number of observations (rows).

    Notes:
    ------
    Implements MATLAB's corr function.
    """
    x_arr = np.asarray(x)
    y_arr = np.asarray(y)

    # Two plain vectors: keep returning a scalar.
    if x_arr.ndim == 1 and y_arr.ndim == 1:
        return np.corrcoef(x_arr, y_arr)[0][1]

    # Promote a lone vector to a single-column matrix.
    x_cols = x_arr[:, np.newaxis] if x_arr.ndim == 1 else x_arr
    y_cols = y_arr[:, np.newaxis] if y_arr.ndim == 1 else y_arr
    if x_cols.ndim != 2 or y_cols.ndim != 2:
        raise ValueError('x and y must be 1-D or 2-D arrays.')
    if x_cols.shape[0] != y_cols.shape[0]:
        raise ValueError('x and y must have the same number of rows (observations).')

    # Pearson correlation of every x column against every y column.
    x_centered = x_cols - x_cols.mean(axis=0)
    y_centered = y_cols - y_cols.mean(axis=0)
    cross = x_centered.T @ y_centered
    x_norm = np.sqrt((x_centered ** 2).sum(axis=0))
    y_norm = np.sqrt((y_centered ** 2).sum(axis=0))
    return cross / np.outer(x_norm, y_norm)
def autocorr(x: np.ndarray,
             max_lags=10):
    """
    Returns the correlations and associated lags of the univariate time series x.

    The series is correlated with itself and the result is normalized by the
    zero-lag value ``np.dot(x, x)``, so the entry at lag 0 equals 1. The mean is
    not removed from ``x`` before correlating.

    Parameters:
    -----------
    x : np.ndarray
        Observed univariate time series.
    max_lags : int or None
        Number of lags on each side of zero, specified as a positive integer smaller
        than ``len(x)``. ``None`` selects every available lag (``len(x) - 1``).

    Returns:
    --------
    acf : np.ndarray
        Correlations for lags ``-max_lags`` to ``max_lags``, returned as a numeric
        vector of length ``2 * max_lags + 1``.
    lags : np.ndarray
        Autocorrelation lags, ``np.arange(-max_lags, max_lags + 1)``.

    Raises:
    -------
    ValueError
        If ``max_lags`` is smaller than 1 or not smaller than ``len(x)``.

    Notes:
    ------
    Modified version of matplotlib's acorr function.
    """
    n_samples = len(x)
    if max_lags is None:
        max_lags = n_samples - 1

    # Validate before doing the full correlation.
    if max_lags >= n_samples or max_lags < 1:
        raise ValueError('max_lags must be None or strictly '
                         'positive < %d' % n_samples)

    correls = correlate(x, x, mode="full")
    correls = correls / np.dot(x, x)

    lags = np.arange(-max_lags, max_lags + 1)
    # In "full" mode the zero-lag term sits at index n_samples - 1.
    acf = correls[n_samples - 1 - max_lags:n_samples + max_lags]
    return acf, lags
def convert_to_rast(behavior_ts, time_max):
    """
    Converts a list of behavior time stamps to a one-hot vector where 0 indicates no
    presence of the given behavior, and 1 indicates presence of it.

    Each time stamp pair is rounded to whole frames and treated as inclusive on both
    ends. Pairs are clipped to the range ``[0, time_max)``; a pair lying entirely
    outside that range contributes nothing, regardless of its position in the list.

    Parameters:
    -----------
    behavior_ts : sequence
        A sequence of time stamp pairs (start and end) for a particular behavior.
    time_max : int
        The length in frames of the vector.

    Returns:
    --------
    behavior_rast : np.ndarray
        A one-hot vector of length ``time_max``.
    """
    behavior_rast = np.zeros(time_max)
    for time_stamps in behavior_ts:
        # The end stamp is inclusive, hence the + 1 before rounding.
        start = int(round(time_stamps[0]))
        end = int(round(time_stamps[1] + 1))
        # Clamp both bounds so negative values cannot wrap around to the end of
        # the array and overly large values cannot run past it.
        start = min(max(start, 0), time_max)
        end = min(max(end, 0), time_max)
        behavior_rast[start:end] = 1
    return behavior_rast
def convert_to_raster(bouts: list,
                      neural_activity_sr: float,
                      observation_sr: float,
                      max_frame: int):
    """
    Converts bouts into a behavior raster, a one hot encoding of a behavior describing
    when it is active.

    It is often the case that the start and stop timestamps found in ``bouts`` are
    collected at a different sample rate than ``neural_activity``, which are often what
    behavior rasters align to. In order to align the two, a ratio between the sample
    rates of ``neural_activity`` and the bouts of behavior, which are observations,
    is calculated and then multiplied to the timestamps.

    After scaling, each bout is rounded to whole frames, treated as inclusive on both
    ends and clipped to ``[0, max_frame)``. A bout lying entirely outside that range
    contributes nothing, regardless of its position in ``bouts``.

    Parameters:
    -----------
    bouts : list or np.ndarray
        A sequence where each element is a pair of numbers where the first number
        denotes the beginning of a bout of behavior, and the second number denotes
        the end of the bout. May be empty.
    neural_activity_sr : float
        Sample rate of ``neural_activity``.
    observation_sr : float
        Sample rate for the ``bouts`` used.
    max_frame : int
        The length of the behavior raster, often set to the number of frames of
        ``neural_activity``.

    Returns:
    --------
    behavior_raster : np.ndarray
        A raster (a one hot encoding) of a behavior, describing when it is active.
    """
    sr_ratio = neural_activity_sr / observation_sr
    # Convert first: multiplying a plain Python list by a float raises TypeError.
    behavior_ts_adjusted = np.asarray(bouts, dtype=float) * sr_ratio

    behavior_raster = np.zeros(max_frame)
    for time_stamps in behavior_ts_adjusted:
        # The end stamp is inclusive, hence the + 1 before rounding.
        start = int(round(time_stamps[0]))
        end = int(round(time_stamps[1] + 1))
        # Clamp both bounds so negative values cannot wrap around to the end of
        # the array and overly large values cannot run past it.
        start = min(max(start, 0), max_frame)
        end = min(max(end, 0), max_frame)
        behavior_raster[start:end] = 1
    return behavior_raster
def convert_to_bouts(behavior_raster: np.ndarray):
    """
    Converts a behavior raster into behavior bouts, an array where each element is a
    pair of timestamps (int) where the first timestamp denotes the beginning of a bout of
    behavior, and the second timestamp denotes the end of the bout.

    Any non-zero entry counts as active, so boolean rasters and rasters that mark
    activity with a value other than 1 are handled as well.

    Parameters:
    -----------
    behavior_raster : np.ndarray
        A 1-D raster (a one hot encoding) of a behavior, describing when it is active.

    Returns:
    --------
    bouts : np.ndarray
        An array of shape (number of bouts, 2) where each row is a pair of timestamps
        (int). The first timestamp is the index of the first active frame of a bout.
        The second timestamp is the index of the last active frame of the bout,
        except for a bout that is still active in the final frame, which is closed
        with ``behavior_raster.size``. An empty raster, or one without active frames,
        gives an array of shape (0, 2).
    """
    active = np.asarray(behavior_raster) != 0
    if active.size == 0:
        return np.empty((0, 2), dtype=int)

    # +1 where activity switches on, -1 where it switches off.
    steps = np.diff(active.astype(int))
    start = np.flatnonzero(steps == 1) + 1
    stop = np.flatnonzero(steps == -1)

    # A bout already running at frame 0 has no rising edge to detect.
    if active[0]:
        start = np.concatenate(([0], start))
    # A bout still running at the last frame has no falling edge to detect.
    # NOTE(review): this closes the bout with ``size`` rather than ``size - 1``,
    # unlike interior bouts; kept as-is for existing callers.
    if active[-1]:
        stop = np.concatenate((stop, [active.size]))

    return np.column_stack((start, stop))
def merge_rasters_down(behavior_raster_array: np.ndarray) -> np.ndarray:
    """
    For a behavior raster, merges down all rasters to one array in such a way that no
    two behaviors are occurring at the same time.

    It determines which behavior should remain 'on top' by determining which behavior
    has the least amount of active frames: rows are written from most active to least
    active, so a less active behavior overwrites a more active one where they
    overlap. Rows with equal counts are written in row order. A row without any
    active frame leaves the result untouched.

    This method should only be used on behavior rasters where all behaviors come from a
    single channel.

    Parameters:
    -----------
    behavior_raster_array : np.ndarray
        A 2-D array where each row is a behavior raster, a one hot encoding of
        behaviors, describing when that behavior is active. Each row of this array
        must use a different positive value to indicate that a behavior is active
        (for example, if one row uses 1s, another row must not use 1 as well).

    Returns:
    --------
    single_track : np.ndarray
        An array of shape (1, number of frames), where each entry is either 0
        indicating that no behavior is active, or a value indicating that a specific
        behavior is active.
    """
    single_track = np.zeros((1, behavior_raster_array.shape[1]))

    active_mask = behavior_raster_array > 0
    active_counts = active_mask.sum(axis=1)

    # Most active rows first; the stable sort keeps row order among ties.
    for row_index in np.argsort(-active_counts, kind='stable'):
        row_active = active_mask[row_index]
        # Copy only the active frames, so inactive frames never erase values
        # already placed by other rows.
        single_track[0, row_active] = behavior_raster_array[row_index, row_active]
    return single_track
def separate_tracks(single_track: np.ndarray,
                    behavior_values: list):
    """
    For a single track, separates each value listed in ``behavior_values`` into its
    own raster within a 2-D array.

    Parameters:
    -----------
    single_track : np.ndarray
        An array where each entry is either 0 indicating that no behavior is active,
        or a value indicating that a specific behavior is active.
    behavior_values : list
        A list of values corresponding to the specific behaviors within
        ``single_track``.

    Returns:
    --------
    behavior_raster_array : np.ndarray
        An array with one row per entry of ``behavior_values``. Each row holds that
        value wherever ``single_track`` equals it and 0 elsewhere. When
        ``behavior_values`` is empty, an array with zero rows is returned.

    Raises:
    -------
    KeyError
        If ``behavior_values`` holds fewer values than there are distinct non-zero
        values in ``single_track``.
    """
    track = np.asarray(single_track)

    # Count the distinct non-zero values directly, so a track that never goes
    # back to 0 is still checked correctly.
    present_values = np.unique(track)
    present_values = present_values[present_values != 0]
    if len(behavior_values) < present_values.size:
        raise KeyError("There are not sufficient values within ``behavior_values`` to "
                       "accomodate those present in ``single_track``.")

    # np.vstack cannot stack an empty list.
    if len(behavior_values) == 0:
        return np.empty((0, track.shape[-1]), dtype=track.dtype)

    return np.vstack([np.where(track == value, value, 0) for value in behavior_values])
def config_neural_activity(config: dict, neural_activity: np.ndarray):
    """
    Configures `neural_activity` according to parameters set in config.

    The data is first smoothed with ``smooth`` and then optionally z-scored. For 2-D
    input both steps are applied to each row independently.

    Parameters:
    -----------
    config : dict
        A dictionary which specifies the following parameters: 'smooth_window',
        'baseline_frame', and 'zscore_method'. 'zscore_method' is one of "All Data",
        "Baseline", or "No Z-Score".

        - "Baseline" z-scores with the mean and standard deviation of the frames
          before 'baseline_frame'. If 'baseline_frame' is ``None`` or 0 there is no
          baseline to use and the data is z-scored over all frames instead.
        - "No Z-Score" returns the smoothed data.
        - Any other value z-scores over all frames.
    neural_activity : np.ndarray
        Neural activity being used, either 1-D or 2-D.

    Returns:
    --------
    mod_neural_activity : np.ndarray
        Modified `neural_activity`, according to `config`.
    """
    smooth_window = int(config['smooth_window'])
    zscore_method = config['zscore_method']
    baseline_frame = config['baseline_frame']
    is_matrix = len(neural_activity.shape) > 1

    # smooth
    if is_matrix:
        mod_neural_activity = np.zeros(neural_activity.shape)
        for i in range(neural_activity.shape[0]):
            mod_neural_activity[i] = smooth(neural_activity[i], smooth_window)
    else:
        mod_neural_activity = smooth(neural_activity, smooth_window)

    # z-score
    if zscore_method == 'No Z-Score':
        return mod_neural_activity

    # An empty baseline (frame 0) would give NaN statistics, so it is treated the
    # same as a missing baseline.
    has_baseline = baseline_frame is not None and baseline_frame != 0
    if zscore_method == 'Baseline' and has_baseline:
        if is_matrix:
            baseline = mod_neural_activity[:, :baseline_frame]
            mean = baseline.mean(axis=1, keepdims=True)
            std = baseline.std(axis=1, keepdims=True)
        else:
            baseline = mod_neural_activity[:baseline_frame]
            mean = baseline.mean()
            std = baseline.std()
        return (mod_neural_activity - mean) / std

    if is_matrix:
        return zscore(mod_neural_activity, axis=1)
    return zscore(mod_neural_activity)
def compress_annotations(annot: dict,
                         downsample_rate: int,
                         max_frame: int) -> tuple[dict, dict]:
    """
    Takes in an annotation dictionary and creates a single raster per channel, where the
    raster contains the behaviors from their respective channel.

    Within a channel, behaviors are numbered 1, 2, 3, ... in the order they appear in
    the dictionary; 0 is reserved for 'None' (no behavior). Overlapping behaviors
    are resolved by ``merge_rasters_down``.

    Parameters:
    -----------
    annot : dict
        Dictionary mapping each channel to a dictionary of behaviors, which in turn
        maps each behavior to its bouts (beginning and end frames).
    downsample_rate : int
        The rate at which samples should be taken. Divides bout timing (in frames) by
        value.
    max_frame : int
        The length of each channel raster.

    Returns:
    --------
    annot_single_track : dict
        Maps each channel to its merged raster of behavior values. A channel without
        behaviors gets an all-zero raster.
    channel_behavior_map : dict
        Maps each channel to a dictionary translating raster values to behavior
        names, including ``0: 'None'``.
    """
    annot_single_track = {}
    channel_behavior_map = {}
    for channel in annot:
        channel_rasters = []
        behavior_map = {0: 'None'}
        for value, behavior in enumerate(annot[channel], start=1):
            bouts = annot[channel][behavior]
            raster = convert_to_raster(bouts, 1, downsample_rate, max_frame)
            # Scale the one-hot raster so each behavior carries its own value.
            channel_rasters.append(raster * value)
            behavior_map[value] = behavior

        if channel_rasters:
            channel_raster = merge_rasters_down(np.array(channel_rasters))[0]
        else:
            # Nothing to merge; merge_rasters_down needs at least one row.
            channel_raster = np.zeros(max_frame)

        annot_single_track[channel] = channel_raster
        channel_behavior_map[channel] = behavior_map
    return annot_single_track, channel_behavior_map
def compress_compressed_annotations(annot_single_track: dict,
                                    channel_behavior_map: dict,
                                    max_frame: int) -> list[str]:
    """
    Further compresses the results from `compress_annotations` to get a single list
    with one entry per frame, where each entry is a string naming the behavior present
    in every channel at that frame, joined with ``'||'`` in channel order.

    Parameters:
    -----------
    annot_single_track : dict
        Maps each channel to a raster of behavior values, indexable by frame.
    channel_behavior_map : dict
        Maps each channel to a dictionary translating raster values (int) to
        behavior labels (str).
    max_frame : int
        Number of frames to label, starting at frame 0.

    Returns:
    --------
    labels : list[str]
        One ``'||'``-joined label string per frame.

    Raises:
    -------
    KeyError
        If a channel is missing from ``channel_behavior_map``, or a raster holds a
        value that has no label in that channel's behavior map.
    """
    # Resolve the per-channel lookups once instead of once per frame.
    channels = [(channel, annot_single_track[channel], channel_behavior_map[channel])
                for channel in annot_single_track]

    labels = []
    for frame in range(max_frame):
        labels_at_frame = []
        for channel, channel_raster, behavior_map in channels:
            behavior_value = int(channel_raster[frame])
            if behavior_value not in behavior_map:
                # Fail with a clear message instead of letting join() choke on None.
                raise KeyError(f"No label for behavior value {behavior_value} in "
                               f"channel {channel!r} at frame {frame}.")
            labels_at_frame.append(behavior_map[behavior_value])
        labels.append('||'.join(labels_at_frame))
    return labels
def generate_label_array(annot: dict,
                         downsample_rate: int,
                         max_frame: int)-> list[str]:
    """
    Generates one label entry per video frame from the annotations in `annot`.

    The annotations are first reduced to a single raster per channel with
    `compress_annotations`, and those rasters are then turned into per-frame labels
    covering every channel with `compress_compressed_annotations`.
    """
    compressed = compress_annotations(annot, downsample_rate, max_frame)
    single_tracks, behavior_maps = compressed
    return compress_compressed_annotations(single_tracks, behavior_maps, max_frame)