import math
import random


def temporal_batching_index(fr, length=16):
    '''
    Build frame-index windows for a clip: pad clips shorter than `length`,
    otherwise split the clip into half-overlapping windows.

    Input:
        fr: number of frames in the clip
        length: window size in frames (must be even)
    Output:
        batch_indices: list of windows, each a list of `length` frame indices
    '''
    if fr < length:
        # e.g. (1,2,3,4,5) to (1,1,...,1,2,3,4,5,5,...,5)
        right = (length - fr) // 2
        left = length - right - fr
        return [[0] * left + list(range(fr)) + [fr - 1] * right]

    batch_indices = []
    last_idx = fr - 1
    assert length % 2 == 0
    half = length // 2
    for i in range(0, fr - half, half):
        frame_indices = [0] * length
        for j in range(length):
            # clamp indices that run past the end of the clip to the last frame
            frame_indices[j] = min(i + j, last_idx)
        batch_indices.append(frame_indices)

    return batch_indices
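
# Illustrative example (not part of the original module): with a 6-frame clip
# and a 4-frame window, two half-overlapping windows are produced, while a
# 3-frame clip is padded into a single window.
#
#   temporal_batching_index(6, length=4)  ->  [[0, 1, 2, 3], [2, 3, 4, 5]]
#   temporal_batching_index(3, length=4)  ->  [[0, 0, 1, 2]]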

def temporal_sliding_window(clip, window=16):
    '''
    Split a clip into `window`-frame sliding windows with an overlap of
    window // 2 frames.
    If the clip length is not a multiple of window // 2, the last window is
    padded with the final frame, e.g. (1,2,...,13,14,14,14) for (1,...,14).
    If the clip is shorter than `window` frames, it is padded on both sides,
    e.g. (1,1,...,1,2,3,4,5,5,...,5) for (1,2,3,4,5).
    Intended for sliding-window evaluation.

    Input:  list of image paths
    Output: list of windows, each a list of `window` image paths
    '''

    batch_indices = temporal_batching_index(len(clip), length=window)

    return [[clip[idx] for idx in indices] for indices in batch_indices]
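
# Illustrative example (hypothetical file names, not from the original code):
#
#   clip = ["f0.jpg", "f1.jpg", "f2.jpg", "f3.jpg", "f4.jpg", "f5.jpg"]
#   temporal_sliding_window(clip, window=4)
#   ->  [["f0.jpg", "f1.jpg", "f2.jpg", "f3.jpg"],
#        ["f2.jpg", "f3.jpg", "f4.jpg", "f5.jpg"]]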

def temporal_center_crop(clip, length=16):
    '''
    Take the centered `length`-frame window of a clip (padding clips shorter
    than `length`), then keep every other frame.

    Input:  list of image paths
    Output: list of length // 2 image paths
    '''
    fr = len(clip)
    if fr < length:
        # e.g. (1,2,3,4,5) to (1,1,...,1,2,3,4,5,5,...,5)
        right = (length - fr) // 2
        left = length - right - fr
        indices = [0] * left + list(range(fr)) + [fr - 1] * right
        output = [clip[i] for i in indices]
    elif fr == length:
        output = clip
    else:
        middle = fr // 2
        assert length % 2 == 0
        half = length // 2
        start = middle - half
        output = clip[start:start + length]

    # subsample every other frame
    return output[::2]
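
# Illustrative example (not part of the original module): a 6-frame clip with a
# 4-frame crop keeps the centered window (1, 2, 3, 4) and then every other
# frame of it.
#
#   temporal_center_crop(list(range(6)), length=4)  ->  [1, 3]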



def random_temporal_crop(clip, length=16):
    '''
    Randomly sample `length` consecutive frames (padding clips shorter than
    `length`), then keep every other frame.
    '''
    fr = len(clip)
    if fr < length:
        # e.g. (1,2,3,4,5) to (1,1,...,1,2,3,4,5,5,...,5)
        right = (length - fr) // 2
        left = length - right - fr
        indices = [0] * left + list(range(fr)) + [fr - 1] * right
        output = [clip[i] for i in indices]
    elif fr == length:
        output = clip
    else:
        start = random.randint(0, fr - length)
        output = clip[start:start + length]

    # subsample every other frame
    return output[::2]
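
# Illustrative example (not part of the original module): the window start is
# random, so only the output size is fixed at length // 2 frames, e.g.
#
#   random_temporal_crop(list(range(20)), length=4)  ->  [7, 9]   (one possible result)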


def use_all_frames(clip):
    '''
    Just use it as it is :)
    '''
    return clip

def looppadding(clip, length=16):
    '''
    Pad a clip to at least `length` items by looping it from the beginning,
    then keep every other item.
    '''
    # copy so the caller's list is not mutated by the appends below
    out = list(clip)

    # appending while iterating keeps the loop running until the target length
    for index in out:
        if len(out) >= length:
            break
        out.append(index)

    return out[::2]
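
# Illustrative example (not part of the original module): a 3-item clip is
# looped up to 8 items, (0,1,2,0,1,2,0,1), before the stride-2 subsampling.
#
#   looppadding([0, 1, 2], length=8)  ->  [0, 2, 1, 0]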

def temporal_even_crop(clip, length=16, n_samples=1):
    '''
    Take `n_samples` windows of `length` frames spread evenly over the clip,
    keeping every other frame of each window; the last window is loop-padded
    if it runs past the end of the clip.
    '''
    clip = list(clip)
    n_frames = len(clip)
    indices = list(range(n_frames))
    # guard the denominator so a single sample does not divide by zero
    stride = max(
        1, math.ceil((n_frames - 1 - length) / max(1, n_samples - 1)))

    out = []
    for begin_index in indices[::stride]:
        if len(out) >= n_samples:
            break
        end_index = min(indices[-1] + 1, begin_index + length)
        sample = list(range(begin_index, end_index))

        if len(sample) < length:
            out.append([clip[i] for i in looppadding(sample, length=length)])
            break
        else:
            out.append([clip[i] for i in sample[::2]])

    return out
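
# Illustrative example (not part of the original module): two evenly spaced
# 4-frame windows over a 10-frame clip, each reduced to length // 2 frames.
#
#   temporal_even_crop(list(range(10)), length=4, n_samples=2)  ->  [[0, 2], [5, 7]]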


class TemporalTransform(object):
    def __init__(self, length, mode="center"):
        self.mode = mode
        self.length = length
        # run once on a dummy clip in order to catch an incorrect mode early
        self.__call__(range(128))

    def __call__(self, clip):
        if self.mode == "random":
            return random_temporal_crop(clip, self.length)
        elif self.mode == "center":
            return temporal_center_crop(clip, self.length)
        elif self.mode == "all" or self.mode == "nocrop":
            # note that the requested length cannot be enforced here
            return use_all_frames(clip)
        elif self.mode == "slide":
            # note that the output has one extra dimension (a list of windows)
            return temporal_sliding_window(clip, self.length)
        elif self.mode == "even":
            return temporal_even_crop(clip, self.length, n_samples=5)
        else:
            raise NotImplementedError("this option is not defined: %s" % self.mode)
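

if __name__ == "__main__":
    # Minimal usage sketch (illustration only; the dummy file names are
    # assumptions, not paths used by the original code).
    dummy_clip = ["frame_%03d.jpg" % i for i in range(20)]

    center = TemporalTransform(length=16, mode="center")
    print(center(dummy_clip))       # 8 paths: every other frame of the centered 16-frame window

    slide = TemporalTransform(length=16, mode="slide")
    print(len(slide(dummy_clip)))   # 2 half-overlapping windows of 16 paths each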