File size: 2,858 Bytes
0f220e9
 
 
 
c6939df
0f220e9
 
 
c6939df
 
 
 
0f220e9
c6939df
 
0f220e9
c6939df
0f220e9
c6939df
0f220e9
c6939df
 
 
0f220e9
c6939df
0f220e9
c6939df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0f220e9
c6939df
0f220e9
 
c6939df
0f220e9
c6939df
0f220e9
c6939df
0f220e9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import tensorflow as tf
import cv2
import numpy as np
import config
from logger import logging



def preprocess_video(video_path: str) -> tuple[tf.Tensor, list]:
    """
    Sample, resize and normalize a fixed number of frames from a video.

    Up to ``config.FRAME_NUM`` frames are read at a constant stride of
    ``int(total_frames / config.FRAME_NUM)``. Each sampled frame is resized
    to ``config.FRAME_HT`` x ``config.FRAME_WD`` (height x width) and divided
    by 255.

    params :
    video_path : path of the video file

    returns :

    Returns tuple (input_tensor, frame_list)

    input_tensor : the stacked, normalized frames with a leading axis of
                   size 1 added (frames end up on axis 1). If a read fails
                   part-way, it holds only the frames read so far.
    frame_list : the sampled frames exactly as returned by OpenCV
                 (not resized, not normalized)
    """

    logging.info(">>> Preprocessing the video....")

    # load the video
    video_capture = cv2.VideoCapture(video_path)

    new_video, frame_list = [], []
    try:
        # the number of frames in the original video (OpenCV reports a float)
        original_number_of_frames = video_capture.get(cv2.CAP_PROP_FRAME_COUNT)

        # gap between two consecutive frames to capture
        # NOTE(review): this is 0 when the video has fewer frames than
        # config.FRAME_NUM, in which case every iteration seeks to frame 0.
        frame_interval = int(original_number_of_frames / config.FRAME_NUM)

        for i in range(config.FRAME_NUM):
            video_capture.set(cv2.CAP_PROP_POS_FRAMES, i * frame_interval)
            success, frame = video_capture.read()

            if not success:
                logging.info("video loading failed")
                break

            frame_list.append(frame)

            # cv2.resize expects dsize as (width, height), so width goes
            # first to produce an array of FRAME_HT rows by FRAME_WD columns.
            resized_frame = cv2.resize(frame, (config.FRAME_WD, config.FRAME_HT))

            # Normalize by dividing by 255 and keep the frame for the model input
            new_video.append(resized_frame / 255)
    finally:
        # Always free the capture handle, even if decoding/resizing raised.
        video_capture.release()

    new_video_array = np.asarray(new_video)
    input_tensor = tf.expand_dims(new_video_array, axis=0)

    # Only report success when every requested frame was actually read.
    if len(new_video) == config.FRAME_NUM:
        logging.info("Video processing successful.")

    return input_tensor, frame_list


# Get top_k labels and probabilities
def get_top_k(probs, label_map,k=5 ):
    """Return the k highest-scoring labels for the first row of ``probs``.

    Args:
        probs: score tensor with at least two axes; only ``probs[0]`` is
            ranked and reported.
        label_map: sequence of label strings indexed by class id. It is read
            through ``tf.gather`` and each gathered entry is decoded from
            UTF-8 bytes.
        k: the number of top predictions to select.

    Returns:
        a dict mapping each selected label to ``float(score) / 100``, in
        descending score order.
        NOTE(review): the division by 100 presumably assumes the incoming
        scores are percentages -- confirm against the caller.
    """
    # Rank every class from best to worst and keep the first k of row 0
    ranked = tf.argsort(probs, direction='DESCENDING').numpy()
    best = ranked[0][:k]

    # Look up the label strings (gathered as bytes) and decode them
    names = [raw.decode('utf8') for raw in tf.gather(label_map, best).numpy()]

    # Scores that belong to the selected classes
    scores = tf.gather(probs[0], best).numpy()

    return {name: float(score) / 100 for name, score in zip(names, scores)}