archit11 committed on
Commit
883ce2d
·
verified ·
1 Parent(s): 66c91bd

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +75 -2
README.md CHANGED
@@ -7,6 +7,7 @@ tags:
7
  - video-classification
8
  - ucf-crime
9
  - vandalism-dectection
 
10
  metrics:
11
  - accuracy
12
  model-index:
@@ -30,8 +31,80 @@ More information needed
30
 
31
  ## Intended uses & limitations
32
 
33
- More information needed
34
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  ## Training and evaluation data
36
 
37
  More information needed
 
7
  - video-classification
8
  - ucf-crime
9
  - vandalism-dectection
10
+ - videomae
11
  metrics:
12
  - accuracy
13
  model-index:
 
31
 
32
  ## Intended uses & limitations
33
 
34
+ Usage:
35
+ ```
36
+ import av
37
+ import torch
38
+ import numpy as np
39
+
40
+ from transformers import AutoImageProcessor, VideoMAEForVideoClassification
41
+ from huggingface_hub import hf_hub_download
42
+
43
+ np.random.seed(0)
44
+
45
+
46
+ def read_video_pyav(container, indices):
47
+ '''
48
+ Decode the video with PyAV decoder.
49
+ Args:
50
+ container (`av.container.input.InputContainer`): PyAV container.
51
+ indices (`List[int]`): List of frame indices to decode.
52
+ Returns:
53
+ result (np.ndarray): np array of decoded frames of shape (num_frames, height, width, 3).
54
+ '''
55
+ frames = []
56
+ container.seek(0)
57
+ start_index = indices[0]
58
+ end_index = indices[-1]
59
+ for i, frame in enumerate(container.decode(video=0)):
60
+ if i > end_index:
61
+ break
62
+ if i >= start_index and i in indices:
63
+ frames.append(frame)
64
+ return np.stack([x.to_ndarray(format="rgb24") for x in frames])
65
+
66
+
67
+ def sample_frame_indices(clip_len, frame_sample_rate, seg_len):
68
+ '''
69
+ Sample a given number of frame indices from the video.
70
+ Args:
71
+ clip_len (`int`): Total number of frames to sample.
72
+ frame_sample_rate (`int`): Sample every n-th frame.
73
+ seg_len (`int`): Maximum allowed index of sample's last frame.
74
+ Returns:
75
+ indices (`List[int]`): List of sampled frame indices
76
+ '''
77
+ converted_len = int(clip_len * frame_sample_rate)
78
+ end_idx = np.random.randint(converted_len, seg_len)
79
+ start_idx = end_idx - converted_len
80
+ indices = np.linspace(start_idx, end_idx, num=clip_len)
81
+ indices = np.clip(indices, start_idx, end_idx - 1).astype(np.int64)
82
+ return indices
83
+
84
+
85
+ # video clip consists of 300 frames (10 seconds at 30 FPS)
86
+ file_path = hf_hub_download(
87
+ repo_id="nielsr/video-demo", filename="eating_spaghetti.mp4", repo_type="dataset"
88
+ )
89
+ container = av.open(file_path)
90
+
91
+ # sample 16 frames
92
+ indices = sample_frame_indices(clip_len=16, frame_sample_rate=1, seg_len=container.streams.video[0].frames)
93
+ video = read_video_pyav(container, indices)
94
+
95
+ image_processor = AutoImageProcessor.from_pretrained("videomae-base-finetuned-ucfcrime-full")
96
+ model = VideoMAEForVideoClassification.from_pretrained("videomae-base-finetuned-ucfcrime-full")
97
+
98
+ inputs = image_processor(list(video), return_tensors="pt")
99
+
100
+ with torch.no_grad():
101
+ outputs = model(**inputs)
102
+ logits = outputs.logits
103
+
104
+ # model predicts one of the UCF-Crime classes it was fine-tuned on
105
+ predicted_label = logits.argmax(-1).item()
106
+ print(model.config.id2label[predicted_label])
107
+ ```
108
  ## Training and evaluation data
109
 
110
  More information needed