Upload demo.py
Browse files
demo.py
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import cv2
|
2 |
+
from transformers import AutoModel
|
3 |
+
from modeling_internvideo2 import (retrieve_text, vid2tensor, _frame_from_video,)
|
4 |
+
|
5 |
+
|
6 |
+
if __name__ == '__main__':
    # Demo entry point: load the InternVideo2 checkpoint, score a set of
    # candidate captions against one video, print each returned caption with
    # its probability, then compute a video feature for the same file.

    # Single source of truth for the clip used by both steps below.
    video_path = 'example1.mp4'

    # NOTE(review): trust_remote_code=True lets transformers run Python code
    # shipped inside the model repository. Only use it with a repository you
    # trust (consider pinning a revision).
    model = AutoModel.from_pretrained("OpenGVLab/InternVideo2-Stage2_6B", trust_remote_code=True).eval()

    video = cv2.VideoCapture(video_path)
    try:
        # VideoCapture does not raise on a missing/unreadable file; without
        # this check the script would continue with no frames and fail later
        # with a much less helpful error.
        if not video.isOpened():
            raise OSError(f'could not open video: {video_path}')
        frames = list(_frame_from_video(video))
    finally:
        # Always give the decoder/file handle back, even if reading failed.
        video.release()

    text_candidates = [
        "A playful dog and its owner wrestle in the snowy yard, chasing each other with joyous abandon.",
        "A man in a gray coat walks through the snowy landscape, pulling a sleigh loaded with toys.",
        "A person dressed in a blue jacket shovels the snow-covered pavement outside their house.",
        "A cat excitedly runs through the yard, chasing a rabbit.",
        "A person bundled up in a blanket walks through the snowy landscape, enjoying the serene winter scenery.",
    ]

    # retrieve_text hands back two parallel sequences (texts, probabilities);
    # topk=5 equals the number of candidates, so presumably every candidate is
    # reported - TODO confirm ordering against modeling_internvideo2.
    texts, probs = retrieve_text(frames, text_candidates, model=model, topk=5)
    for t, p in zip(texts, probs):
        print(f'text: {t} ~ prob: {p:.4f}')

    # fnum=4 is presumably the number of frames sampled from the file -
    # TODO confirm against vid2tensor.
    vidtensor = vid2tensor(video_path, fnum=4)
    # The feature is computed for demonstration only; nothing consumes it here.
    feat = model.get_vid_feat(vidtensor)
|