File size: 3,416 Bytes
37f5c2f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c9900aa
 
37f5c2f
5cd7d27
37f5c2f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5cd7d27
37f5c2f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c9900aa
 
37f5c2f
c9900aa
37f5c2f
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109

from yolox.exp import get_exp
from yolox.data.datasets import COCO_CLASSES
from predictor import Predictor

import cv2
import gradio as gr
import torch

import subprocess
import tempfile
import time
from pathlib import Path

# Build the detector once at import time so every request reuses the same
# CPU-resident model instead of reloading weights per call.
ckpt_file = "models/openlenda_s.pth"

exp = get_exp("exps/openlenda_s.py", None)
model = exp.get_model()
model.eval()  # inference only
model.load_state_dict(
    # The checkpoint is a dict; the network weights live under "model".
    torch.load(ckpt_file, map_location="cpu")["model"]
)

# NOTE(review): the two trailing False flags are positional options of the
# project-local Predictor; their meaning is not visible from this file.
predictor = Predictor(model, COCO_CLASSES, "cpu", False, False)


def image_inference(image, confthre, nmsthre):
    """Detect objects in one image and return the annotated picture.

    Args:
        image: Input picture in RGB channel order, as delivered by the
            Gradio ``"image"`` input. The array is left unmodified.
        confthre: Confidence threshold forwarded to ``predictor.inference``.
        nmsthre: NMS threshold forwarded to ``predictor.inference``.

    Returns:
        The image produced by ``predictor.visual``, converted from BGR back
        to RGB so it displays with correct colours.

    Raises:
        gr.Error: If no image was supplied.
    """
    if image is None:
        # Submitting the form without an image hands us None; fail with a
        # readable message instead of an opaque OpenCV type error.
        raise gr.Error("Please provide an image first.")

    # Convert into a fresh array rather than in place: this keeps the
    # caller's buffer intact and also works for read-only inputs.
    bgr_image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    outputs, img_info = predictor.inference(bgr_image, confthre, nmsthre)
    result_image = predictor.visual(outputs[0], img_info)
    return cv2.cvtColor(result_image, cv2.COLOR_BGR2RGB)


# Image tab: the inputs are listed in the same order as the parameters of
# image_inference (image, confthre, nmsthre).
image_interface = gr.Interface(
    fn=image_inference,
    inputs=[
        "image",
        gr.Slider(
            minimum=0.01,
            maximum=1,
            value=0.4,
            step=0.01,
            label="Confidence Threshold",
        ),
        gr.Slider(
            minimum=0.01,
            maximum=1,
            value=0.01,
            step=0.01,
            label="NMS Threshold",
        ),
    ],
    # One bundled sample, pre-filled with the slider defaults above.
    examples=[["assets/sample.png", 0.4, 0.01]],
    outputs=gr.Image(type="pil"),
    title="OpenLenda image demo",
)


def video_inference(video_file, confthre, nmsthre, start_sec, duration):
    """Run detection on a short section of a video and return the result.

    The requested section is cut out with ffmpeg (stream copy), every frame
    is passed through the predictor and drawn on, and the annotated frames
    are re-encoded to H.264.

    Args:
        video_file: Path of the uploaded video.
        confthre: Confidence threshold forwarded to ``predictor.inference``.
        nmsthre: NMS threshold forwarded to ``predictor.inference``.
        start_sec: Offset, in seconds, at which the section starts.
        duration: Length of the section in seconds.

    Returns:
        Path of the annotated ``.mp4`` file. The file is created with
        ``delete=False`` and is not removed by this function.

    Raises:
        gr.Error: If no video was supplied, or the selected section could
            not be opened or contained no readable frames.
        subprocess.CalledProcessError: If an ffmpeg invocation fails.
    """
    if video_file is None:
        raise gr.Error("Please provide a video first.")

    start_timestamp = time.strftime("%H:%M:%S", time.gmtime(start_sec))

    # All intermediates live in one scratch directory that is removed on
    # exit, including when an error is raised part-way through.
    with tempfile.TemporaryDirectory() as work_dir:
        clip_path = str(Path(work_dir) / f"clip{Path(video_file).suffix}")
        annotated_path = str(Path(work_dir) / "annotated.mp4")

        # Arguments are passed as a list so paths containing whitespace
        # survive intact. "-t" gives the clip length directly: with "-ss"
        # placed before "-i" ffmpeg restarts timestamps at zero, so an
        # absolute end position given to "-to" would cut
        # start_sec + duration seconds instead of duration.
        subprocess.run(
            [
                "ffmpeg", "-y",
                "-ss", start_timestamp,
                "-i", str(video_file),
                "-t", str(duration),
                "-c", "copy",
                clip_path,
            ],
            check=True,
        )

        cap = cv2.VideoCapture(clip_path)
        out = None
        num_frames = 0
        try:
            if not cap.isOpened():
                raise gr.Error(
                    "Could not read the selected part of the video."
                )

            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            fps = cap.get(cv2.CAP_PROP_FPS)
            out = cv2.VideoWriter(
                annotated_path,
                cv2.VideoWriter_fourcc(*"mp4v"),
                fps,
                (width, height),
            )

            # fps is a float (e.g. 29.97), so the cap is generally not an
            # integer; it must be tested with >=, never ==.
            max_frames = duration * fps
            while cap.isOpened():
                try:
                    ret, frame = cap.read()
                except cv2.error as e:
                    # Stop on a decoder error and keep the frames written
                    # so far; retrying the same read could loop forever.
                    print(e)
                    break
                if not ret:
                    break
                outputs, img_info = predictor.inference(
                    frame, confthre, nmsthre
                )
                out.write(predictor.visual(outputs[0], img_info))
                num_frames += 1
                if num_frames >= max_frames:
                    break
        finally:
            # Release the handles before the scratch directory is removed.
            cap.release()
            if out is not None:
                out.release()

        if num_frames == 0:
            raise gr.Error(
                "No frames could be read from the selected part of the video."
            )

        # Reserve the result path, then close our handle so ffmpeg is the
        # only writer. delete=False keeps the file for the caller.
        with tempfile.NamedTemporaryFile(
            suffix="out.mp4", delete=False
        ) as out_file:
            out_path = out_file.name

        # Re-encode the mp4v stream written by OpenCV to H.264.
        subprocess.run(
            [
                "ffmpeg", "-y",
                "-loglevel", "quiet",
                "-stats",
                "-i", annotated_path,
                "-c:v", "libx264",
                out_path,
            ],
            check=True,
        )

    return out_path


# Video tab: the inputs are listed in the same order as the parameters of
# video_inference (video_file, confthre, nmsthre, start_sec, duration).
video_interface = gr.Interface(
    fn=video_inference,
    inputs=[
        gr.Video(),
        gr.Slider(
            minimum=0.01,
            maximum=1,
            value=0.5,
            step=0.01,
            label="Confidence Threshold",
        ),
        gr.Slider(
            minimum=0.01,
            maximum=1,
            value=0.01,
            step=0.01,
            label="NMS Threshold",
        ),
        gr.Slider(minimum=0, maximum=60, value=0, step=1, label="Start Second"),
        gr.Slider(minimum=1, maximum=10, value=3, step=1, label="Duration"),
    ],
    outputs=gr.Video(),
    title="OpenLenda video demo",
)

if __name__ == "__main__":
    # Serve both demos from a single app, one tab per interface.
    demo = gr.TabbedInterface(
        interface_list=[image_interface, video_interface],
        tab_names=["Image", "Video"],
        title="OpenLenda demo!",
    )
    demo.launch()