File size: 5,604 Bytes
692e2af
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
# Batch size that run_owl_batch forwards to owl_batch_video as `batch_size`.
BATCH_SIZE = 8 # Change this to your desired batch size
# CUDA installation directory; exported below as the CUDA_HOME environment
# variable before the remaining imports run.
CUDA_PATH = "/usr/local/cuda-12.3/" # Change this to your CUDA path


from datetime import datetime
import os 
import sys
# set CUDA_HOME
os.environ["CUDA_HOME"] = CUDA_PATH

import gradio as gr
from tqdm import tqdm
import cv2
import os
import time

from owl_batch import owl_batch_video

# global CSV_PATH # csv that contains video names and detection results
# global POS_ZIP # zip of positive videos and individual results
# global NEG_ZIP # zip of negative videos and individual results

def run_owl_batch(
    input_vids: list[str] | str,
    target_prompt: str,
    species_prompt: str,
    conf_threshold: float,
    fps_processed: int,
    scaling_factor: float,
) -> tuple[str, str, str]:
    """Run batched detection over the uploaded videos and return the result.

    args:
        input_vids: path, or list of paths, of the videos to process. Any
            path containing spaces is renamed on disk (spaces become
            underscores) and the renamed path is what gets processed.
        target_prompt: comma-separated prompt(s) to search for.
        species_prompt: comma-separated prompt(s) to query; the target
            prompt is appended when it does not already occur in it.
        conf_threshold: threshold for detection.
        fps_processed: forwarded unchanged to owl_batch_video as
            `fps_processed`.
        scaling_factor: downsample factor; its reciprocal is forwarded to
            owl_batch_video as `scaling_factor`.

    returns:
        Whatever owl_batch_video returns (bound to `zip_path` here).
        NOTE(review): the annotation promises a 3-tuple of paths, but this
        function only forwards a single value -- confirm against
        owl_batch_video.
    """
    start_time = time.time()

    # A single upload may arrive as a bare path; normalise to a list.
    if isinstance(input_vids, str):
        input_vids = [input_vids]

    # Make sure there are no spaces in the paths. The renamed paths must be
    # the ones handed to the detector: after os.rename the old paths no
    # longer exist on disk.
    renamed_vids = []
    for vid in input_vids:
        new_input_vid = vid.replace(" ", "_")
        if new_input_vid != vid:
            os.rename(vid, new_input_vid)
        renamed_vids.append(new_input_vid)

    # species prompt has to contain target prompt, otherwise add it
    if target_prompt not in species_prompt:
        species_prompt = f"{species_prompt}, {target_prompt}"

    # Turn the target prompt into a list. Split on bare commas and strip the
    # pieces so "a,b" and "a, b" are treated alike; empty pieces (e.g. from a
    # trailing comma) are dropped.
    target_list = [
        piece.strip() for piece in target_prompt.split(",") if piece.strip()
    ]

    # Minute-resolution timestamp used to name the output directory.
    timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M")

    zip_path = owl_batch_video(
        renamed_vids,
        target_list,
        species_prompt,
        conf_threshold,
        fps_processed=fps_processed,
        scaling_factor=1 / scaling_factor,
        batch_size=BATCH_SIZE,
        save_dir=f"temp_{timestamp}",
    )

    end_time = time.time()
    print(f'Processing time: {end_time - start_time} seconds')
    return zip_path


# Gradio UI: inputs and advanced options on the left, downloadable results on
# the right, followed by a short FAQ.
with gr.Blocks() as demo:
    gr.HTML(
        """

            <h1 align="center" style="font-size:xxx-large">🦍 Primate Detection</h1>

        """
    )

    with gr.Row():
        with gr.Column():
            # Named `input_files` rather than `input` so the builtin is not shadowed.
            input_files = gr.File(label="Upload Videos", file_types=['.mp4', '.mov'], file_count="multiple")
            target_prompt = gr.Textbox(label="What do you want to detect? (Multiple species should be separated by commas)")
            species_prompt = gr.Textbox(label="Which species are in your dataset? (Multiple species should be separated by commas)")
            with gr.Accordion("Advanced Options", open=False):
                conf_threshold = gr.Slider(
                    label="Confidence Threshold",
                    info="Adjust the threshold to change the sensitivity of the model, lower thresholds being more sensitive.",
                    minimum=0.0,
                    maximum=1.0,
                    value=0.3,
                    step=0.05
                )
                fps_processed = gr.Slider(
                    label="Frame Detection Rate",
                    info="Adjust the frame detection rate. I.e. a value of 120 will run detection every 120 frames, a value of 1 will run detection on every frame. Note: the lower the number the slower the processing time.",
                    minimum=1,
                    maximum=120,
                    value=10,
                    step=1
                )
                scaling_factor = gr.Slider(
                    label="Downsample Factor",
                    info="Adjust the downsample factor. Note: the higher the number the faster the processing time but lower the accuracy.",
                    minimum=1,
                    maximum=10,
                    value=4,
                    step=1
                )
            with gr.Row():
                # The clear button resets the upload and both prompts; the
                # advanced sliders are not in its component list.
                clear_btn = gr.ClearButton(components=[input_files, target_prompt, species_prompt])
                run_btn = gr.Button(value="Run Detection", variant='primary')
        with gr.Column():
            download_file = gr.Files(label="CSV, Video Output", interactive=False)

    # Input order must match the positional parameters of run_owl_batch.
    run_btn.click(
        fn=run_owl_batch,
        inputs=[input_files, target_prompt, species_prompt, conf_threshold, fps_processed, scaling_factor],
        outputs=[download_file],
    )

    gr.DuplicateButton()

    gr.Markdown(
        """

        ## Frequently Asked Questions 

        

        ##### How can I run the interface on my own computer? 

        By clicking on the three dots on the top right corner of the interface, you will be able to clone the repository or run it with a Docker image on your local machine. \

        For local machine setup instructions please check the README file. 

        ##### The video is very slow to process, how can I speed it up?

        You can speed up the processing by adjusting the frame detection rate in the advanced options. The lower the number the slower the processing time. Choosing only\

        bounding boxes will make the processing faster. You can also duplicate the space using the Duplicate Button and choose a different GPU which will make the processing faster.

        """
    )

# Start the app; share=True additionally requests a public share link.
demo.launch(share=True)