Spaces:

caltech-animal-tracking
/

Primate_Detection_V2

Sleeping

File size: 4,794 Bytes

bda28e6
a7fcb99
 
 
bfa3aba
 
 
a7fcb99
bfa3aba
 
 
 
 
 
 
 
bda28e6
bfa3aba
 
 
 
 
 
 
 
 
 
 
 
bda28e6
553f53a
 
 
bfa3aba
 
 
a7fcb99
 
bfa3aba
 
 
 
 
bda28e6
 
bfa3aba
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d7ae9fa
bfa3aba
 
 
 
 
 
 
 
 
 
 
 
 
 
5eaf980
d8544c2
bfa3aba
 
 
 
a7fcb99
c032acd
bfa3aba
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6209508

# User-tunable settings for this app.
# BATCH_SIZE is forwarded to owl_full_video() as its ``batch_size`` argument.
BATCH_SIZE = 8 # Change this to your desired batch size
# CUDA_PATH is exported below as the CUDA_HOME environment variable.
CUDA_PATH = "/usr/local/cuda-12.3/" # Change this to your CUDA path


import os 
import sys
# Export CUDA_HOME before the remaining imports run -- presumably so that the
# CUDA-dependent packages imported further down pick it up (TODO confirm).
# NOTE(review): this unconditionally overwrites any CUDA_HOME that is already
# set in the process environment.
os.environ["CUDA_HOME"] = CUDA_PATH

import gradio as gr
from tqdm import tqdm
import cv2
import os
import numpy as np
import pandas as pd
import torch
import time

from typing import Tuple
from PIL import Image
from owl_core import owl_full_video


def run_owl(input_vid,
            text_prompt,
            confidence_threshold,
            fps_processed,
            scaling_factor
            ):
    """Run OWL detection on an uploaded video and remember the output paths.

    The uploaded file is renamed so that its file name contains no spaces,
    then handed to ``owl_full_video``. The resulting CSV and video paths are
    stored in the module-level globals ``CSV_PATH`` and ``VID_PATH`` so that
    ``vid_download`` can offer them for download afterwards.

    Args:
        input_vid: Filesystem path of the uploaded video, as supplied by the
            Gradio ``Video`` input. ``None``/empty when nothing was uploaded.
        text_prompt: Detection prompt, forwarded unchanged.
        confidence_threshold: Detection threshold, forwarded unchanged.
        fps_processed: Forwarded as ``fps_processed``.
        scaling_factor: Downsample factor chosen in the UI; its reciprocal
            (``1 / scaling_factor``) is forwarded as ``scaling_factor``.

    Returns:
        The path of the output video returned by ``owl_full_video``.

    Raises:
        gr.Error: If no input video was provided.
    """
    global CSV_PATH, VID_PATH

    # Gradio passes None when the Video component is empty; surface a readable
    # message instead of failing with AttributeError on ``None.replace``.
    if not input_vid:
        raise gr.Error("Please upload a video before running detection.")

    start_time = time.time()

    # Replace spaces in the file name only. Rewriting the whole path would also
    # alter directory components, making os.rename() target a directory that
    # does not exist whenever the upload directory itself contains a space.
    directory, filename = os.path.split(input_vid)
    new_input_vid = os.path.join(directory, filename.replace(" ", "_"))
    if new_input_vid != input_vid:
        os.rename(input_vid, new_input_vid)

    csv_path, vid_path = owl_full_video(new_input_vid,
                                        text_prompt,
                                        confidence_threshold,
                                        fps_processed=fps_processed,
                                        # UI exposes a downsample factor (>= 1);
                                        # the detector receives its reciprocal.
                                        scaling_factor=1/scaling_factor,
                                        batch_size=BATCH_SIZE)

    # Publish the outputs for vid_download(), which reads these globals.
    CSV_PATH = csv_path
    VID_PATH = vid_path

    end_time = time.time()
    print(f'Processing time: {end_time - start_time} seconds')
    return vid_path

def vid_download():
    """Return the CSV and video paths recorded by the latest detection run.

    Reads the module-level globals ``CSV_PATH`` and ``VID_PATH`` (assigned by
    ``run_owl``), echoes them to stdout, and returns them as a two-element
    list in that order.

    Raises:
        NameError: If the globals have not been assigned yet, i.e. no
            detection run has completed.
    """
    outputs = [CSV_PATH, VID_PATH]
    print(*outputs)
    return outputs


# Build the Gradio interface: inputs and advanced options in the left column,
# the annotated video and downloadable files in the right column.
with gr.Blocks() as demo:
    gr.HTML(
        """
            <h1 align="center" style="font-size:xxx-large">🦍 Primate Detection</h1>
        """
    )
    
    with gr.Row():
        with gr.Column():
            # Named ``input_video`` (not ``input``) so the builtin input() is
            # not shadowed at module scope.
            input_video = gr.Video(label="Input Video", interactive=True)
            text_prompt = gr.Textbox(label="What do you want to detect? (Multiple species should be separated by commas)")
            with gr.Accordion("Advanced Options", open=False):
                conf_threshold = gr.Slider(
                    label="Confidence Threshold",
                    info="Adjust the threshold to change the sensitivity of the model, lower thresholds being more sensitive.",
                    minimum=0.0,
                    maximum=1.0,
                    value=0.3,
                    step=0.05
                )
                fps_processed = gr.Slider(
                    label="Frame Detection Rate",
                    info="Adjust the frame detection rate. I.e. a value of 120 will run detection every 120 frames, a value of 1 will run detection on every frame. Note: the lower the number the slower the processing time.",
                    minimum=1,
                    maximum=120,
                    value=10,
                    step=1
                )
                # The minimum of 1 matters: run_owl() passes 1 / scaling_factor
                # to the detector, so zero must never be selectable.
                scaling_factor = gr.Slider(
                    label="Downsample Factor",
                    info="Adjust the downsample factor. Note: the higher the number the faster the processing time but lower the accuracy.",
                    minimum=1,
                    maximum=10,
                    value=4,
                    step=1
                )

            # TODO: Make button visible only after a file has been uploaded
            run_btn = gr.Button(value="Run Detection", visible=True)
        with gr.Column():
            vid = gr.Video(label="Output Video", height=350, interactive=False, visible=True)
            # download_btn = gr.Button(value="Generate Download", visible=True)
            download_file = gr.Files(label="CSV, Video Output", interactive=False)
    
    # Clicking the button runs detection and shows the annotated video.
    run_btn.click(
        fn=run_owl,
        inputs=[input_video, text_prompt, conf_threshold, fps_processed, scaling_factor],
        outputs=[vid],
    )
    # When the output video changes, expose the CSV and video files that
    # run_owl() recorded for download.
    vid.change(fn=vid_download, outputs=download_file)

    # NOTE(review): disabled example block. It references ``run_sam_dino``,
    # which is not defined in this file, and lists six example values for five
    # inputs -- fix both before re-enabling.
    # gr.Examples(
    #     [["baboon_15s.mp4", "baboon", 0.25, 0.25, 1, 1]],
    #     inputs = [input_video, text_prompt, conf_threshold, fps_processed, scaling_factor],
    #     outputs = [vid],
    #     fn=run_sam_dino,
    #     cache_examples=True,
    #     label='Example'
    #   )
    
    gr.DuplicateButton()
    
    # NOTE(review): the FAQ sentence about "Choosing only bounding boxes" does
    # not correspond to any option in the interface built above -- confirm and
    # update the wording.
    gr.Markdown(
        """
        ## Frequently Asked Questions 
        
        ##### How can I run the interface on my own computer? 
        By clicking on the three dots on the top right corner of the interface, you will be able to clone the repository or run it with a Docker image on your local machine. \
        For local machine setup instructions please check the README file. 
        ##### The video is very slow to process, how can I speed it up?
        You can speed up the processing by adjusting the frame detection rate in the advanced options. The lower the number the slower the processing time. Choosing only\
        bounding boxes will make the processing faster. You can also duplicate the space using the Duplicate Button and choose a different GPU which will make the processing faster.
        """
    )

# Start the app; share=True requests a public Gradio share link.
demo.launch(share=True)