freddyaboulton committed
Commit 61732db · Parent(s): 6a95f1f
Files changed (1): app.py (+17 −9)

app.py CHANGED
@@ -2,6 +2,9 @@ import spaces
 import gradio as gr
 import cv2
 from PIL import Image
+import torch
+import time
+import numpy as np
 
 from transformers import RTDetrForObjectDetection, RTDetrImageProcessor
 
@@ -16,7 +19,7 @@ def stream_object_detection(video, conf_threshold):
 
     video_codec = cv2.VideoWriter_fourcc(*"x264") # type: ignore
     fps = int(cap.get(cv2.CAP_PROP_FPS))
-    desired_fps = fps // 3
+    desired_fps = fps // 5
     width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
 
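Reviewer note: after this change, desired_fps is the post-subsampling frame rate, and the batch check later in this commit (len(batch) == 2 * desired_fps) flushes roughly two seconds of video per inference call. A quick back-of-the-envelope check, assuming a 30 fps source (the numbers are illustrative only):

    fps = 30                          # assumed source frame rate
    desired_fps = fps // 5            # 6 frames/s survive the every-5th-frame subsample
    batch_size = 2 * desired_fps      # 12 frames per inference batch
    seconds_per_batch = batch_size * 5 / fps
    print(seconds_per_batch)          # 2.0 -> each batch covers ~2 s of source video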
@@ -24,28 +27,33 @@ def stream_object_detection(video, conf_threshold):
 
     n_frames = 0
     n_chunks = 0
-    name = str(current_dir / f"output_{n_chunks}.ts")
+
+    name = f"output_{n_chunks}.ts"
     segment_file = cv2.VideoWriter(name, video_codec, fps, (width, height)) # type: ignore
     batch = []
 
     while iterating:
         frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-        if n_frames % 3 == 0:
+        if n_frames % 5 == 0:
             batch.append(frame)
-        if len(batch) == desired_fps:
+        if len(batch) == 2 * desired_fps:
             inputs = image_processor(images=batch, return_tensors="pt")
 
+            print(f"starting batch of size {len(batch)}")
+            start = time.time()
             with torch.no_grad():
                 outputs = model(**inputs)
+            end = time.time()
+            print("time taken ", end - start)
 
             boxes = image_processor.post_process_object_detection(
                 outputs,
-                target_sizes=torch.tensor([batch[0].shape[::-1]] * len(batch)),
+                target_sizes=torch.tensor([frame[0].shape[:2][::-1]] * len(batch)),
                 threshold=conf_threshold)
 
             for array, box in zip(batch, boxes):
-                pil_image = draw_bounding_boxes(Image.from_array(array), boxes[0], model, 0.3)
-                frame = numpy.array(pil_image)
+                pil_image = draw_bounding_boxes(Image.fromarray(array), box, model, conf_threshold)
+                frame = np.array(pil_image)
                 # Convert RGB to BGR
                 frame = frame[:, :, ::-1].copy()
                 segment_file.write(frame)
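Reviewer note on the new target_sizes line: transformers documents target_sizes for post_process_object_detection as one (height, width) pair per image. For an H×W×3 frame, frame[0] is the first pixel row (shape (W, 3)), so frame[0].shape[:2][::-1] evaluates to (3, W) rather than (H, W). A minimal sketch of the documented ordering, using a hypothetical frames list standing in for batch:

    import numpy as np
    import torch

    frames = [np.zeros((480, 640, 3), dtype=np.uint8)]          # stand-in for `batch`
    target_sizes = torch.tensor([f.shape[:2] for f in frames])  # (height, width) per image
    print(target_sizes)                                         # tensor([[480, 640]])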
@@ -54,7 +62,7 @@ def stream_object_detection(video, conf_threshold):
             n_frames = 0
             n_chunks += 1
             yield name
-            name = str(current_dir / f"output_{n_chunks}.ts")
+            name = f"output_{n_chunks}.ts"
             segment_file = cv2.VideoWriter(name, video_codec, fps, (width, height)) # type: ignore
 
         iterating, frame = cap.read()
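Reviewer note: the yield name / fresh VideoWriter hand-off above is the core of the streaming output: each chunk is closed as a standalone .ts file and its path is yielded so the player can start on it while later frames are still being processed. A minimal sketch of the same pattern with the detection step stripped out (file names and the 2-second cutoff are illustrative; the last partial segment is dropped for brevity):

    import cv2

    def stream_segments(video_path):
        cap = cv2.VideoCapture(video_path)
        fps = int(cap.get(cv2.CAP_PROP_FPS)) or 30
        size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
                int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
        codec = cv2.VideoWriter_fourcc(*"x264")  # needs an OpenCV build with H.264 support
        n_chunks, n_frames = 0, 0
        name = f"output_{n_chunks}.ts"
        writer = cv2.VideoWriter(name, codec, fps, size)
        ok, frame = cap.read()
        while ok:
            writer.write(frame)
            n_frames += 1
            if n_frames == 2 * fps:           # close a segment every ~2 s of video
                writer.release()
                yield name                    # hand the finished chunk to the player
                n_chunks, n_frames = n_chunks + 1, 0
                name = f"output_{n_chunks}.ts"
                writer = cv2.VideoWriter(name, codec, fps, size)
            ok, frame = cap.read()
        writer.release()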
@@ -83,7 +91,7 @@ with gr.Blocks(css=css) as app:
     """)
     with gr.Column(elem_classes=["my-column"]):
         with gr.Group(elem_classes=["my-group"]):
-            video = gr.Video(label="Video Source")
+            video = gr.Video(label="Video Source", streaming=True, autoplay=True)
             conf_threshold = gr.Slider(
                 label="Confidence Threshold",
                 minimum=0.0,
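Reviewer note: streaming=True and autoplay=True are what let the component consume the yielded .ts chunks as they arrive instead of waiting for a single finished file. A minimal sketch of how such a component is typically wired to the generator in Blocks (the upload event binding here is an assumption; the actual hookup lives outside this hunk):

    import gradio as gr

    with gr.Blocks() as demo:
        video = gr.Video(label="Video Source", streaming=True, autoplay=True)
        conf_threshold = gr.Slider(label="Confidence Threshold",
                                   minimum=0.0, maximum=1.0, value=0.3)
        # Hypothetical binding: stream_object_detection is the generator from
        # app.py above; each yielded segment path is streamed back into `video`.
        video.upload(fn=stream_object_detection,
                     inputs=[video, conf_threshold],
                     outputs=[video])

    demo.launch()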