rt-detr-object-detection

Building on Zero

App Files Files Community

freddyaboulton HF Staff committed on Sep 11, 2024

Commit

790227b

1 Parent(s): 66947f7

format

Browse files

Files changed (1) hide show

app.py +34 -13

app.py CHANGED Viewed

@@ -8,10 +8,13 @@ from PIL import Image, ImageDraw, ImageFont
 image_processor = RTDetrImageProcessor.from_pretrained("PekingU/rtdetr_r50vd")
 model = RTDetrForObjectDetection.from_pretrained("PekingU/rtdetr_r50vd")
 def draw_bounding_boxes(image, results, model, threshold=0.3):
     draw = ImageDraw.Draw(image)
     for result in results:
-        for score, label_id, box in zip(result["scores"], result["labels"], result["boxes"]):
             if score > threshold:
                 label = model.config.id2label[label_id.item()]
                 box = [round(i) for i in box.tolist()]
@@ -22,13 +25,14 @@ def draw_bounding_boxes(image, results, model, threshold=0.3):
 @spaces.GPU
 def inference(image, conf_threshold):
     inputs = image_processor(images=image, return_tensors="pt")
     with torch.no_grad():
         outputs = model(**inputs)
-    results = image_processor.post_process_object_detection(outputs, target_sizes=torch.tensor([image.size[::-1]]), threshold=0.3)
     return draw_bounding_boxes(image, results, model, threshold=conf_threshold)
@@ -37,7 +41,14 @@ def app():
     with gr.Blocks():
         with gr.Row():
             with gr.Column():
-                image = gr.Image(type="pil", label="Image", visible=True, sources="webcam", height=500, width=500)
                 conf_threshold = gr.Slider(
                     label="Confidence Threshold",
                     minimum=0.0,
@@ -50,10 +61,11 @@ def app():
             inputs=[image, conf_threshold],
             outputs=[image],
             stream_every=0.2,
-            time_limit=30
         )
-css=""".my-group {max-width: 600px !important; max-height: 600 !important;}
                       .my-column {display: flex !important; justify-content: center !important; align-items: center !important};"""
 with gr.Blocks(css=css) as app:
@@ -62,16 +74,25 @@ with gr.Blocks(css=css) as app:
     <h1 style='text-align: center'>
     Near Real-Time Webcam Stream with RT-DETR
     </h1>
-    """)
     gr.HTML(
         """
         <h3 style='text-align: center'>
         <a href='https://arxiv.org/abs/2304.08069' target='_blank'>arXiv</a> | <a href='https://github.com/lyuwenyu/RT-DETR' target='_blank'>github</a>
         </h3>
-        """)
-    with gr.Column(elem_classes=['my-column']):
-         with gr.Group(elem_classes=["my-group"]):
-            image = gr.Image(type="pil", label="Image", visible=True, sources="webcam", height=500, width=500)
             conf_threshold = gr.Slider(
                 label="Confidence Threshold",
                 minimum=0.0,
@@ -84,7 +105,7 @@ with gr.Blocks(css=css) as app:
                 inputs=[image, conf_threshold],
                 outputs=[image],
                 stream_every=0.2,
-                time_limit=30
             )
-if __name__ == '__main__':
     app.launch()

 image_processor = RTDetrImageProcessor.from_pretrained("PekingU/rtdetr_r50vd")
 model = RTDetrForObjectDetection.from_pretrained("PekingU/rtdetr_r50vd")
 def draw_bounding_boxes(image, results, model, threshold=0.3):
     draw = ImageDraw.Draw(image)
     for result in results:
+        for score, label_id, box in zip(
+            result["scores"], result["labels"], result["boxes"]
+        ):
             if score > threshold:
                 label = model.config.id2label[label_id.item()]
                 box = [round(i) for i in box.tolist()]
 @spaces.GPU
 def inference(image, conf_threshold):
     inputs = image_processor(images=image, return_tensors="pt")
     with torch.no_grad():
         outputs = model(**inputs)
+    results = image_processor.post_process_object_detection(
+        outputs, target_sizes=torch.tensor([image.size[::-1]]), threshold=0.3
+    )
     return draw_bounding_boxes(image, results, model, threshold=conf_threshold)
     with gr.Blocks():
         with gr.Row():
             with gr.Column():
+                image = gr.Image(
+                    type="pil",
+                    label="Image",
+                    visible=True,
+                    sources="webcam",
+                    height=500,
+                    width=500,
+                )
                 conf_threshold = gr.Slider(
                     label="Confidence Threshold",
                     minimum=0.0,
             inputs=[image, conf_threshold],
             outputs=[image],
             stream_every=0.2,
+            time_limit=30,
         )
+css = """.my-group {max-width: 600px !important; max-height: 600 !important;}
                       .my-column {display: flex !important; justify-content: center !important; align-items: center !important};"""
 with gr.Blocks(css=css) as app:
     <h1 style='text-align: center'>
     Near Real-Time Webcam Stream with RT-DETR
     </h1>
+    """
+    )
     gr.HTML(
         """
         <h3 style='text-align: center'>
         <a href='https://arxiv.org/abs/2304.08069' target='_blank'>arXiv</a> | <a href='https://github.com/lyuwenyu/RT-DETR' target='_blank'>github</a>
         </h3>
+        """
+    )
+    with gr.Column(elem_classes=["my-column"]):
+        with gr.Group(elem_classes=["my-group"]):
+            image = gr.Image(
+                type="pil",
+                label="Image",
+                visible=True,
+                sources="webcam",
+                height=500,
+                width=500,
+            )
             conf_threshold = gr.Slider(
                 label="Confidence Threshold",
                 minimum=0.0,
                 inputs=[image, conf_threshold],
                 outputs=[image],
                 stream_every=0.2,
+                time_limit=30,
             )
+if __name__ == "__main__":
     app.launch()