techysanoj committed
Commit 69c84d2 · 1 Parent(s): c81d277

Update app.py

Files changed (1)
  1. app.py +40 -18
app.py CHANGED
@@ -3,6 +3,7 @@ import cv2
 import torch
 from PIL import Image
 from transformers import DetrImageProcessor, DetrForObjectDetection
+import numpy as np
 
 # Load the pre-trained DETR model
 processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50", revision="no_timm")
@@ -33,33 +34,54 @@ def image_object_detection(image_pil):
     return image_np
 
 # Function for live object detection from the camera
-def live_object_detection(image_pil):
-    # Process the frame with the DETR model
-    inputs = processor(images=image_pil, return_tensors="pt")
-    outputs = model(**inputs)
+def live_object_detection():
+    # Open a connection to the camera (replace with your own camera setup)
+    cap = cv2.VideoCapture(0)
 
-    # convert outputs (bounding boxes and class logits) to COCO API
-    # let's only keep detections with score > 0.9
-    target_sizes = torch.tensor([image_pil.size[::-1]])
-    results = processor.post_process_object_detection(outputs, target_sizes=target_sizes, threshold=0.9)[0]
+    while True:
+        # Capture frame-by-frame
+        ret, frame = cap.read()
 
-    # Draw bounding boxes on the image
-    for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
-        box = [int(round(i)) for i in box.tolist()]
-        cv2.rectangle(image_pil, (box[0], box[1]), (box[2], box[3]), (0, 255, 0), 2)
-        label_text = f"{model.config.id2label[label.item()]}: {round(score.item(), 3)}"
-        cv2.putText(image_pil, label_text, (box[0], box[1] - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
+        # Convert the frame to PIL Image
+        frame_pil = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
+
+        # Process the frame with the DETR model
+        inputs = processor(images=frame_pil, return_tensors="pt")
+        outputs = model(**inputs)
+
+        # convert outputs (bounding boxes and class logits) to COCO API
+        # let's only keep detections with score > 0.9
+        target_sizes = torch.tensor([frame_pil.size[::-1]])
+        results = processor.post_process_object_detection(outputs, target_sizes=target_sizes, threshold=0.9)[0]
 
-    return image_pil
+        # Draw bounding boxes on the frame
+        for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
+            box = [int(round(i)) for i in box.tolist()]
+            cv2.rectangle(frame, (box[0], box[1]), (box[2], box[3]), (0, 255, 0), 2)
+            label_text = f"{model.config.id2label[label.item()]}: {round(score.item(), 3)}"
+            cv2.putText(frame, label_text, (box[0], box[1] - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
+
+        # Display the resulting frame
+        cv2.imshow('Object Detection', frame)
+
+        # Break the loop when 'q' key is pressed
+        if cv2.waitKey(1) & 0xFF == ord('q'):
+            break
+
+    # Release the camera and close all windows
+    cap.release()
+    cv2.destroyAllWindows()
 
 # Define the Gradio interface
 iface = gr.Interface(
     fn=[image_object_detection, live_object_detection],
     inputs=[
-        gr.Image(type="pil", label="Upload an image for object detection"),
-        "webcam",
+        gr.Image(type="pil", label="Upload an image for object detection")  # Remove this line
+    ],
+    outputs=[
+        "image",
+        "image",
     ],
-    outputs=["image", "image"],
     live=True,
 )
 
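Note that the updated live_object_detection drives the camera through an OpenCV VideoCapture/imshow loop, which opens a native window on the machine running the script rather than rendering inside the Gradio page. For comparison, here is a minimal sketch (not part of this commit) of keeping per-frame webcam detection inside Gradio by reusing the existing image_object_detection helper; detect_frame and demo are illustrative names, and the gr.Image(sources=["webcam"], ...) signature assumes a Gradio 4.x release.

import gradio as gr

def detect_frame(frame_pil):
    # Reuse the existing single-image DETR path, which already accepts a
    # PIL image and returns an annotated numpy array.
    return image_object_detection(frame_pil)

demo = gr.Interface(
    fn=detect_frame,
    inputs=gr.Image(sources=["webcam"], type="pil", label="Webcam frame"),
    outputs="image",
    live=True,
)

if __name__ == "__main__":
    demo.launch()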