Create dino.txt
dino.txt
ADDED
@@ -0,0 +1,118 @@
import os
import tempfile

import cv2

# dds-cloudapi-sdk is the DINO-X API client; these import paths follow the SDK examples.
from dds_cloudapi_sdk import Config, Client, TextPrompt
from dds_cloudapi_sdk.tasks.dinox import DinoxTask

# DINOX_API_KEY, yolo_model (a Roboflow-hosted YOLO model), and convert_video_to_mp4
# are assumed to be defined elsewhere in the app.


def detect_objects_in_video_with_dinox(video_path):
    temp_output_path = "/tmp/output_video.mp4"
    temp_converted_path = "/tmp/converted_input.mp4"  # Separate path so the converted input is not overwritten by the output writer
    temp_frames_dir = tempfile.mkdtemp()
    frame_count = 0
    previous_detections = {}  # Keeps track of the previous frame's detections (currently informational only)

    # DINO-X configuration (for unclassified products)
    dinox_config = Config(DINOX_API_KEY)
    dinox_client = Client(dinox_config)
    DINOX_PROMPT = "beverage . bottle . cans . boxed milk . milk"  # Customize based on your unclassified products

    try:
        # Convert the video to MP4 if necessary
        if not video_path.endswith(".mp4"):
            video_path, err = convert_video_to_mp4(video_path, temp_converted_path)
            if not video_path:
                return None, f"Video conversion error: {err}"

        # Read the video and collect its properties
        video = cv2.VideoCapture(video_path)
        frame_rate = int(video.get(cv2.CAP_PROP_FPS))
        frame_width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
        frame_size = (frame_width, frame_height)

        # VideoWriter for the annotated output video
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        output_video = cv2.VideoWriter(temp_output_path, fourcc, frame_rate, frame_size)

        while True:
            ret, frame = video.read()
            if not ret:
                break

            # Save the frame temporarily for the prediction APIs
            frame_path = os.path.join(temp_frames_dir, f"frame_{frame_count}.jpg")
            cv2.imwrite(frame_path, frame)

            # ================== YOLO Detection (Nestlé Products) ==================
            predictions = yolo_model.predict(frame_path, confidence=50, overlap=80).json()

            # Track the current frame's YOLO detections
            current_detections_yolo = {}
            for prediction in predictions['predictions']:
                class_name = prediction['class']
                x, y, w, h = prediction['x'], prediction['y'], prediction['width'], prediction['height']
                object_id = f"{class_name}_{x}_{y}"

                if object_id not in current_detections_yolo:
                    current_detections_yolo[object_id] = class_name

                # Draw a bounding box for each detected product (YOLO returns center x/y plus width/height)
                cv2.rectangle(frame, (int(x - w / 2), int(y - h / 2)), (int(x + w / 2), int(y + h / 2)), (0, 255, 0), 2)
                cv2.putText(frame, class_name, (int(x - w / 2), int(y - h / 2 - 10)),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

            # ================== DINO-X Detection (Unclassified Products) ==================
            image_url = dinox_client.upload_file(frame_path)
            task = DinoxTask(
                image_url=image_url,
                prompts=[TextPrompt(text=DINOX_PROMPT)]
            )
            dinox_client.run_task(task)
            dinox_pred = task.result.objects

            # Track the current frame's DINO-X detections (unclassified products)
            current_detections_dinox = {}
            for obj in dinox_pred:
                dinox_box = obj.bbox  # [x1, y1, x2, y2]
                class_name = obj.category.strip().lower()
                object_id = f"{class_name}_{dinox_box[0]}_{dinox_box[1]}"

                if object_id not in current_detections_dinox:
                    current_detections_dinox[object_id] = class_name

                # Draw a bounding box for each unclassified product
                x1, y1, x2, y2 = dinox_box
                cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), (0, 0, 255), 2)
                cv2.putText(frame, f"{class_name} {obj.score:.2f}", (int(x1), int(y1 - 10)),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)

            # ================== Update Counts ==================
            all_class_count = {}
            for class_name in current_detections_yolo.values():
                all_class_count[class_name] = all_class_count.get(class_name, 0) + 1

            for class_name in current_detections_dinox.values():
                all_class_count[class_name] = all_class_count.get(class_name, 0) + 1

            # Generate the per-class count text
            count_text = ""
            total_product_count = 0
            for class_name, count in all_class_count.items():
                count_text += f"{class_name}: {count}\n"
                total_product_count += count
            count_text += f"\nTotal Product: {total_product_count}"

            # Overlay the counts onto the frame
            y_offset = 20
            for line in count_text.split("\n"):
                cv2.putText(frame, line, (10, y_offset), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
                y_offset += 30  # Move down for the next line

            # Write the processed frame to the output video
            output_video.write(frame)
            frame_count += 1

            # Remember this frame's detections for the next iteration
            previous_detections = current_detections_yolo

        video.release()
        output_video.release()

        # Return (path, error) so the success and failure branches have the same shape
        return temp_output_path, None

    except Exception as e:
        return None, f"An error occurred: {e}"
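
A minimal usage sketch for the function above, not part of dino.txt. It assumes the dds-cloudapi-sdk and roboflow packages are installed, DINOX_API_KEY and ROBOFLOW_API_KEY are set as environment variables, and a Roboflow-hosted YOLO model; the workspace, project name, and version below are placeholders, and convert_video_to_mp4 is assumed to be defined elsewhere in the app.

import os
from roboflow import Roboflow

# Hypothetical credentials and project -- replace with your own values.
DINOX_API_KEY = os.environ["DINOX_API_KEY"]
rf = Roboflow(api_key=os.environ["ROBOFLOW_API_KEY"])
yolo_model = rf.workspace().project("nestle-products").version(1).model  # placeholder project/version

# Process a sample video and report the result.
output_path, error = detect_objects_in_video_with_dinox("sample.mp4")
if error:
    print(f"Processing failed: {error}")
else:
    print(f"Annotated video written to {output_path}")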