testing-roboflow / dino.txt
muhammadsalmanalfaridzi's picture
Update dino.txt
586e4bb verified
raw
history blame
5.52 kB
def _annotate_nestle_products(frame, predictions):
    """Draw YOLO detections on *frame* and return a ``{class_name: count}`` dict.

    Each prediction is a mapping with 'class', 'x', 'y', 'width' and 'height'
    keys; 'x'/'y' are used as the box centre. Predictions sharing the same
    class and centre are counted once, but every prediction is drawn.
    """
    seen_ids = set()
    counts = {}
    for prediction in predictions:
        class_name = prediction['class']
        x, y = prediction['x'], prediction['y']
        w, h = prediction['width'], prediction['height']

        # Identify an object by class + centre so exact duplicates are not
        # counted twice within the same frame.
        object_id = f"{class_name}_{x}_{y}"
        if object_id not in seen_ids:
            seen_ids.add(object_id)
            counts[class_name] = counts.get(class_name, 0) + 1

        # Green box and label for products recognised by the YOLO model.
        top_left = (int(x - w / 2), int(y - h / 2))
        bottom_right = (int(x + w / 2), int(y + h / 2))
        cv2.rectangle(frame, top_left, bottom_right, (0, 255, 0), 2)
        cv2.putText(frame, class_name, (int(x - w / 2), int(y - h / 2 - 10)),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
    return counts


def _annotate_unclassified_products(frame, objects):
    """Draw DINO-X detections on *frame* and return a ``{category: count}`` dict.

    Every object is expected to expose ``category``, ``bbox`` (x1, y1, x2, y2)
    and ``score`` attributes. Categories are normalised with strip + lower.
    """
    counts = {}
    for obj in objects:
        class_name = obj.category.strip().lower()
        counts[class_name] = counts.get(class_name, 0) + 1

        # Red box and "<category> <score>" label for unclassified objects.
        x1, y1, x2, y2 = obj.bbox
        cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), (0, 0, 255), 2)
        cv2.putText(frame, f"{class_name} {obj.score:.2f}", (int(x1), int(y1 - 10)),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)
    return counts


def _draw_count_overlay(frame, sections):
    """Write per-class counts and a total for each section in the top-left corner.

    *sections* is an iterable of ``(counts, total_label)`` pairs. Each section is
    laid out as one line per class, a blank line, then "<total_label>: <total>",
    with a 30 px line pitch and one extra blank line between sections.
    """
    y_offset = 20
    for counts, total_label in sections:
        lines = [f"{class_name}: {count}" for class_name, count in counts.items()]
        lines.append("")  # visual gap before the total line
        lines.append(f"{total_label}: {sum(counts.values())}")
        for line in lines:
            if line:
                cv2.putText(frame, line, (10, y_offset),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
            y_offset += 30
        y_offset += 30  # slight gap between sections


def detect_objects_in_video(video_path):
    """Annotate every frame of a video with product detections and counts.

    For each frame the function:
      1. runs ``yolo_model.predict`` (confidence=50, overlap=80) and draws the
         resulting boxes in green (Nestle products);
      2. uploads the frame to DINO-X, runs a detection task with
         ``DINOX_PROMPT`` and draws the resulting boxes in red (unclassified
         products);
      3. overlays per-class counts and totals for both groups;
      4. appends the annotated frame to the output video.

    Args:
        video_path: Path of the input video. Anything whose name does not end
            in ".mp4" is first passed through ``convert_video_to_mp4``.

    Returns:
        On success, the path of the annotated video ("/tmp/output_video.mp4").
        On failure, the tuple ``(None, error_message)``. NOTE: the two shapes
        differ; this is kept as-is because existing callers may rely on it.

    Raises:
        Whatever ``Config`` / ``Client`` / ``tempfile.mkdtemp`` raise; these
        run before the error-capturing ``try`` block.
    """
    import shutil  # local import: only needed to clean up the scratch directory

    temp_output_path = "/tmp/output_video.mp4"
    fallback_fps = 30.0  # used when the container reports no usable frame rate

    # Initialise the DINO-X client used for unclassified-product detection.
    dinox_config = Config(DINOX_API_KEY)
    dinox_client = Client(dinox_config)

    temp_frames_dir = tempfile.mkdtemp()
    video = None
    output_video = None
    frame_count = 0
    try:
        # Convert video to MP4 if necessary.
        if not video_path.endswith(".mp4"):
            # Convert into the scratch directory rather than temp_output_path:
            # the VideoWriter below writes to temp_output_path, and sharing one
            # path would overwrite the input while it is still being read.
            converted_path = os.path.join(temp_frames_dir, "converted_input.mp4")
            video_path, err = convert_video_to_mp4(video_path, converted_path)
            if not video_path:
                return None, f"Video conversion error: {err}"

        # Open the input and read its geometry.
        video = cv2.VideoCapture(video_path)
        if not video.isOpened():
            return None, f"An error occurred: could not open video {video_path}"

        # Keep the fractional frame rate (e.g. 29.97) so the output duration
        # matches the input; `not (fps > 0)` also rejects NaN.
        frame_rate = video.get(cv2.CAP_PROP_FPS)
        if not frame_rate > 0:
            frame_rate = fallback_fps
        frame_width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
        frame_size = (frame_width, frame_height)

        # VideoWriter for the annotated output video.
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        output_video = cv2.VideoWriter(temp_output_path, fourcc, frame_rate, frame_size)
        if not output_video.isOpened():
            return None, f"An error occurred: could not open output video {temp_output_path}"

        while True:
            ret, frame = video.read()
            if not ret:
                break

            # Both detectors take a file path, so persist the frame first.
            frame_path = os.path.join(temp_frames_dir, f"frame_{frame_count}.jpg")
            cv2.imwrite(frame_path, frame)

            # Nestle products via the YOLO model.
            yolo_pred = yolo_model.predict(frame_path, confidence=50, overlap=80).json()
            nestle_counts = _annotate_nestle_products(frame, yolo_pred['predictions'])

            # Unclassified products via DINO-X.
            image_url = dinox_client.upload_file(frame_path)
            task = DinoxTask(
                image_url=image_url,
                prompts=[TextPrompt(text=DINOX_PROMPT)]
            )
            dinox_client.run_task(task)
            unclassified_counts = _annotate_unclassified_products(frame, task.result.objects)

            # Count overlay. The label is spelled in plain ASCII because
            # cv2.putText's Hershey fonts render non-ASCII characters as '?'.
            _draw_count_overlay(frame, (
                (nestle_counts, "Total Nestle Products"),
                (unclassified_counts, "Total Unclassified Products"),
            ))

            # Write processed frame to output video.
            output_video.write(frame)
            frame_count += 1

            # The JPEG is no longer needed; drop it so disk usage stays flat
            # regardless of video length.
            os.remove(frame_path)

        return temp_output_path
    except Exception as e:
        return None, f"An error occurred: {e}"
    finally:
        # Always release OpenCV handles (this also finalises the output file)
        # and remove the scratch directory, even when a frame failed.
        if video is not None:
            video.release()
        if output_video is not None:
            output_video.release()
        shutil.rmtree(temp_frames_dir, ignore_errors=True)