
def detect_objects_in_video(video_path):
    temp_output_path = "/tmp/output_video.mp4"
    temp_frames_dir = tempfile.mkdtemp()
    frame_count = 0
    previous_detections = {}  # Object detections from the previous frame

    # Initialize the DINO-X client, used to detect unclassified products
    dinox_config = Config(DINOX_API_KEY)
    dinox_client = Client(dinox_config)

    try:
        # Convert the video to MP4 if necessary; write the converted copy to its own
        # temporary path so it is not overwritten by the output VideoWriter below
        if not video_path.endswith(".mp4"):
            converted_path = os.path.join(temp_frames_dir, "converted_input.mp4")
            video_path, err = convert_video_to_mp4(video_path, converted_path)
            if not video_path:
                return None, f"Video conversion error: {err}"

        # Read video and process frames
        video = cv2.VideoCapture(video_path)
        frame_rate = int(video.get(cv2.CAP_PROP_FPS))
        frame_width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
        frame_size = (frame_width, frame_height)

        # VideoWriter for output video
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        output_video = cv2.VideoWriter(temp_output_path, fourcc, frame_rate, frame_size)

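        # Per-frame pipeline: save the frame to disk (both detection clients take a file
        # path), run YOLO and DINO-X, draw the annotations, then write the frame out.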
        while True:
            ret, frame = video.read()
            if not ret:
                break

            # Save frame temporarily for predictions
            frame_path = os.path.join(temp_frames_dir, f"frame_{frame_count}.jpg")
            cv2.imwrite(frame_path, frame)

            # Process predictions for the current frame using YOLO (Nestlé products)
            yolo_pred = yolo_model.predict(frame_path, confidence=50, overlap=80).json()

            # Track current frame detections (Nestlé)
            current_detections = {}
            for prediction in yolo_pred['predictions']:
                class_name = prediction['class']
                x, y, w, h = prediction['x'], prediction['y'], prediction['width'], prediction['height']
                object_id = f"{class_name}_{x}_{y}"

                if object_id not in current_detections:
                    current_detections[object_id] = class_name

                # Draw bounding box for detected products
                cv2.rectangle(frame, (int(x - w / 2), int(y - h / 2)), (int(x + w / 2), int(y + h / 2)), (0, 255, 0), 2)
                cv2.putText(frame, class_name, (int(x - w / 2), int(y - h / 2 - 10)),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

            # Calculate product counts (Nestlé)
            nestle_counts = {}
            for class_name in current_detections.values():
                nestle_counts[class_name] = nestle_counts.get(class_name, 0) + 1

            # Update previous_detections for the next frame
            previous_detections = current_detections

            # --- Detect unclassified products using DINO-X ---
            image_url = dinox_client.upload_file(frame_path)
            task = DinoxTask(
                image_url=image_url,
                prompts=[TextPrompt(text=DINOX_PROMPT)]  # Text prompt controlling what DINO-X detects
            )
            dinox_client.run_task(task)
            dinox_pred = task.result.objects

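            # Note: DINO-X returns corner-format boxes (x1, y1, x2, y2) in pixels, unlike the
            # centre/width/height format of the YOLO predictions, so the corners are drawn directly.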
            # Filter & count unclassified products
            unclassified_counts = {}
            for obj in dinox_pred:
                class_name = obj.category.strip().lower()
                unclassified_counts[class_name] = unclassified_counts.get(class_name, 0) + 1

                # Draw bounding box for unclassified objects
                x1, y1, x2, y2 = obj.bbox
                cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), (0, 0, 255), 2)
                cv2.putText(frame, f"{class_name} {obj.score:.2f}", (int(x1), int(y1 - 10)),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)

            # --- Overlay text with the product counts ---
            # Format the counts for Nestlé products (from YOLO)
            nestle_count_text = ""
            total_nestle = 0
            for class_name, count in nestle_counts.items():
                nestle_count_text += f"{class_name}: {count}\n"
                total_nestle += count
            nestle_count_text += f"\nTotal Nestlé Products: {total_nestle}"

            # Format the counts for unclassified products (from DINO-X)
            unclassified_count_text = ""
            total_unclassified = 0
            for class_name, count in unclassified_counts.items():
                unclassified_count_text += f"{class_name}: {count}\n"
                total_unclassified += count
            unclassified_count_text += f"\nTotal Unclassified Products: {total_unclassified}"

            # Draw the overlay text onto the frame
            y_offset = 20
            for line in nestle_count_text.split("\n"):
                cv2.putText(frame, line, (10, y_offset), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
                y_offset += 30

            y_offset += 30  # Slight gap between sections
            for line in unclassified_count_text.split("\n"):
                cv2.putText(frame, line, (10, y_offset), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
                y_offset += 30

            # Write processed frame to output video
            output_video.write(frame)
            frame_count += 1

        video.release()
        output_video.release()

        # Return a (path, error) pair so the success case matches the error branches
        return temp_output_path, None

    except Exception as e:
        return None, f"An error occurred: {e}"
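
# Example usage: a minimal sketch, assuming this module is run directly and a local
# "sample.mp4" exists (the filename is only illustrative; adjust it for your setup).
if __name__ == "__main__":
    annotated_path, error = detect_objects_in_video("sample.mp4")
    if error:
        print(f"Processing failed: {error}")
    else:
        print(f"Annotated video written to {annotated_path}")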
|