
def detect_objects_in_video(video_path):
    temp_output_path = "/tmp/output_video.mp4"
    temp_frames_dir = tempfile.mkdtemp()
    frame_count = 0
    previous_detections = {}  # Object detections from the previous frame

    # Initialize the DINO-X client, used to detect unclassified products
    dinox_config = Config(DINOX_API_KEY)
    dinox_client = Client(dinox_config)

    try:
        # Convert the video to MP4 if necessary; write the converted copy to its own
        # temporary path so it is not overwritten by the output VideoWriter below
        if not video_path.endswith(".mp4"):
            converted_path = os.path.join(temp_frames_dir, "converted_input.mp4")
            video_path, err = convert_video_to_mp4(video_path, converted_path)
            if not video_path:
                return None, f"Video conversion error: {err}"

        # Read video and process frames
        video = cv2.VideoCapture(video_path)
        frame_rate = int(video.get(cv2.CAP_PROP_FPS))
        frame_width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
        frame_size = (frame_width, frame_height)

        # VideoWriter for output video
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        output_video = cv2.VideoWriter(temp_output_path, fourcc, frame_rate, frame_size)

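        # Per-frame pipeline: save the frame to disk (both detection clients take a file
        # path), run YOLO and DINO-X, draw the annotations, then write the frame out.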
        while True:
            ret, frame = video.read()
            if not ret:
                break

            # Save frame temporarily for predictions
            frame_path = os.path.join(temp_frames_dir, f"frame_{frame_count}.jpg")
            cv2.imwrite(frame_path, frame)

            # Process predictions for the current frame using YOLO (Nestlé products)
            yolo_pred = yolo_model.predict(frame_path, confidence=50, overlap=80).json()

            # Track current frame detections (Nestlé)
            current_detections = {}
            for prediction in yolo_pred['predictions']:
                class_name = prediction['class']
                x, y, w, h = prediction['x'], prediction['y'], prediction['width'], prediction['height']
                object_id = f"{class_name}_{x}_{y}"

                if object_id not in current_detections:
                    current_detections[object_id] = class_name

                # Draw bounding box for detected products
                cv2.rectangle(frame, (int(x - w / 2), int(y - h / 2)), (int(x + w / 2), int(y + h / 2)), (0, 255, 0), 2)
                cv2.putText(frame, class_name, (int(x - w / 2), int(y - h / 2 - 10)),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

            # Calculate product counts (Nestlé)
            nestle_counts = {}
            for class_name in current_detections.values():
                nestle_counts[class_name] = nestle_counts.get(class_name, 0) + 1

            # Update previous_detections for the next frame
            previous_detections = current_detections

            # --- Detect unclassified products using DINO-X ---
            image_url = dinox_client.upload_file(frame_path)
            task = DinoxTask(
                image_url=image_url,
                prompts=[TextPrompt(text=DINOX_PROMPT)]  # Text prompt controlling what DINO-X detects
            )
            dinox_client.run_task(task)
            dinox_pred = task.result.objects

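            # Note: DINO-X returns corner-format boxes (x1, y1, x2, y2) in pixels, unlike the
            # centre/width/height format of the YOLO predictions, so the corners are drawn directly.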
            # Filter & count unclassified products
            unclassified_counts = {}
            for obj in dinox_pred:
                class_name = obj.category.strip().lower()
                unclassified_counts[class_name] = unclassified_counts.get(class_name, 0) + 1

                # Draw bounding box for unclassified objects
                x1, y1, x2, y2 = obj.bbox
                cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), (0, 0, 255), 2)
                cv2.putText(frame, f"{class_name} {obj.score:.2f}", (int(x1), int(y1 - 10)),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)

            # --- Overlay text with the product counts ---
            # Format the counts for Nestlé products (from YOLO)
            nestle_count_text = ""
            total_nestle = 0
            for class_name, count in nestle_counts.items():
                nestle_count_text += f"{class_name}: {count}\n"
                total_nestle += count
            nestle_count_text += f"\nTotal Nestlé Products: {total_nestle}"

            # Format the counts for unclassified products (from DINO-X)
            unclassified_count_text = ""
            total_unclassified = 0
            for class_name, count in unclassified_counts.items():
                unclassified_count_text += f"{class_name}: {count}\n"
                total_unclassified += count
            unclassified_count_text += f"\nTotal Unclassified Products: {total_unclassified}"

            # Draw the overlay text onto the frame
            y_offset = 20
            for line in nestle_count_text.split("\n"):
                cv2.putText(frame, line, (10, y_offset), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
                y_offset += 30

            y_offset += 30  # Slight gap between sections
            for line in unclassified_count_text.split("\n"):
                cv2.putText(frame, line, (10, y_offset), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
                y_offset += 30

            # Write processed frame to output video
            output_video.write(frame)
            frame_count += 1

        video.release()
        output_video.release()

        # Return a (path, error) pair so the success case matches the error branches
        return temp_output_path, None

    except Exception as e:
        return None, f"An error occurred: {e}"
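
# Example usage: a minimal sketch, assuming this module is run directly and a local
# "sample.mp4" exists (the filename is only illustrative; adjust it for your setup).
if __name__ == "__main__":
    annotated_path, error = detect_objects_in_video("sample.mp4")
    if error:
        print(f"Processing failed: {error}")
    else:
        print(f"Annotated video written to {annotated_path}")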
|