# --------------------------------------------------------
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------
# https://github.com/epic-kitchens/epic-kitchens-100-hand-object-bboxes/blob/master/notebooks/demo.ipynb
import os
from collections import OrderedDict
from os.path import expanduser
from pathlib import Path

import cv2
import numpy as np
from tqdm import tqdm

from epic_kitchens.hoa import load_detections
from epic_kitchens.hoa.types import BBox, FloatVector, HandSide

CURRENT_DIR = os.path.dirname(__file__)
RESOLUTION = (480, 480)
home = expanduser("~")

# Adjust these to wherever your detections and frames are stored.
DETECTION_ROOT = "/checkpoint/xinleic/LR/epic-kitchens-100-hand-object-bboxes/labels/hand-objects"
FRAMES_ROOT = "/datasets01/EPIC-KITCHENS-100"
# DETECTION_ROOT = f"{home}/Projects/epic_kitchen_labels/hand-objects"
# FRAMES_ROOT = f"{home}/EPIC-KITCHENS"

detections_root = Path(DETECTION_ROOT)
frames_root = Path(FRAMES_ROOT)
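
# Expected on-disk layout (inferred from the paths used below):
#   FRAMES_ROOT/<participant_id>/rgb_frames/<video_id>/frame_XXXXXXXXXX.jpg
#   DETECTION_ROOT/<participant_id>/<video_id>.pkl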


def compute_state_and_actions(curr_frame, next_frame):
    # Order hands so that index 0 is always the left hand.
    curr_hand1, curr_hand2 = curr_frame.hands[0], curr_frame.hands[1]
    if curr_hand1.side != HandSide.LEFT:  # flip
        curr_hand1, curr_hand2 = curr_hand2, curr_hand1

    # Bounding-box centers are already normalized.
    curr_hand1_center = curr_hand1.bbox.center
    curr_hand2_center = curr_hand2.bbox.center

    next_hand1, next_hand2 = next_frame.hands[0], next_frame.hands[1]
    if next_hand1.side != HandSide.LEFT:  # flip
        next_hand1, next_hand2 = next_hand2, next_hand1

    next_hand1_center = next_hand1.bbox.center
    next_hand2_center = next_hand2.bbox.center

    # State: current (left, right) hand centers; action: per-hand displacement to the next frame.
    state = np.concatenate((curr_hand1_center, curr_hand2_center))
    action = np.concatenate(
        (
            np.array(next_hand1_center) - np.array(curr_hand1_center),
            np.array(next_hand2_center) - np.array(curr_hand2_center),
        )
    )
    return state, action
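
# Usage sketch (not executed; "P01" / "P01_01.pkl" below are illustrative placeholders):
# given two consecutive detection frames that each contain exactly two hands,
# `state` holds the current normalized (left, right) hand-box centers and
# `action` their displacement to the next frame.
#
#   dets = load_detections(detections_root / "P01" / "P01_01.pkl")
#   if len(dets[0].hands) == 2 and len(dets[1].hands) == 2:
#       state, action = compute_state_and_actions(dets[0], dets[1])
#       # both are length-4 numpy arrays: (left_x, left_y, right_x, right_y)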


# Define your own dataset conversion here.
def convert_dataset_image():
    # Convert each video into an episode (a list of steps) that can be added to a replay buffer.
    ALL_EPISODES = os.listdir(FRAMES_ROOT)
    MAX_EPISODE_LENGTH = 5000

    for EPS in ALL_EPISODES:
        rgb_path = os.path.join(FRAMES_ROOT, EPS, "rgb_frames")
        if not os.path.exists(rgb_path):
            continue

        for video_id in os.listdir(rgb_path):
            full_path = os.path.join(rgb_path, video_id)
            if (
                not full_path.endswith(".tar") and not full_path.endswith(".jpg") and not full_path.endswith("home")
            ):  # folder of extracted frames
                # Actions are extracted as frame-to-frame differences of both hands' bounding-box centers.
                participant_id = video_id[:3]
                video_detections = load_detections(detections_root / participant_id / (video_id + ".pkl"))
                max_frame_idx = len(video_detections) - 1
                DS_FACTOR = 1
                print(full_path)
                steps = []
                for frame_idx in range(0, max_frame_idx - DS_FACTOR, DS_FACTOR):
                    # Skip transitions where either frame does not have exactly two detected hands.
                    if (
                        len(video_detections[frame_idx].hands) != 2
                        or len(video_detections[frame_idx + DS_FACTOR].hands) != 2
                    ):
                        continue
                    s, a = compute_state_and_actions(
                        video_detections[frame_idx], video_detections[frame_idx + DS_FACTOR]
                    )
                    lang = "use human hands to do some tasks"  # placeholder language instruction
                    image_path = frames_root / participant_id / "rgb_frames" / video_id / f"frame_{frame_idx:010d}.jpg"
                    image = cv2.imread(str(image_path))
                    if image is None:
                        continue
                    image = image[..., [2, 1, 0]]  # BGR -> RGB
                    # Pack one transition into a step dict.
                    step = {
                        "observation": {"image": image, "state": s},
                        "action": a,
                        "language_instruction": lang,
                    }
                    steps.append(OrderedDict(step))
                    if len(steps) > MAX_EPISODE_LENGTH:
                        break
                data_dict = {"steps": steps}
                print(f"max_frame_idx: {max_frame_idx} ds factor: {DS_FACTOR} num steps: {len(steps)}")
                yield data_dict
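

if __name__ == "__main__":
    # Minimal driver sketch (assumption: in normal use this generator is consumed by a
    # replay-buffer builder elsewhere; here we only iterate it and report episode sizes).
    for episode in convert_dataset_image():
        steps = episode["steps"]
        if not steps:
            continue
        first = steps[0]
        print(
            f"episode: {len(steps)} steps, "
            f"image {first['observation']['image'].shape}, "
            f"state {first['observation']['state'].shape}, "
            f"action {first['action'].shape}"
        )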