# hma/sim/synthetic_data_gen_learned_sim.py
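"""Generate a mixed real + synthetic robomimic `lift` dataset.

The first (1 - SYNTHETIC_DATA_RATIO) fraction of the 200 source demos is
copied verbatim; the rest are regenerated by replaying their recorded action
sequences open-loop through a learned world model (GenieSimulator) and
recording its predicted frames. The result is written as a single
robomimic-format HDF5 file.
"""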
import json
import os

import cv2
import h5py
import numpy as np
import tqdm

from diffusion_policy.util.pytorch_util import dict_apply
from sim.policy import ReplayPolicy
from sim.simulator import GenieSimulator, ReplaySimulator

# Fraction of the 200 source demos to regenerate with the learned simulator.
SYNTHETIC_DATA_RATIO = 0.25
# Square frame resolution expected by the diffusion policy.
DP_RES = 84
# Source robomimic dataset providing the real demos and the replay prompts.
SOURCE_DATASET = 'data/robomimic/datasets/lift/ph/image.hdf5'

def load_demo(demo_idx: int):
    """Return the (frames, actions) arrays of one recorded demo."""
    with h5py.File(SOURCE_DATASET, 'r') as f:
        demo = f['data'][f'demo_{demo_idx}']
        actions = demo['actions'][:].astype(np.float32)
        frames = demo['obs']['agentview_image'][:].astype(np.uint8)
    assert len(actions) == len(frames)
    return frames, actions

def main():
    demos = dict()
    demos_len = []
    # Keep the first (1 - SYNTHETIC_DATA_RATIO) fraction of the 200 demos as
    # real data; regenerate the remainder with the learned simulator.
    start_idx = int(200 * (1 - SYNTHETIC_DATA_RATIO))
    end_idx = 200
    print(f"Generating {end_idx - start_idx} synthetic demos")

    # Copy the real demos over verbatim.
    for demo_idx in tqdm.tqdm(range(start_idx)):
        frames, actions = load_demo(demo_idx)
        assert frames.shape[-3:] == (DP_RES, DP_RES, 3), frames.shape
        demos[f"demo_{demo_idx}"] = {
            "obs": {
                "agentview_image": frames
            },
            "actions": actions
        }
        demos_len.append(len(actions))
    print(f"Loaded {len(demos)} actual demos")
genie_simulator = GenieSimulator(
image_encoder_type='temporalvae',
image_encoder_ckpt='stabilityai/stable-video-diffusion-img2vid',
quantize=False,
backbone_type="stmar",
backbone_ckpt="data/mar_ckpt/robomimic_best",
prompt_horizon=11,
action_stride=1,
domain='robomimic',
)
    # Generate the synthetic demos (training split only): replay each recorded
    # action sequence open-loop through the learned simulator and record the
    # frames it predicts.
    for demo_idx in tqdm.tqdm(range(start_idx, end_idx)):
        frames, actions = load_demo(demo_idx)
        # Stride and prompt horizon must match the GenieSimulator config above.
        replay_policy = ReplayPolicy(actions, 1, 11)
        replay_simulator = ReplaySimulator(frames, 11)
        assert len(replay_policy) == len(replay_simulator), \
            (len(replay_policy), len(replay_simulator))

        # Prompt the learned simulator with the demo's initial frames/actions.
        genie_simulator.set_initial_state((
            replay_simulator.prompt(),
            replay_policy.prompt()
        ))
        image = genie_simulator.reset()

        this_demo = {
            "obs": {
                "agentview_image": []
            },
            "actions": []
        }
        for _ in range(len(replay_policy)):
            action = replay_policy.generate_action(None)
            # Store the current predicted frame (resized to the policy's
            # resolution) together with the action taken from it, then step
            # the learned simulator forward.
            this_demo['obs']['agentview_image'].append(
                cv2.resize(image, (DP_RES, DP_RES)))
            this_demo['actions'].append(action[0])
            image = genie_simulator.step(action)['pred_next_frame']
        this_demo = dict_apply(this_demo, np.array)
        demos[f"demo_{demo_idx}"] = this_demo
        demos_len.append(len(this_demo['actions']))
    # h5py does not create missing parent directories for the output file.
    os.makedirs('data/robomimic_synthetic', exist_ok=True)
    with h5py.File(f'data/robomimic_synthetic/robomimic_synthetic{end_idx - start_idx}.hdf5', 'w') as f:
"""
saving format:
data (group)
total (attribute) - number of state-action samples in the dataset
env_args (attribute) - a json string that contains metadata on the environment and relevant arguments used for collecting data. Three keys: env_name, the name of the environment or task to create, env_type, one of robomimic’s supported environment types, and env_kwargs, a dictionary of keyword-arguments to be passed into the environment of type env_name.
demo_0 (group) - group for the first trajectory (every trajectory has a group)
num_samples (attribute) - the number of state-action samples in this trajectory
model_file (attribute) - the xml string corresponding to the MJCF MuJoCo model. Only present for robosuite datasets.
states (dataset) - flattened raw MuJoCo states, ordered by time. Shape (N, D) where N is the length of the trajectory, and D is the dimension of the state vector. Should be empty or have dummy values for non-robosuite datasets.
actions (dataset) - environment actions, ordered by time. Shape (N, A) where N is the length of the trajectory, and A is the action space dimension
rewards (dataset) - environment rewards, ordered by time. Shape (N,) where N is the length of the trajectory.
dones (dataset) - done signal, equal to 1 if playing the corresponding action in the state should terminate the episode. Shape (N,) where N is the length of the trajectory.
obs (group) - group for the observation keys. Each key is stored as a dataset.
<obs_key_1> (dataset) - the first observation key. Note that the name of this dataset and shape will vary. As an example, the name could be “agentview_image”, and the shape could be (N, 84, 84, 3).
...
next_obs (group) - group for the next observations.
<obs_key_1> (dataset) - the first observation key.
...
demo_1 (group) - group for the second trajectory
...
https://robomimic.github.io/docs/datasets/overview.html
"""
data_group = f.create_group('data')
data_group.attrs['total'] = sum(demos_len)
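        # Environment metadata consumed by robomimic when the dataset is used
        # to build an env; 'type': 1 selects the robosuite backend in
        # robomimic's EnvType, and env_kwargs mirror the lift/ph collection
        # setup.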
data_group.attrs['env_args'] = json.dumps({
'env_name': 'Lift',
'type': 1,
'env_kwargs': {
'has_renderer': False,
'has_offscreen_renderer': True,
'ignore_done': True,
'use_object_obs': False,
'use_camera_obs': True,
'control_freq': 20,
'controller_configs': {
'type': 'OSC_POSE',
'input_max': 1,
'input_min': -1,
'output_max': [0.05, 0.05, 0.05, 0.5, 0.5, 0.5],
'output_min': [-0.05, -0.05, -0.05, -0.5, -0.5, -0.5],
'kp': 150,
'damping': 1,
'impedance_mode': 'fixed',
'kp_limits': [0, 300],
'damping_limits': [0, 10],
'position_limits': None,
'orientation_limits': None,
'uncouple_pos_ori': True,
'control_delta': True,
'interpolation': None,
'ramp_ratio': 0.2
},
'robots': ['Panda'],
'camera_depths': False,
'camera_heights': 84,
'camera_widths': 84,
'reward_shaping': False,
'camera_names': ['agentview', 'robot0_eye_in_hand'],
'render_gpu_device_id': 0
},
'use_image_obs': True
})
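        # Only actions and image observations are written; the optional
        # states/rewards/dones datasets described in the format above are
        # omitted here.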
        for demo_name, demo_data in demos.items():
            demo_group = data_group.create_group(demo_name)
            demo_group.attrs['num_samples'] = len(demo_data['actions'])
            demo_group.create_dataset('actions', data=demo_data['actions'])
            obs_group = demo_group.create_group('obs')
            for key, value in demo_data['obs'].items():
                obs_group.create_dataset(key, data=value)
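
# A minimal sanity-check sketch, not part of the original pipeline: the helper
# below (its name is an assumption) reads a generated file back and prints its
# structure against the robomimic format documented in main().
def inspect_dataset(path: str) -> None:
    with h5py.File(path, 'r') as f:
        data = f['data']
        env_args = json.loads(data.attrs['env_args'])
        print(f"env_name={env_args['env_name']}, total={data.attrs['total']}")
        # Show the first few trajectories and their dataset shapes.
        for demo_name in list(data.keys())[:3]:
            demo = data[demo_name]
            print(demo_name,
                  'num_samples =', demo.attrs['num_samples'],
                  'actions', demo['actions'].shape,
                  'agentview_image', demo['obs']['agentview_image'].shape)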

if __name__ == '__main__':
    main()