# Copyright 2022 The HuggingFace Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# File inspired by source: https://github.com/openai/gym/blob/master/gym/envs/box2d/lunar_lander.py

# This example reimplements the famous lunar lander reinforcement learning environment.

import argparse
import os
import time  # used by the optional random-action rollout below
from pathlib import Path

import numpy as np

import simulate as sm
from agent import DuelingDQNAgent, MetricLogger
from params import hyperparams


# Constants from the original source
# TODO implement scaling
SCALE = 30.0  # affects how fast-paced the game is; forces should be adjusted as well

# TODO integrate random initial forces
INITIAL_RANDOM = 1000.0  # set to 1500 to make the game harder

# Lander construction
LANDER_POLY = np.array([(-17, -10, 0), (-17, 0, 0), (-14, 17, 0), (14, 17, 0), (17, 0, 0), (17, -10, 0)])[::-1] / SCALE
LEG_AWAY = 20
LEG_DOWN = -7
LEG_ANGLE = 0.25  # radians
LEG_W, LEG_H = 2, 8
LEG_RIGHT_POLY = (
    np.array(
        [
            (LEG_AWAY, LEG_DOWN, 0),
            (LEG_AWAY + LEG_H * np.sin(LEG_ANGLE), LEG_DOWN - LEG_H * np.cos(LEG_ANGLE), 0),
            (
                LEG_AWAY + LEG_H * np.sin(LEG_ANGLE) + LEG_W * np.sin(np.pi / 2 - LEG_ANGLE),
                LEG_DOWN - LEG_H * np.cos(LEG_ANGLE) + LEG_W * np.cos(np.pi / 2 - LEG_ANGLE),
                0,
            ),
            (LEG_AWAY + LEG_W * np.sin(np.pi / 2 - LEG_ANGLE), LEG_DOWN + LEG_W * np.cos(np.pi / 2 - LEG_ANGLE), 0),
        ]
    )
    / SCALE
)
LEG_LEFT_POLY = [[-x, y, z] for x, y, z in LEG_RIGHT_POLY][::-1]  # mirror of the right leg
LANDER_COLOR = [128 / 255, 102 / 255, 230 / 255]

# Terrain construction
VIEWPORT_W = 600  # TODO integrate camera with these exact dimensions
VIEWPORT_H = 400
W = VIEWPORT_W / SCALE
H = VIEWPORT_H / SCALE
CHUNKS = 11
HEIGHTS = np.random.uniform(0, H / 2, size=(CHUNKS + 1,))
CHUNK_X = [W / (CHUNKS - 1) * i for i in range(CHUNKS)]
HELIPAD_x1 = CHUNK_X[CHUNKS // 2 - 1]
HELIPAD_x2 = CHUNK_X[CHUNKS // 2 + 1]
HELIPAD_y = H / 4
# Flatten the terrain around the helipad so the landing zone is level
HEIGHTS[CHUNKS // 2 - 2] = HELIPAD_y
HEIGHTS[CHUNKS // 2 - 1] = HELIPAD_y
HEIGHTS[CHUNKS // 2 + 0] = HELIPAD_y
HEIGHTS[CHUNKS // 2 + 1] = HELIPAD_y
HEIGHTS[CHUNKS // 2 + 2] = HELIPAD_y
SMOOTH_Y = [0.33 * (HEIGHTS[i - 1] + HEIGHTS[i + 0] + HEIGHTS[i + 1]) for i in range(CHUNKS)]

# Advanced features
MAIN_ENGINE_POWER = 13.0  # TODO integrate specific forces
SIDE_ENGINE_POWER = 0.6  # TODO integrate specific forces
LEG_SPRING_TORQUE = 40  # TODO integrate specific forces
SIDE_ENGINE_HEIGHT = 14.0  # TODO integrate specific forces
SIDE_ENGINE_AWAY = 12.0  # TODO integrate specific forces

LAND_POLY = (
    [[CHUNK_X[0], SMOOTH_Y[0] - 3, 0]]
    + [[x, y, 0] for x, y in zip(CHUNK_X, SMOOTH_Y)]
    + [[CHUNK_X[-1], SMOOTH_Y[0] - 3, 0]]
)


def make_lander(engine="unity", engine_exe=""):
    # Create the simulate scene
    sc = sm.Scene(engine=engine, engine_exe=engine_exe)

    # Sample the initial lander position
    lander_init_pos = (10, 15, 0) + np.random.uniform(2, 4, 3)
    lander_init_pos[2] = 0.0  # the z axis is always 0, for 2D

    lander_material = sm.Material(base_color=LANDER_COLOR)

    # Create the lander polygons: first, the main lander body
    lander = sm.Polygon(
        points=LANDER_POLY,
        material=lander_material,
        position=lander_init_pos,
        name="lunar_lander",
        is_actor=True,
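        # Note (added comment): the rigid body below approximates 2D dynamics in a
        # 3D engine — gravity stays on, while the constraints freeze rotation about
        # x/y and translation along z so the lander moves only in the x-y plane.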
        physics_component=sm.RigidBodyComponent(
            use_gravity=True,
            constraints=["freeze_rotation_x", "freeze_rotation_y", "freeze_position_z"],
            mass=1,
        ),
    )
    # Extrude to make the lander visually 3D
    lander.mesh.extrude((0, 0, -1), capping=True, inplace=True)

    # Three discrete actions: push right, push left, and thrust upward
    lander.actuator = sm.Actuator(
        mapping=[
            sm.ActionMapping("add_force", axis=[1, 0, 0], amplitude=5),
            sm.ActionMapping("add_force", axis=[1, 0, 0], amplitude=-5),
            sm.ActionMapping("add_force", axis=[0, 1, 0], amplitude=2.5),
        ],
        n=3,
    )

    # Add invisible boxes as colliders until convex mesh colliders are supported
    lander += sm.Box(
        position=[0, np.min(LEG_RIGHT_POLY, axis=0)[1], -0.5],
        bounds=[0.1, 2 * np.max(LEG_RIGHT_POLY, axis=0)[0], 1],
        material=sm.Material.TRANSPARENT,
        rotation=[0, 0, 90],
        with_collider=True,
        name="lander_collider_box_bottom",
    )
    lander += sm.Box(
        position=[-0.6, 0, -0.5],
        bounds=[0.1, 26 / SCALE, 1],
        material=sm.Material.TRANSPARENT,
        rotation=[0, 0, -15],
        with_collider=True,
        name="lander_collider_box_right",
    )
    lander += sm.Box(
        position=[0.6, 0, -0.5],
        bounds=[0.1, 26 / SCALE, 1],
        material=sm.Material.TRANSPARENT,
        rotation=[0, 0, 15],
        with_collider=True,
        name="lander_collider_box_left",
    )

    # Add legs as child objects (their positions are in local coordinates!)
    r_leg = sm.Polygon(
        points=LEG_RIGHT_POLY,
        material=lander_material,
        parent=lander,
        name="lander_r_leg",
        # with_collider=True,  # TODO can use this when convex colliders are added
    )
    r_leg.mesh.extrude((0, 0, -1), capping=True, inplace=True)
    l_leg = sm.Polygon(
        points=LEG_LEFT_POLY,
        material=lander_material,
        parent=lander,
        name="lander_l_leg",
        # with_collider=True,  # TODO can use this when convex colliders are added
    )
    l_leg.mesh.extrude((0, 0, -1), capping=True, inplace=True)

    # Create the land object
    land = sm.Polygon(
        points=LAND_POLY[::-1],  # reversing vertex order so the normal faces the right direction
        material=sm.Material.GRAY,
        name="Moon",
    )
    land.mesh.extrude((0, 0, -1), capping=True, inplace=True)

    # Create collider blocks for the land (non-convex mesh colliders are TODO)
    for i in range(len(CHUNK_X) - 1):
        x1, x2 = CHUNK_X[i], CHUNK_X[i + 1]
        y1, y2 = SMOOTH_Y[i], SMOOTH_Y[i + 1]
        # Compute the rotation of each block from the slope of its chunk
        rotation = [0, 0, 90 + np.degrees(np.arctan2(y2 - (y1 + y2) / 2, (x2 - x1) / 2))]
        block_i = sm.Box(
            position=[(x1 + x2) / 2, (y1 + y2) / 2, -0.5],
            bounds=[0.2, 1.025 * np.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2), 1],  # slightly enlarged for better collider coverage
            material=sm.Material.GRAY,
            rotation=rotation,
            with_collider=True,
            name="land_collider_" + str(i),
        )
        sc += block_i

    # Add a target triangle / cone on the helipad for the reward
    sc += sm.Cone(
        position=[(HELIPAD_x1 + HELIPAD_x2) / 2, HELIPAD_y, -0.5],
        height=10 / SCALE,
        radius=10 / SCALE,
        material=sm.Material.YELLOW,
        name="target",
    )

    # TODO add lander state sensors for state-based RL
    sc += sm.StateSensor(
        target_entity=sc.target,
        reference_entity=lander,
        properties=["position", "rotation", "distance"],
        name="goal_sense",
    )

    # Create a Euclidean distance reward; the negative scalar turns the reward into a cost
    # (by default a dense reward is equal to the distance between the two entities)
    cost = sm.RewardFunction(type="dense", entity_a=lander, entity_b=sc.target, scalar=-1)
    lander += cost

    sc += lander
    sc += land

    return sc


def get_values(state):
    # Observations arrive as a dict of sensor readings; extract the flat state vector
    return state.get("StateSensor")


def train(agent, env, logger, max_steps=100):
    episodes = 20000
    for e in range(episodes):
        state = env.reset()

        # Play the game!
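        # Standard DQN recipe at each step (added comment): choose an action with the
        # agent's current (epsilon-greedy) policy, execute it in the simulator, store
        # the transition in the replay buffer via cache(), then update via learn().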
        for _ in range(max_steps):
            # Run agent on the state
            action = agent.act(get_values(state))
            # env.render()

            # Agent performs action
            next_state, reward, done, info = env.step(action)

            # Remember
            agent.cache(get_values(state), get_values(next_state), action, reward, done)

            # Learn
            q, loss = agent.learn()

            # Logging
            logger.log_step(reward, loss, q)

            # Update state
            state = next_state

            # Check if end of game
            if done:
                break

        logger.log_episode(e)
        if e % 20 == 0:
            logger.record(episode=e, epsilon=agent.exploration_rate, step=agent.curr_step)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--build_exe", default="", type=str, required=False, help="Pre-built unity app for simulate")
    parser.add_argument(
        "--num_steps", default=100, type=int, required=False, help="maximum number of steps per episode"
    )
    args = parser.parse_args()

    sc = make_lander(engine="unity", engine_exe=args.build_exe)
    sc += sm.LightSun()

    env = sm.RLEnv(sc, frame_skip=1)
    env.reset()

    # Optional sanity check: drive the environment with random actions
    # for i in range(500):
    #     action = [sc.action_space.sample()]
    #     obs, reward, done, info = env.step(action)
    #     print(f"step {i}, reward {reward[0]}")
    #     time.sleep(0.1)
    # env.close()

    checkpoint = None
    # checkpoint = Path("checkpoints/latest/airstriker_net_3.chkpt")  # resume training from a saved checkpoint
    path = "checkpoints/lunar-lander-dueling-dqn-rc"
    save_dir = Path(path)
    os.makedirs(path, exist_ok=True)
    logger = MetricLogger(save_dir)

    print("Training Dueling DQN Agent with step decay!")
    agent = DuelingDQNAgent(
        state_dim=7, action_dim=env.action_space.n, save_dir=save_dir, checkpoint=checkpoint, **hyperparams
    )

    # Optionally pre-fill the replay buffer before training:
    # fill_memory(agent, env, 5000)

    train(agent, env, logger, max_steps=args.num_steps)
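
# Example invocation (added; the script name and build path are placeholders):
#   python lunar_lander.py --build_exe /path/to/unity_build.x86_64 --num_steps 200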