|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import argparse |
|
import time |
|
import os |
|
import numpy as np |
|
|
|
import simulate as sm |
|
import os |
|
from pathlib import Path |
|
from agent import DuelingDQNAgent, MetricLogger |
|
from params import hyperparams |
|
|
|
|
|
|
|
|
|
|
|
# Divisor applied to the pixel-style coordinates below to obtain scene units.
SCALE = 30.0

# NOTE: not referenced anywhere in the visible part of this file.
INITIAL_RANDOM = 1000.0

# --- Lander geometry -------------------------------------------------------
# Hull outline as (x, y, z) vertices; the vertex order is reversed and the
# coordinates are divided by SCALE.
LANDER_POLY = (
    np.array(
        [
            (-17, -10, 0),
            (-17, 0, 0),
            (-14, 17, 0),
            (14, 17, 0),
            (17, 0, 0),
            (17, -10, 0),
        ]
    )[::-1]
    / SCALE
)

LEG_AWAY = 20
LEG_DOWN = -7
LEG_ANGLE = 0.25
LEG_W = 2
LEG_H = 8

# Offsets of the leg's long edge (length LEG_H) and short edge (length LEG_W)
# when the leg is tilted by LEG_ANGLE.
_LEG_LONG_DX = LEG_H * np.sin(LEG_ANGLE)
_LEG_LONG_DY = LEG_H * np.cos(LEG_ANGLE)
_LEG_SHORT_DX = LEG_W * np.sin(np.pi / 2 - LEG_ANGLE)
_LEG_SHORT_DY = LEG_W * np.cos(np.pi / 2 - LEG_ANGLE)

# Four-vertex outline of the right leg, divided by SCALE.
LEG_RIGHT_POLY = (
    np.array(
        [
            (LEG_AWAY, LEG_DOWN, 0),
            (LEG_AWAY + _LEG_LONG_DX, LEG_DOWN - _LEG_LONG_DY, 0),
            (LEG_AWAY + _LEG_LONG_DX + _LEG_SHORT_DX, LEG_DOWN - _LEG_LONG_DY + _LEG_SHORT_DY, 0),
            (LEG_AWAY + _LEG_SHORT_DX, LEG_DOWN + _LEG_SHORT_DY, 0),
        ]
    )
    / SCALE
)

# Left leg: the right leg mirrored across x = 0, with the vertex order reversed.
LEG_LEFT_POLY = [[-x, y, z] for x, y, z in LEG_RIGHT_POLY[::-1]]

# RGB colour of the lander, each channel scaled into [0, 1].
LANDER_COLOR = [channel / 255 for channel in (128, 102, 230)]

# --- Terrain ---------------------------------------------------------------
VIEWPORT_W = 600
VIEWPORT_H = 400

# Viewport extent expressed in scene units.
W = VIEWPORT_W / SCALE
H = VIEWPORT_H / SCALE

CHUNKS = 11

# Random height samples (CHUNKS + 1 of them), drawn once at import time.
HEIGHTS = np.random.uniform(0, H / 2, size=(CHUNKS + 1,))

# Evenly spaced x coordinates spanning 0..W.
CHUNK_X = [W / (CHUNKS - 1) * i for i in range(CHUNKS)]

# The helipad spans the chunk boundaries on either side of the middle chunk.
HELIPAD_x1 = CHUNK_X[CHUNKS // 2 - 1]
HELIPAD_x2 = CHUNK_X[CHUNKS // 2 + 1]
HELIPAD_y = H / 4

# Pin the five samples centred on the middle chunk to the helipad height.
HEIGHTS[CHUNKS // 2 - 2 : CHUNKS // 2 + 3] = HELIPAD_y

# Three-sample smoothing of the heights; for k == 0 the index k - 1 wraps
# around to the last sample.
SMOOTH_Y = [0.33 * (HEIGHTS[k - 1] + HEIGHTS[k] + HEIGHTS[k + 1]) for k in range(CHUNKS)]

# --- Engine / leg constants --------------------------------------------------
# NOTE: none of these five are referenced in the visible part of this file.
MAIN_ENGINE_POWER = 13.0
SIDE_ENGINE_POWER = 0.6
LEG_SPRING_TORQUE = 40
SIDE_ENGINE_HEIGHT = 14.0
SIDE_ENGINE_AWAY = 12.0

# Closed terrain outline: the smoothed ridge line, bracketed by two vertices
# that both sit 3 units below the first ridge point.
LAND_POLY = [
    [CHUNK_X[0], SMOOTH_Y[0] - 3, 0],
    *([x, y, 0] for x, y in zip(CHUNK_X, SMOOTH_Y)),
    [CHUNK_X[-1], SMOOTH_Y[0] - 3, 0],
]
|
|
|
|
|
def make_lander(engine="unity", engine_exe=""):
    """Assemble the lunar-lander scene and return it.

    The scene contains, in the order they are built:

    * the lander hull (an actor with a rigid body and a three-entry force
      actuator), three transparent collider boxes attached to it, and its
      two legs;
    * the terrain polygon plus one collider box per terrain segment;
    * a cone named ``"target"`` placed at the helipad;
    * a state sensor and a reward function relating the lander to the target.

    Args:
        engine: forwarded to ``sm.Scene`` as ``engine``.
        engine_exe: forwarded to ``sm.Scene`` as ``engine_exe``.

    Returns:
        The populated ``sm.Scene``.
    """
    scene = sm.Scene(engine=engine, engine_exe=engine_exe)

    # Start position: fixed base offset plus a uniform random jitter in [2, 4)
    # on every axis, after which z is forced back to 0.
    spawn = (10, 15, 0) + np.random.uniform(2, 4, 3)
    spawn[2] = 0.0

    hull_material = sm.Material(base_color=LANDER_COLOR)
    hull_physics = sm.RigidBodyComponent(
        use_gravity=True,
        constraints=["freeze_rotation_x", "freeze_rotation_y", "freeze_position_z"],
        mass=1,
    )

    lander = sm.Polygon(
        points=LANDER_POLY,
        material=hull_material,
        position=spawn,
        name="lunar_lander",
        is_actor=True,
        physics_component=hull_physics,
    )
    # Give the flat outline some depth along -z.
    lander.mesh.extrude((0, 0, -1), capping=True, inplace=True)

    # Three discrete actions, each adding a force: +x, -x and +y.
    force_table = (
        ([1, 0, 0], 5),
        ([1, 0, 0], -5),
        ([0, 1, 0], 2.5),
    )
    lander.actuator = sm.Actuator(
        mapping=[sm.ActionMapping("add_force", axis=axis, amplitude=amplitude) for axis, amplitude in force_table],
        n=3,
    )

    # Bottom collider: sized and positioned from the extent of the right leg outline.
    leg_min_y = np.min(LEG_RIGHT_POLY, axis=0)[1]
    leg_max_x = np.max(LEG_RIGHT_POLY, axis=0)[0]
    lander += sm.Box(
        position=[0, leg_min_y, -0.5],
        bounds=[0.1, 2 * leg_max_x, 1],
        material=sm.Material.TRANSPARENT,
        rotation=[0, 0, 90],
        with_collider=True,
        name="lander_collider_box_bottom",
    )

    # Two tilted side colliders, described as (x offset, z rotation, name).
    side_colliders = (
        (-0.6, -15, "lander_collider_box_right"),
        (0.6, 15, "lander_collider_box_left"),
    )
    for x_offset, tilt, collider_name in side_colliders:
        lander += sm.Box(
            position=[x_offset, 0, -0.5],
            bounds=[0.1, 26 / SCALE, 1],
            material=sm.Material.TRANSPARENT,
            rotation=[0, 0, tilt],
            with_collider=True,
            name=collider_name,
        )

    # Legs are parented to the lander and extruded the same way as the hull.
    for leg_name, leg_outline in (("lander_r_leg", LEG_RIGHT_POLY), ("lander_l_leg", LEG_LEFT_POLY)):
        leg = sm.Polygon(
            points=leg_outline,
            material=hull_material,
            parent=lander,
            name=leg_name,
        )
        leg.mesh.extrude((0, 0, -1), capping=True, inplace=True)

    # Terrain polygon, built from the reversed outline.
    land = sm.Polygon(
        points=LAND_POLY[::-1],
        material=sm.Material.GRAY,
        name="Moon",
    )
    land.mesh.extrude((0, 0, -1), capping=True, inplace=True)

    # One collider box per terrain segment, centred on the segment midpoint and
    # rotated to follow its slope.
    ridge = list(zip(CHUNK_X, SMOOTH_Y))
    for index, ((x1, y1), (x2, y2)) in enumerate(zip(ridge, ridge[1:])):
        mid_x = (x1 + x2) / 2
        mid_y = (y1 + y2) / 2
        slope_deg = np.degrees(np.arctan2(y2 - mid_y, (x2 - x1) / 2))
        scene += sm.Box(
            position=[mid_x, mid_y, -0.5],
            # Slightly longer (x1.025) than the segment itself.
            bounds=[0.2, 1.025 * np.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2), 1],
            material=sm.Material.GRAY,
            rotation=[0, 0, +90 + slope_deg],
            with_collider=True,
            name="land_collider_" + str(index),
        )

    # Landing target marker at the centre of the helipad.
    scene += sm.Cone(
        position=[(HELIPAD_x1 + HELIPAD_x2) / 2, HELIPAD_y, -0.5],
        height=10 / SCALE,
        radius=10 / SCALE,
        material=sm.Material.YELLOW,
        name="target",
    )

    # Sensor observing the target with the lander as reference entity.
    scene += sm.StateSensor(
        target_entity=scene.target,
        reference_entity=lander,
        properties=["position", "rotation", "distance"],
        name="goal_sense",
    )

    # Reward between lander and target, attached to the lander.
    lander += sm.RewardFunction(type="dense", entity_a=lander, entity_b=scene.target, scalar=-1)

    scene += lander
    scene += land

    return scene
|
|
|
|
|
def get_values(state):
    """Return the value stored under the ``"StateSensor"`` key of ``state``.

    ``state`` must provide a dict-style ``get``; ``None`` is returned when the
    key is missing.
    """
    sensor_reading = state.get("StateSensor")
    return sensor_reading
|
|
|
def train(agent, env, logger, episodes=20000, max_steps=100):
    """Run the agent/environment training loop.

    For every episode the environment is reset and the agent interacts with it
    for at most ``max_steps`` steps, or until the environment reports ``done``.
    Each transition is cached by the agent, followed by one ``agent.learn()``
    call, and the step is reported to ``logger``.

    Args:
        agent: object providing ``act(obs)``, ``cache(obs, next_obs, action,
            reward, done)``, ``learn()`` (returning a ``(q, loss)`` pair) and
            the attributes ``exploration_rate`` and ``curr_step``.
        env: object providing ``reset()`` and ``step(action)``; ``step`` must
            return ``(next_state, reward, done, info)``.
        logger: object providing ``log_step(reward, loss, q)``,
            ``log_episode(episode)`` and ``record(episode=, epsilon=, step=)``.
        episodes: number of episodes to run (default 20000).
        max_steps: upper bound on environment steps per episode (default 100).
    """
    for episode in range(episodes):
        state = env.reset()

        for _ in range(max_steps):
            # Extract the observation once and reuse it for acting and caching.
            observation = get_values(state)

            action = agent.act(observation)
            next_state, reward, done, _info = env.step(action)

            # Store the transition, then do one learning update.
            agent.cache(observation, get_values(next_state), action, reward, done)
            q, loss = agent.learn()

            logger.log_step(reward, loss, q)

            state = next_state
            if done:
                break

        logger.log_episode(episode)

        # Emit an aggregated record every 20 episodes (including episode 0).
        if episode % 20 == 0:
            logger.record(episode=episode, epsilon=agent.exploration_rate, step=agent.curr_step)
|
|
|
|
|
if __name__ == "__main__":
    # Command-line interface.
    parser = argparse.ArgumentParser()
    parser.add_argument("--build_exe", default="", type=str, required=False, help="Pre-built unity app for simulate")
    parser.add_argument(
        "--num_steps", default=100, type=int, required=False, help="number of steps to run the simulator"
    )
    args = parser.parse_args()
    # NOTE(review): args.num_steps is parsed but never used below; train() is
    # called with its own defaults. Confirm whether it should drive the loop.

    # Build the scene, add a light and wrap it in an RL environment.
    sc = make_lander(engine="unity", engine_exe=args.build_exe)
    sc += sm.LightSun()

    env = sm.RLEnv(sc, frame_skip=1)
    env.reset()

    # No checkpoint is loaded; the agent is constructed with checkpoint=None.
    checkpoint = None

    # Make sure the output directory exists. mkdir(exist_ok=True) avoids the
    # check-then-create race of a separate os.path.exists() test.
    save_dir = Path("checkpoints/lunar-lander-dueling-dqn-rc")
    save_dir.mkdir(parents=True, exist_ok=True)

    logger = MetricLogger(save_dir)

    print("Training Dueling DQN Agent with step decay!")
    agent = DuelingDQNAgent(
        # 7 matches position (3) + rotation (3) + distance (1) if the sensor
        # emits those widths - TODO confirm against the StateSensor output.
        state_dim=7,
        action_dim=env.action_space.n,
        save_dir=save_dir,
        checkpoint=checkpoint,
        **hyperparams
    )

    train(agent, env, logger)
|
|