File size: 10,518 Bytes

85e4824

# Copyright 2022 The HuggingFace Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# File inspired by source: https://github.com/openai/gym/blob/master/gym/envs/box2d/lunar_lander.py

import argparse
import time
import os
import numpy as np

import simulate as sm
import os
from pathlib import Path
from agent import DuelingDQNAgent, MetricLogger
from params import hyperparams

# This example reimplements the famous lunar lander reinforcement learning environment.

# Constants taken from the original lunar-lander source
# TODO implement scaling
SCALE = 30.0  # affects how fast-paced the game is, forces should be adjusted as well

# TODO integrate random initial forces
INITIAL_RANDOM = 1000.0  # Set 1500 to make game harder

# Lander construction
# Hull outline as (x, y, z) corners, divided by SCALE.
LANDER_POLY = (
    np.array(
        [
            (17, -10, 0),
            (17, 0, 0),
            (14, 17, 0),
            (-14, 17, 0),
            (-17, 0, 0),
            (-17, -10, 0),
        ]
    )
    / SCALE
)

# Leg placement and size, in unscaled units.
LEG_AWAY = 20
LEG_DOWN = -7
LEG_ANGLE = 0.25  # radians
LEG_W, LEG_H = 2, 8


def _right_leg_outline():
    """Return the four (x, y, z) corners of the right leg, divided by SCALE."""
    # Offsets along the leg (length LEG_H) and across it (width LEG_W);
    # both are derived from LEG_ANGLE.
    along_x = LEG_H * np.sin(LEG_ANGLE)
    along_y = LEG_H * np.cos(LEG_ANGLE)
    across_x = LEG_W * np.sin(np.pi / 2 - LEG_ANGLE)
    across_y = LEG_W * np.cos(np.pi / 2 - LEG_ANGLE)
    corners = [
        (LEG_AWAY, LEG_DOWN, 0),
        (LEG_AWAY + along_x, LEG_DOWN - along_y, 0),
        (LEG_AWAY + along_x + across_x, LEG_DOWN - along_y + across_y, 0),
        (LEG_AWAY + across_x, LEG_DOWN + across_y, 0),
    ]
    return np.array(corners) / SCALE


LEG_RIGHT_POLY = _right_leg_outline()

# The left leg is the right leg with x negated and the corner order reversed.
LEG_LEFT_POLY = [[-px, py, pz] for px, py, pz in LEG_RIGHT_POLY[::-1]]
LANDER_COLOR = [channel / 255 for channel in (128, 102, 230)]

# terrain construction
VIEWPORT_W = 600  # TODO integrate camera with these exact dimensions
VIEWPORT_H = 400

# Viewport size divided by SCALE.
W, H = VIEWPORT_W / SCALE, VIEWPORT_H / SCALE

CHUNKS = 11
# CHUNKS + 1 random heights: the smoothing window below reads entries
# i - 1, i and i + 1 for every i in range(CHUNKS), so index 0 wraps around
# to the last entry and index CHUNKS - 1 reaches the extra one.
HEIGHTS = np.random.uniform(low=0, high=H / 2, size=CHUNKS + 1)
CHUNK_X = [W / (CHUNKS - 1) * chunk for chunk in range(CHUNKS)]

# Helipad: spans the chunk boundaries either side of the middle one, and the
# five heights around the middle are all set to the same value.
HELIPAD_x1, HELIPAD_x2 = CHUNK_X[CHUNKS // 2 - 1], CHUNK_X[CHUNKS // 2 + 1]
HELIPAD_y = H / 4
HEIGHTS[CHUNKS // 2 - 2 : CHUNKS // 2 + 3] = HELIPAD_y

# 0.33 times the sum of each height and its two neighbours; np.roll supplies
# the "previous" neighbour, including the wrap-around for the first chunk.
SMOOTH_Y = [
    0.33 * (before + here + after)
    for before, here, after in zip(np.roll(HEIGHTS, 1), HEIGHTS, HEIGHTS[1:])
]

# advanced features
MAIN_ENGINE_POWER = 13.0  # TODO integrate specific forces
SIDE_ENGINE_POWER = 0.6  # TODO integrate specific forces
LEG_SPRING_TORQUE = 40  # TODO integrate specific forces
SIDE_ENGINE_HEIGHT = 14.0  # TODO integrate specific forces
SIDE_ENGINE_AWAY = 12.0  # TODO integrate specific forces

# Terrain outline: the smoothed surface points, closed off by two extra
# corners placed 3 units below the first surface point.
LAND_POLY = [
    [CHUNK_X[0], SMOOTH_Y[0] - 3, 0],
    *([surface_x, surface_y, 0] for surface_x, surface_y in zip(CHUNK_X, SMOOTH_Y)),
    [CHUNK_X[-1], SMOOTH_Y[0] - 3, 0],
]


def make_lander(engine="unity", engine_exe=""):
    """Assemble the lunar-lander scene and return it.

    The scene contains the lander actor (hull, legs, box colliders, actuator
    and reward), the terrain polygon with its box colliders, a cone marking
    the target, and a state sensor relating the lander to that target.

    Args:
        engine: Forwarded to ``sm.Scene`` as ``engine``.
        engine_exe: Forwarded to ``sm.Scene`` as ``engine_exe``.

    Returns:
        The populated ``sm.Scene``.
    """
    scene = sm.Scene(engine=engine, engine_exe=engine_exe)

    # Spawn point: (10, 15, 0) plus a uniform random offset drawn from [2, 4)
    # per axis; z is then forced back to 0 to keep the task 2D.
    spawn = (10, 15, 0) + np.random.uniform(2, 4, 3)
    spawn[2] = 0.0

    hull_material = sm.Material(base_color=LANDER_COLOR)

    # --- lander hull -------------------------------------------------------
    hull_physics = sm.RigidBodyComponent(
        use_gravity=True,
        constraints=["freeze_rotation_x", "freeze_rotation_y", "freeze_position_z"],
        mass=1,
    )
    lander = sm.Polygon(
        points=LANDER_POLY,
        material=hull_material,
        position=spawn,
        name="lunar_lander",
        is_actor=True,
        physics_component=hull_physics,
    )

    # Extrude the flat outline along -z so it is visually 3D.
    lander.mesh.extrude((0, 0, -1), capping=True, inplace=True)

    # Three "add_force" actions, given as (axis, amplitude) pairs.
    thrusts = (([1, 0, 0], 5), ([1, 0, 0], -5), ([0, 1, 0], 2.5))
    lander.actuator = sm.Actuator(
        mapping=[sm.ActionMapping("add_force", axis=axis, amplitude=amplitude) for axis, amplitude in thrusts],
        n=3,
    )

    # --- lander colliders --------------------------------------------------
    # Invisible boxes act as colliders until convex mesh colliders are completed.
    _, leg_lowest_y, _ = LEG_RIGHT_POLY.min(axis=0)
    leg_outer_x = LEG_RIGHT_POLY.max(axis=0)[0]
    lander += sm.Box(
        position=[0, leg_lowest_y, -0.5],
        bounds=[0.1, 2 * leg_outer_x, 1],
        material=sm.Material.TRANSPARENT,
        rotation=[0, 0, 90],
        with_collider=True,
        name="lander_collider_box_bottom",
    )

    # The two side colliders differ only in name, x offset and tilt.
    side_colliders = (
        ("lander_collider_box_right", -0.6, -15),
        ("lander_collider_box_left", 0.6, 15),
    )
    for box_name, offset_x, tilt in side_colliders:
        lander += sm.Box(
            position=[offset_x, 0, -0.5],
            bounds=[0.1, 26 / SCALE, 1],
            material=sm.Material.TRANSPARENT,
            rotation=[0, 0, tilt],
            with_collider=True,
            name=box_name,
        )

    # --- legs --------------------------------------------------------------
    # Legs are children of the lander (they take positions as local coordinates!).
    for leg_name, leg_outline in (("lander_r_leg", LEG_RIGHT_POLY), ("lander_l_leg", LEG_LEFT_POLY)):
        leg = sm.Polygon(
            points=leg_outline,
            material=hull_material,
            parent=lander,
            name=leg_name,
            # with_collider=True, # TODO can use this when convex colliders is added
        )
        leg.mesh.extrude((0, 0, -1), capping=True, inplace=True)

    # --- terrain -----------------------------------------------------------
    moon = sm.Polygon(
        points=LAND_POLY[::-1],  # Reversing vertex order so the normal faces the right direction
        material=sm.Material.GRAY,
        name="Moon",
    )
    moon.mesh.extrude((0, 0, -1), capping=True, inplace=True)

    # One box collider per terrain segment (non-convex meshes are TODO).
    segments = zip(CHUNK_X, CHUNK_X[1:], SMOOTH_Y, SMOOTH_Y[1:])
    for index, (left_x, right_x, left_y, right_y) in enumerate(segments):
        mid_x, mid_y = (left_x + right_x) / 2, (left_y + right_y) / 2
        # Segment inclination in degrees, measured from its midpoint to its right end.
        slope_deg = np.degrees(np.arctan2(right_y - mid_y, (right_x - left_x) / 2))
        segment_length = np.sqrt((right_x - left_x) ** 2 + (right_y - left_y) ** 2)
        scene += sm.Box(
            position=[mid_x, mid_y, -0.5],
            bounds=[0.2, 1.025 * segment_length, 1],  # adjustment for better colliders
            material=sm.Material.GRAY,
            rotation=[0, 0, +90 + slope_deg],
            with_collider=True,
            name=f"land_collider_{index}",
        )

    # --- target, sensor and reward -----------------------------------------
    # Cone marking the target, centred between the two helipad x coordinates.
    scene += sm.Cone(
        position=[(HELIPAD_x1 + HELIPAD_x2) / 2, HELIPAD_y, -0.5],
        height=10 / SCALE,
        radius=10 / SCALE,
        material=sm.Material.YELLOW,
        name="target",
    )

    # TODO add lander state sensors for state-based RL
    scene += sm.StateSensor(
        target_entity=scene.target,
        reference_entity=lander,
        properties=["position", "rotation", "distance"],
        name="goal_sense",
    )

    # Euclidean distance reward; by default a dense reward equals the distance
    # between the 2 entities, and scalar=-1 changes the reward to a cost.
    distance_cost = sm.RewardFunction(type="dense", entity_a=lander, entity_b=scene.target, scalar=-1)
    lander += distance_cost

    scene += lander
    scene += moon

    return scene


def get_values(state):
    """Return whatever ``state.get`` yields for the ``"StateSensor"`` key.

    Args:
        state: Observation object exposing a ``get`` method (e.g. a dict).

    Returns:
        The result of ``state.get("StateSensor")``; for a plain dict that is
        ``None`` when the key is missing.
    """
    sensor_key = "StateSensor"
    return state.get(sensor_key)

def train(agent, env, logger, *, episodes=20000, max_steps=100, record_every=20, verbose=True):
    """Run the act / step / cache / learn loop for a number of episodes.

    Args:
        agent: Object providing ``act(obs)``, ``cache(obs, next_obs, action,
            reward, done)``, ``learn()`` (returning ``(q, loss)``) and the
            attributes ``exploration_rate`` and ``curr_step``.
        env: Environment providing ``reset()`` and ``step(action)``; ``step``
            must return a ``(next_state, reward, done, info)`` 4-tuple.
        logger: Object providing ``log_step(reward, loss, q)``,
            ``log_episode(episode)`` and ``record(episode=, epsilon=, step=)``.
        episodes: Number of episodes to run.
        max_steps: Upper bound on environment steps per episode; an episode
            also ends as soon as ``done`` is truthy.
        record_every: ``logger.record`` is called on every episode whose index
            is a multiple of this value (episode 0 included).
        verbose: When true, print the ``done`` flag after every step.

    Raises:
        ValueError: If ``record_every`` is smaller than 1.
    """
    if record_every < 1:
        raise ValueError(f"record_every must be >= 1, got {record_every}")

    for episode in range(episodes):
        state = env.reset()

        for _step in range(max_steps):
            # Extract the sensor reading once; it feeds both the policy and the replay cache.
            observation = get_values(state)

            action = agent.act(observation)
            next_state, reward, done, _info = env.step(action)

            if verbose:
                print("####################")
                print(done)
                print("####################")

            # Store the transition, then take one learning step.
            agent.cache(observation, get_values(next_state), action, reward, done)
            q, loss = agent.learn()

            logger.log_step(reward, loss, q)

            state = next_state

            if done:
                break

        logger.log_episode(episode)

        if episode % record_every == 0:
            logger.record(episode=episode, epsilon=agent.exploration_rate, step=agent.curr_step)


if __name__ == "__main__":
    # Script entry point: build the scene, wrap it in an RL environment and
    # train a Dueling DQN agent on it.
    parser = argparse.ArgumentParser()
    parser.add_argument("--build_exe", default="", type=str, required=False, help="Pre-built unity app for simulate")
    # NOTE(review): --num_steps is parsed but not forwarded to train(); it is
    # kept so existing command lines that pass it do not break.
    parser.add_argument(
        "--num_steps", default=100, type=int, required=False, help="number of steps to run the simulator"
    )
    args = parser.parse_args()

    sc = make_lander(engine="unity", engine_exe=args.build_exe)
    sc += sm.LightSun()

    env = sm.RLEnv(sc, frame_skip=1)
    try:
        env.reset()

        # Set to a saved checkpoint to resume from it,
        # e.g. Path('checkpoints/latest/airstriker_net_3.chkpt').
        checkpoint = None

        path = "checkpoints/lunar-lander-dueling-dqn-rc"
        save_dir = Path(path)
        # Create the directory (and any missing parents) in a single call
        # instead of a separate exists-check followed by a create.
        save_dir.mkdir(parents=True, exist_ok=True)

        logger = MetricLogger(save_dir)

        print("Training Dueling DQN Agent with step decay!")
        agent = DuelingDQNAgent(
            # NOTE(review): presumably 7 = position (3) + rotation (3) + distance (1)
            # from the "goal_sense" StateSensor -- confirm against the sensor output.
            state_dim=7,
            action_dim=env.action_space.n,
            save_dir=save_dir,
            checkpoint=checkpoint,
            **hyperparams,
        )

        train(agent, env, logger)
    finally:
        # Always release the environment, even when training fails or is interrupted.
        env.close()