File size: 3,857 Bytes
a162e39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
import gym
from gym import spaces
import numpy as np

import param_
from settings import Settings
from playground import Playground
from team import BlueTeam, RedTeam


class SwarmEnv(gym.Env):
    """
    Custom 3D-Environment that follows gym interface.
    This is a 3D-env where the blue drones defend a circular GROUNDZONE from a red drones attack.

    Both teams are driven through a single environment: an action is a pair
    (blue_action, red_action) and an observation is the 6-tuple
    (blue_obs, red_obs, blues_fire, reds_fire, blue_deads, red_deads).
    """

    def __init__(self, blues=Settings.blues, reds=Settings.reds):
        """
        :param blues: number of blue drones (defaults to Settings.blues)
        :param reds: number of red drones (defaults to Settings.reds)
        """
        super(SwarmEnv, self).__init__()

        self.nb_blues = blues
        self.nb_reds = reds

        self.blue_team = BlueTeam(number_of_drones=self.nb_blues)
        self.red_team = RedTeam(number_of_drones=self.nb_reds)

        # the playground works on the very same drone objects as the two teams
        self.playground = Playground(env=self, blue_drones=self.blue_team.drones, red_drones=self.red_team.drones)

        # number of step() calls since the last reset()
        self.steps = 0

        # same ordering as the tuple returned by reset() and step():
        # blue_obs, red_obs, blues_fire, reds_fire, blue_deads, red_deads
        self.observation_space = spaces.Tuple((
            spaces.Box(low=0, high=1, shape=(self.nb_blues, 6), dtype=np.float32),
            spaces.Box(low=0, high=1, shape=(self.nb_reds, 6), dtype=np.float32),
            spaces.Box(low=0, high=1, shape=(self.nb_blues, self.nb_reds), dtype=np.float32),
            spaces.Box(low=0, high=1, shape=(self.nb_reds, self.nb_blues), dtype=np.float32),
            spaces.MultiBinary(self.nb_blues),
            spaces.MultiBinary(self.nb_reds),
        ))

        # one 3-component action per drone, blue team first then red team
        self.action_space = spaces.Tuple((
            spaces.Box(low=0, high=1, shape=(self.nb_blues, 3), dtype=np.float32),
            spaces.Box(low=0, high=1, shape=(self.nb_reds, 3), dtype=np.float32)))

    def reset(self, obs=None):
        """
        resets the environment as part of Gym interface

        :param obs: optional 6-item sequence ordered like the returned observation;
            only its first two items (blue_obs, red_obs) are used, they are forwarded
            to the reset of the blue and red teams. None or an empty sequence resets
            both teams with obs=None.
        :return: the tuple (blue_obs, red_obs, blues_fire, reds_fire, blue_deads, red_deads)
        """
        # explicit None / length test instead of bare truthiness, so that an
        # array-like obs does not raise an "ambiguous truth value" error
        if obs is not None and len(obs) > 0:
            # the fire and dead items are recomputed below, so they are dropped here
            blue_obs, red_obs, _, _, _, _ = obs
        else:
            blue_obs, red_obs = None, None

        self.blue_team.reset(obs=blue_obs)
        self.red_team.reset(obs=red_obs)
        self.playground.reset()
        self.steps = 0

        # get observations from blue and red teams
        blue_obs, blue_deads = self.blue_team.get_observation()
        red_obs, red_deads = self.red_team.get_observation()
        blues_fire, reds_fire = self.playground.get_observation()

        return blue_obs, red_obs, blues_fire, reds_fire, blue_deads, red_deads

    def render(self, mode='human'):
        """
        rendering is not implemented: this method does nothing
        """
        pass

    def step(self, action):
        """
        moves both teams, then lets the playground play its own step

        :param action: the pair (blue_action, red_action)
        :return: (obs, reward, done, info) where obs is the tuple
            (blue_obs, red_obs, bf_obs, rf_obs, blue_deads, red_deads), reward is the
            sum of the blue, red and both playground rewards, done is always False
            and info is a dict of counters taken from the teams and the playground
        """
        self.steps += 1

        blue_action, red_action = action
        # the per-team done flags are not used by this environment
        blue_obs, blue_reward, _, blue_info = self.blue_team.step(blue_action)
        red_obs, red_reward, _, red_info = self.red_team.step(red_action)
        # the remaining counts returned by the playground are not used here:
        # they are recomputed below from the dead flags
        bf_obs, bf_reward, _, blue_shots, rf_obs, rf_reward, _, red_shots = \
            self.playground.step()
        # read the dead flags after the playground step
        _, blue_deads = self.blue_team.get_observation()
        _, red_deads = self.red_team.get_observation()
        obs = blue_obs, red_obs, bf_obs, rf_obs, blue_deads, red_deads
        reward = blue_reward + red_reward + bf_reward + rf_reward
        # NOTE(review): the episode is never flagged as finished here;
        # presumably the caller decides when to stop - TODO confirm
        done = False

        info = {
            'red_oob': red_info['oob'],
            'blue_oob': blue_info['oob'],
            'hits_target': red_info['hits_target'],
            'blue_shots': blue_shots,
            'red_shots': red_shots,
            'weighted_red_distance': red_info['delta_distance'],
            # drones whose dead flag is not set
            'remaining blues': len(blue_deads)-sum(blue_deads),
            'remaining reds': len(red_deads)-sum(red_deads),
            'ttl': red_info['ttl'],
            'distance_to_straight_action': red_info['distance_to_straight_action'],
        }

        # NOTE(review): looks like a debug trace, printed whenever one of these counters is positive
        if red_info['oob'] + blue_info['oob'] + red_info['hits_target'] + blue_shots + red_shots > 0:
            print('something happened')

        return obs, reward, done, info