File size: 5,060 Bytes
a162e39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
import numpy as np
from dataclasses import dataclass

import param_
from drone import Drone
from dronemodel import DroneModel
from settings import Settings


@dataclass
class Team:
    """
    Creates a team (it is either red or blue / foe or friend
    """

    is_blue: bool
    drones: [Drone]
    drone_model: DroneModel
    weighted_distance: float = 0

    def reset(self, obs=None):

        self.delta_weighted_distance()
        if obs:
            for drone, obs in zip(self.drones, obs):
                drone.reset(obs=obs)
        else:
            for drone in self.drones:
                drone.reset()

    def get_observation(self) -> np.ndarray:
        """
        get the observation for the RL agent
        :return: observation in the form of flatten np.arrays of shape(squad_number, 6*squad_size)
        """
        obs = np.array([drone.get_observation() for drone in self.drones])
        deads = ~np.array([drone.is_alive for drone in self.drones])

        return obs, deads

    def step(self, action: np.ndarray):
        obs = np.zeros((len(self.drones), 6))
        done = np.zeros((len(self.drones),))
        reward = np.zeros((len(self.drones),))
        infos = [{} for d in range(len(self.drones))]
        for index, drone in enumerate(self.drones):
            obs[index], reward[index], done[index], infos[index] = drone.step(action[index])
        done = (sum(done) == len(self.drones))
        info = {'oob': 0, 'hits_target': 0, 'ttl': param_.DURATION, 'distance_to_straight_action': 0}
        for i in infos:
            info['ttl'] = min(info['ttl'], i['ttl'])
            info['oob'] += i['oob'] if 'oob' in i else 0
            info['hits_target'] += i['hits_target'] if 'hits_target' in i else 0
            info['delta_distance'] = 0 if self.is_blue else self.delta_weighted_distance()
            info['distance_to_straight_action'] += i['distance_to_straight_action'] \
                if 'distance_to_straight_action' in i else 0
        return obs, sum(reward), done, info

    def delta_weighted_distance(self):

        # distance of drones to 0
        team_distance = np.array([d.distance() for d in self.drones if d.is_alive])
        weighted_distance = np.sum(np.exp(-0.5 * (team_distance / (Settings.perimeter/2)) ** 2))

        delta = weighted_distance - self.weighted_distance if 0 < self.weighted_distance else 0

        self.weighted_distance = weighted_distance

        return delta


class BlueTeam(Team):
    """
    Creates the blue team
    """

    def __init__(self, number_of_drones: int = Settings.blues):
        self.is_blue = True
        self.drone_model = DroneModel(self.is_blue)

        # initialise blue positions and speeds
        positions = np.zeros((number_of_drones, 3))
        speeds = np.zeros((number_of_drones, 3))
        blue_speed = Settings.blue_speed_init * self.drone_model.max_speed
        circle = index = 0
        for d in range(number_of_drones):
            positions[d] = np.array([Settings.blue_circles_rho[circle],
                                     Settings.blue_circles_theta[circle] + index * 2 * np.pi / 3,
                                     Settings.blue_circles_zed[circle]])
            clockwise = 1 - 2 * (circle % 2)
            speeds[d] = np.array([blue_speed, np.pi / 6 * clockwise, 0])
            index += 1
            if index == Settings.blues_per_circle[circle]:
                index = 0
                circle += 1

        self.drones = [Drone(is_blue=True, position=position, speed=speed, id_=id_)
                       for (id_, position, speed) in zip(range(number_of_drones), positions, speeds)]


class RedTeam(Team):
    """
    Creates the red team
    """

    def __init__(self, number_of_drones: int = Settings.reds):
        self.is_blue = False
        self.drone_model = DroneModel(self.is_blue)

        positions = np.zeros((number_of_drones, 3))
        positions_noise = np.zeros((number_of_drones, 3))
        speeds = np.zeros((number_of_drones, 3))
        speed_rho = Settings.red_speed_init * self.drone_model.max_speed
        squad = index = 0
        for d in range(number_of_drones):
            positions[d] = [Settings.red_squads_rho[squad],
                            Settings.red_squads_theta[squad],
                            Settings.red_squads_zed[squad]]
            positions_noise[d] = [Settings.red_rho_noise[squad],
                                  Settings.red_theta_noise[squad],
                                  Settings.red_zed_noise[squad]]
            speeds[d] = [speed_rho, np.pi + positions[d][1], 0]
            speeds[d] = [speed_rho, np.pi + positions[d][1], 0]
            index += 1
            if index == Settings.red_squads[squad]:
                index = 0
                squad += 1

        self.drones = [Drone(is_blue=False, position=position, position_noise=position_noise, speed=speed, id_=id_)
                       for (id_, position, position_noise, speed) in
                       zip(range(len(positions)), positions, positions_noise, speeds)]