#!/usr/bin/env python3
# -*- coding: utf-8 -*-
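"""Train and evaluate a Double Deep Q-Network (FNN) agent in an ns3-gym
environment against a configurable jammer type, sweeping over a set of
channel switching costs (csc). For each cost the agent is trained, then
tested with a purely greedy policy, and the episode rewards, normalized
throughput, channel-switching statistics, reward plot, and trained model
are written under results/<network>/ and savedAgents/<network>/.
"""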

import os
import gym
import numpy as np
import matplotlib.pyplot as plt
import json
from ns3gym import ns3env
from DDQN_FNN import DoubleDeepQNetwork

jammerTypes = ['dynamic_pattern', 'combined', 'sweeping', 'random']
jammerType = jammerTypes[0]
network = 'FNN'
cscs = [0, 0.1, 0.2, 0.3, 0.4]  # Channel switching cost

for csc in cscs:
    env = gym.make('ns3-v0')
    ob_space = env.observation_space
    ac_space = env.action_space
    print("Observation space: ", ob_space, ob_space.dtype)
    print("Action space: ", ac_space, ac_space.n)

    s_size = ob_space.shape[0]
    a_size = ac_space.n
    total_episodes = 200
    max_env_steps = 100
    train_end = 0
    TRAIN_Episodes = 100
    remaining_Episodes = 0
    env._max_episode_steps = max_env_steps

    epsilon = 1.0  # exploration rate
    epsilon_min = 0.01
    epsilon_decay = 0.999
    discount_rate = 0.95
    lr = 0.001
    batch_size = 32

    DDQN_agent = DoubleDeepQNetwork(s_size, a_size, lr, discount_rate, epsilon, epsilon_min, epsilon_decay)
    rewards = []  # Store rewards for graphing
    epsilons = []  # Store the Explore/Exploit

    # Training agent
    for e in range(TRAIN_Episodes):
        state = env.reset()
        # print(f"Initial state is: {state}")
        state = np.reshape(state, [1, s_size])  # Reshape to (1, s_size) for the network input and replay memory
        tot_rewards = 0
        previous_action = 0
        for time in range(max_env_steps):  # an episode is "solved" if it runs for max_env_steps without terminating
            action = DDQN_agent.action(state)
            next_state, reward, done, _ = env.step(action)
            # print(f'The next state is: {next_state}')
            # done: Three collisions occurred in the last 10 steps.
            # time == max_env_steps - 1 : No collisions occurred
            if done or time == max_env_steps - 1:
                rewards.append(tot_rewards)
                epsilons.append(DDQN_agent.epsilon)
                print("episode: {}/{}, score: {}, e: {}"
                      .format(e, TRAIN_Episodes, tot_rewards, DDQN_agent.epsilon))
                break
            # Applying channel switching cost
            if action != previous_action:
                reward -= csc
            next_state = np.reshape(next_state, [1, s_size])
            tot_rewards += reward
            DDQN_agent.store(state, action, reward, next_state, done)  # Store the transition in replay memory
            state = next_state
            previous_action = action

            # Experience Replay
            if len(DDQN_agent.memory) > batch_size:
                DDQN_agent.experience_replay(batch_size)
        # Update the target network weights after each episode (this could also be done every x steps)
        DDQN_agent.update_target_from_model()
        # Early stopping criterion: the average reward of the last 10 episodes is within
        # 10% of the maximum possible episode reward (max_env_steps)
        if len(rewards) > 10 and np.average(rewards[-10:]) >= max_env_steps - 0.10 * max_env_steps:
            # Set the rest of the episodes for testing
            remaining_Episodes = total_episodes - e
            train_end = e
            break

    # Testing
    print('Training complete. Testing started...')
    # Test phase: the agent always exploits (no exploration and no further training)
    total_transmissions = 0
    successful_transmissions = 0
    if remaining_Episodes == 0:
        train_end = TRAIN_Episodes
        TEST_Episodes = 100
    else:
        TEST_Episodes = total_episodes - train_end
    # Testing Loop
    n_channel_switches = 0
    for e_test in range(TEST_Episodes):
        state = env.reset()
        state = np.reshape(state, [1, s_size])
        tot_rewards = 0
        previous_channel = 0
        for t_test in range(max_env_steps):
            action = DDQN_agent.test_action(state)
            next_state, reward, done, _ = env.step(action)
            # done: more than 3 collisions occurred in the last 10 steps.
            # t_test == max_env_steps - 1: no collisions occurred.
            if done or t_test == max_env_steps - 1:
                rewards.append(tot_rewards)
                epsilons.append(0)  # We are doing full exploit
                print("episode: {}/{}, score: {}, e: {}"
                      .format(e_test, TEST_Episodes, tot_rewards, 0))
                break
            next_state = np.reshape(next_state, [1, s_size])
            tot_rewards += reward
            if action != previous_channel:
                n_channel_switches += 1
            if reward == 1:
                successful_transmissions += 1
            # No experience storage or replay during testing
            state = next_state
            previous_channel = action
            total_transmissions += 1

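    # Ensure the output directories exist before saving (an assumption: they may not have
    # been created beforehand; makedirs is a no-op when the directory already exists).
    os.makedirs(f'results/{network}', exist_ok=True)
    os.makedirs(f'savedAgents/{network}', exist_ok=True)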
    # Plotting
    plotName = f'results/{network}/{jammerType}_csc_{csc}.png'
    plt.figure()  # start a fresh figure so plots from different csc runs do not overlap
    rolling_average = np.convolve(rewards, np.ones(10) / 10)
    plt.plot(rewards)
    plt.plot(rolling_average, color='black')
    plt.axhline(y=max_env_steps - 0.10 * max_env_steps, color='r', linestyle='-')  # "solved" threshold
    # Scale epsilon (epsilon_min - 1.0) to match the reward range (0 - max_env_steps)
    eps_graph = [max_env_steps * x for x in epsilons]
    plt.plot(eps_graph, color='g', linestyle='-')
    # Mark the episode where testing begins
    plt.axvline(x=train_end, color='y', linestyle='-')
    plt.xlim((0, train_end + TEST_Episodes))
    plt.ylim((0, max_env_steps))
    plt.xlabel('Episodes')
    plt.ylabel('Rewards')
    plt.savefig(plotName, bbox_inches='tight')
    # plt.show()

    # Save Results
    # Rewards
    fileName = f'results/{network}/rewards_{jammerType}_csc_{csc}.json'
    with open(fileName, 'w') as f:
        json.dump(rewards, f)
    # Normalized throughput
    normalizedThroughput = successful_transmissions / (TEST_Episodes*(max_env_steps-2))
    print(f'The normalized throughput is: {normalizedThroughput}')
    fileName = f'results/{network}/throughput_{jammerType}_csc_{csc}.json'
    with open(fileName, 'w') as f:
        json.dump(normalizedThroughput, f)
    # Channel switching times
    normalized_cst = n_channel_switches / (TEST_Episodes*(max_env_steps-2))
    print(f'The normalized channel switching times is: {normalized_cst}')
    fileName = f'results/{network}/times_{jammerType}_csc_{csc}.json'
    with open(fileName, 'w') as f:
        json.dump(normalized_cst, f)
    # Save the agent as a SavedAgent.
    agentName = f'savedAgents/{network}/DDQNAgent_{jammerType}_csc_{csc}'
    DDQN_agent.save_model(agentName)
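
    # Close the environment before the next csc run (an assumption: ns3gym environments
    # typically hold a connection to the running ns-3 process, so release it explicitly).
    env.close()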