#!/usr/bin/env python3
# -*- coding: utf-8 -*-
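"""Train and evaluate a Double Deep Q-Network (DDQN) agent for anti-jamming
channel selection in an ns3-gym environment, sweeping over a set of channel
switching costs (csc). For each cost, the script trains the agent, tests the
greedy policy, and saves the reward curve, normalized throughput, channel
switching statistics, and the trained model."""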
import os
import json

import gym
import numpy as np
import matplotlib.pyplot as plt

from ns3gym import ns3env  # noqa: F401  (importing ns3gym registers the 'ns3-v0' env with gym)
from DDQN_FNN import DoubleDeepQNetwork

jammerTypes = ['dynamic_pattern', 'combined', 'sweeping', 'random']
jammerType = jammerTypes[0]
network = 'FNN'
cscs = [0, 0.1, 0.2, 0.3, 0.4] # Channel switching cost
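
# Create the output directories used below for results and saved agents, if missing
os.makedirs(f'results/{network}', exist_ok=True)
os.makedirs(f'savedAgents/{network}', exist_ok=True)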
for csc in cscs:
    env = gym.make('ns3-v0')
    ob_space = env.observation_space
    ac_space = env.action_space
    print("Observation space: ", ob_space, ob_space.dtype)
    print("Action space: ", ac_space, ac_space.n)
    s_size = ob_space.shape[0]
    a_size = ac_space.n
    total_episodes = 200
    max_env_steps = 100
    train_end = 0
    TRAIN_Episodes = 100
    remaining_Episodes = 0
    env._max_episode_steps = max_env_steps
    epsilon = 1.0  # exploration rate
    epsilon_min = 0.01
    epsilon_decay = 0.999
    discount_rate = 0.95
    lr = 0.001
    batch_size = 32
    DDQN_agent = DoubleDeepQNetwork(s_size, a_size, lr, discount_rate, epsilon, epsilon_min, epsilon_decay)
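    # Methods assumed on DoubleDeepQNetwork (all used below): action() for epsilon-greedy
    # selection, test_action() for greedy selection, store() for replay memory,
    # experience_replay(), update_target_from_model(), and save_model().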
    rewards = []   # Store rewards for graphing
    epsilons = []  # Store the explore/exploit rate (epsilon)
    # Train the agent
    for e in range(TRAIN_Episodes):
        state = env.reset()
        # print(f"Initial state is: {state}")
        state = np.reshape(state, [1, s_size])  # Reshape so the state can be fed to .predict()
        tot_rewards = 0
        previous_action = 0
        for time in range(max_env_steps):
            action = DDQN_agent.action(state)
            next_state, reward, done, _ = env.step(action)
            # print(f'The next state is: {next_state}')
            # done: three collisions occurred in the last 10 steps
            # time == max_env_steps - 1: no collisions occurred for a full episode
            if done or time == max_env_steps - 1:
                rewards.append(tot_rewards)
                epsilons.append(DDQN_agent.epsilon)
                print("episode: {}/{}, score: {}, e: {}"
                      .format(e, TRAIN_Episodes, tot_rewards, DDQN_agent.epsilon))
                break
            # Apply the channel switching cost when the agent changes channel
            if action != previous_action:
                reward -= csc
            next_state = np.reshape(next_state, [1, s_size])
            tot_rewards += reward
            DDQN_agent.store(state, action, reward, next_state, done)  # Store the transition in replay memory
            state = next_state
            previous_action = action
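            # experience_replay(batch_size) is assumed to sample a random minibatch
            # from memory and fit the online network on Double DQN targets, roughly:
            #   a_star = argmax_a Q_online(s', a)
            #   y      = r + gamma * Q_target(s', a_star)    (y = r on terminal steps)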
            # Experience replay
            if len(DDQN_agent.memory) > batch_size:
                DDQN_agent.experience_replay(batch_size)
        # Update the target network weights after each episode (this could also be done every N steps)
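        # update_target_from_model() is assumed to copy the online network weights into
        # the target network (e.g. target.set_weights(model.get_weights()) in Keras).
        # Double DQN selects the greedy action with the online net but evaluates it with
        # the target net, which reduces Q-value overestimation.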
        DDQN_agent.update_target_from_model()
        # If our current NN passes, we are done
        # Early stopping criterion: average score of the last 10 episodes within 10% of the max
        if len(rewards) > 10 and np.average(rewards[-10:]) >= max_env_steps - 0.10 * max_env_steps:
            # Use the remaining episodes for testing
            remaining_Episodes = total_episodes - e
            train_end = e
            break
    # Testing
    print('Training complete. Testing started...')
    # TEST time: always exploit, since we don't train anymore
    total_transmissions = 0
    successful_transmissions = 0
    if remaining_Episodes == 0:
        train_end = TRAIN_Episodes
        TEST_Episodes = 100
    else:
        TEST_Episodes = total_episodes - train_end
    # Testing loop
    n_channel_switches = 0
    for e_test in range(TEST_Episodes):
        state = env.reset()
        state = np.reshape(state, [1, s_size])
        tot_rewards = 0
        previous_channel = 0
        for t_test in range(max_env_steps):
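            # test_action() is assumed to be the greedy counterpart of action():
            # it picks argmax_a Q(s, a) with no epsilon-random exploration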
            action = DDQN_agent.test_action(state)
            next_state, reward, done, _ = env.step(action)
            # done: more than 3 collisions occurred in the last 10 steps
            # t_test == max_env_steps - 1: no collisions occurred
            if done or t_test == max_env_steps - 1:
                rewards.append(tot_rewards)
                epsilons.append(0)  # Full exploit, so epsilon is 0
                print("episode: {}/{}, score: {}, e: {}"
                      .format(e_test, TEST_Episodes, tot_rewards, 0))
                break
            next_state = np.reshape(next_state, [1, s_size])
            tot_rewards += reward
            if action != previous_channel:
                n_channel_switches += 1
            if reward == 1:
                successful_transmissions += 1
            # Don't store anything during testing
            state = next_state
            previous_channel = action
            total_transmissions += 1
    # Plotting
    plotName = f'results/{network}/{jammerType}_csc_{csc}.png'
    plt.figure()  # Start a fresh figure so runs for different csc values don't overlap
    rolling_average = np.convolve(rewards, np.ones(10) / 10)
    plt.plot(rewards)
    plt.plot(rolling_average, color='black')
    plt.axhline(y=max_env_steps - 0.10 * max_env_steps, color='r', linestyle='-')  # "Solved" line
    # Scale epsilon (0.01 - 1.0) to the reward range (0 - max_env_steps)
    eps_graph = [max_env_steps * x for x in epsilons]
    plt.plot(eps_graph, color='g', linestyle='-')
    # Mark the episode where testing begins
    plt.axvline(x=train_end, color='y', linestyle='-')
    plt.xlim((0, train_end + TEST_Episodes))
    plt.ylim((0, max_env_steps))
    plt.xlabel('Episodes')
    plt.ylabel('Rewards')
    plt.savefig(plotName, bbox_inches='tight')
    # plt.show()
    plt.close()
    # Save results
    # Rewards
    fileName = f'results/{network}/rewards_{jammerType}_csc_{csc}.json'
    with open(fileName, 'w') as f:
        json.dump(rewards, f)
    # Normalized throughput
    normalizedThroughput = successful_transmissions / (TEST_Episodes * (max_env_steps - 2))
    print(f'The normalized throughput is: {normalizedThroughput}')
    fileName = f'results/{network}/throughput_{jammerType}_csc_{csc}.json'
    with open(fileName, 'w') as f:
        json.dump(normalizedThroughput, f)
    # Normalized channel switching rate
    normalized_cst = n_channel_switches / (TEST_Episodes * (max_env_steps - 2))
    print(f'The normalized channel switching rate is: {normalized_cst}')
    fileName = f'results/{network}/times_{jammerType}_csc_{csc}.json'
    with open(fileName, 'w') as f:
        json.dump(normalized_cst, f)
    # Save the trained agent
    agentName = f'savedAgents/{network}/DDQNAgent_{jammerType}_csc_{csc}'
    DDQN_agent.save_model(agentName)
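    # Close the environment (and the underlying ns-3 simulation) before the next csc run;
    # env.close() is the standard gym API for releasing simulator resources
    env.close()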