Updating the AntiJamEnv
- DDQN.py +5 -6
- antiJamEnv.py +34 -38
- app.py +22 -5
- tester.py +11 -96
- trainer.py +10 -77
DDQN.py
CHANGED
@@ -1,6 +1,7 @@
 import numpy as np
 from collections import deque
 from tensorflow import keras
+from tensorflow.keras.models import load_model
 import random
 
 
@@ -24,14 +25,9 @@ class DoubleDeepQNetwork:
     def build_model(self):
         model = keras.Sequential()  # linear stack of layers https://keras.io/models/sequential/
         model.add(keras.layers.Dense(24, input_dim=self.history * self.nS, activation='relu'))  # [Input] -> Layer 1
-        # Dense: Densely connected layer https://keras.io/layers/core/
-        # 24: Number of neurons
-        # input_dim: Number of input variables
-        # activation: Rectified Linear Unit (relu) ranges >= 0
         model.add(keras.layers.Dense(24, activation='relu'))  # Layer 2 -> 3
         model.add(keras.layers.Dense(self.nA, activation='linear'))  # Layer 3 -> [output]
-
-        # Linear activation on the last layer
+
         model.compile(loss='mean_squared_error',  # Loss function: Mean Squared Error
                       optimizer=keras.optimizers.Adam(
                           lr=self.alpha))  # Optimizer: Adam (Feel free to check other options)
@@ -59,6 +55,9 @@ class DoubleDeepQNetwork:
         # Save the agent model weights in a file
         self.model.save(agentName)
 
+    def load_saved_model(self, agent_name):
+        return load_model(agent_name)
+
     def experience_replay(self, batch_size):
         # Execute the experience replay
         minibatch = random.sample(self.memory, batch_size)  # Randomly sample from memory
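The new load_saved_model helper is a thin wrapper around Keras's load_model, pairing with the existing save_model. A minimal round-trip sketch, assuming the constructor signature used in trainer.py/tester.py; the hyperparameter values are illustrative only:

# Round-trip sketch: save the online network, then reload it into a fresh agent.
# Hyperparameter values are illustrative assumptions, not taken from this commit.
from DDQN import DoubleDeepQNetwork

s_size, a_size = 8, 8  # AntiJamEnv exposes 8 channels: 8 observations, 8 actions
agent = DoubleDeepQNetwork(s_size, a_size, 0.001, 0.95, 1.0, 0.01, 0.999)
agent.save_model('savedAgents/DDQNAgent_demo')                      # keras model.save under the hood
agent.model = agent.load_saved_model('savedAgents/DDQNAgent_demo')  # keras load_model under the hood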
antiJamEnv.py
CHANGED
@@ -16,47 +16,49 @@ import numpy as np
 
 
 class AntiJamEnv(gym.Env):
-    def __init__(self):
+    def __init__(self, jammer_type, channel_switching_cost):
         super(AntiJamEnv, self).__init__()
 
+        self.observation = None
+        self.jammer_frequency = None
+        self.jammer_mode = None
+        self.current_channel = None
         self.num_channels = 8
         self.channel_bandwidth = 20  # MHz
         self.frequency_range = [5180, 5320]  # MHz
+        self.frequency_lists = range(5180, 5340, 20)  # MHz
 
         self.observation_space = spaces.Box(low=-30, high=40, shape=(self.num_channels,), dtype=np.float32)
         self.action_space = spaces.Discrete(self.num_channels)
-
-        self.current_channel = np.random.randint(self.num_channels)
+        self.jammer_type = jammer_type
         self.jammer_modes = ['constant', 'random', 'sweeping']
-        self.jammer_frequency = np.random.uniform(self.frequency_range[0], self.frequency_range[1])
+        self.csc = channel_switching_cost
 
-        # Simulate received jamming power using normal distribution
-        jammed_power = np.random.normal(loc=30, scale=5)
-        adjacent_power = np.random.normal(loc=13, scale=3)
-        far_away_power = np.random.normal(loc=-7, scale=1)
-        return far_away_power
+        self._max_episode_steps = None
+
+    def reset(self):
+        self.current_channel = np.random.randint(self.num_channels)
+
+        if self.jammer_type == 'dynamic':
+            self.jammer_mode = np.random.choice(self.jammer_modes)
         else:
+            self.jammer_mode = self.jammer_type
+        self.jammer_frequency = self.frequency_lists[self.current_channel]
+
+        self.observation = np.array([self._get_received_power(i) for i in range(self.num_channels)])
+        return self.observation
 
     def step(self, action):
         assert self.action_space.contains(action), "Invalid action"
 
         received_power = self._get_received_power(action)
         if received_power >= 0:
-            reward = 1.0
-        else:
             reward = 0.0
+        else:
+            reward = 1.0
 
         if self.current_channel != action:
+            reward -= self.csc  # Channel switching cost
 
         self.current_channel = action
 
@@ -71,27 +73,21 @@ class AntiJamEnv(gym.Env):
 
         return self.observation, reward, False, {}
 
+    def _get_received_power(self, channel_idx):
+        # Simulate received jamming power using normal distribution
+        jammed_power = np.random.normal(loc=30, scale=5)
+        adjacent_power = np.random.normal(loc=13, scale=3)
+        far_away_power = np.random.normal(loc=-7, scale=1)
+
+        if channel_idx == self.current_channel:
+            return jammed_power
+        elif abs(channel_idx - self.current_channel) == 1:
+            return adjacent_power
+        else:
+            return far_away_power
 
     def render(self, mode='human'):
         pass
 
     def close(self):
         pass
-
-
-# Test the environment
-env = AntiJamEnv()
-observation = env.reset()
-for _ in range(10):
-    action = env.action_space.sample()
-    observation, reward, done, _ = env.step(action)
-    print("Action:", action, "Reward:", reward, "Observation:", observation)
-    if done:
-        break
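The module-level smoke test that previously ran on import has been removed. An equivalent check can still be run from a separate script; the sketch below adapts the removed loop to the new constructor, with 'constant' and 0.1 as example arguments:

# Quick smoke test for the refactored environment (example arguments).
from antiJamEnv import AntiJamEnv

env = AntiJamEnv(jammer_type='constant', channel_switching_cost=0.1)
observation = env.reset()
for _ in range(10):
    action = env.action_space.sample()
    observation, reward, done, _ = env.step(action)
    print("Action:", action, "Reward:", reward, "Observation:", observation)
    if done:
        break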
app.py
CHANGED
@@ -1,4 +1,8 @@
 import streamlit as st
+import trainer
+import tester
+import os
+
 
 def main():
     st.title("Beyond the Anti-Jam: Integration of DRL with LLM")
@@ -16,11 +20,24 @@ def main():
     st.write(f"Jammer Type: {jammer_type}")
     st.write(f"Channel Switching Cost: {channel_switching_cost}")
 
+    if st.button('Train'):
+        st.write("==================================================")
+        st.write('Training Starting')
+        trainer.train(jammer_type, channel_switching_cost)
+        st.write("Training completed")
+        st.write("==================================================")
+
+    if st.button('Test'):
+        st.write("==================================================")
+        st.write('Testing Starting')
+        agentName = f'savedAgents/DDQNAgent_{jammer_type}_csc_{channel_switching_cost}'
+        if os.path.exists(agentName):
+            tester.test(jammer_type, channel_switching_cost)
+            st.write("Testing completed")
+            st.write("==================================================")
+        else:
+            st.write("Agent has not been trained yet. Click Train First!!!")
+
 
 if __name__ == "__main__":
     main()
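The Train/Test buttons rely on jammer_type and channel_switching_cost being set earlier in main(); those widget lines are outside this hunk. A hypothetical sketch of such a selection block (the widget choices and option values are assumptions, not part of this commit):

# Hypothetical selection widgets feeding the Train/Test buttons shown above.
import streamlit as st

jammer_type = st.selectbox("Jammer Type", ['constant', 'sweeping', 'random', 'dynamic'])
channel_switching_cost = st.select_slider("Channel Switching Cost", options=[0, 0.05, 0.1, 0.15, 0.2])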
tester.py
CHANGED
@@ -11,16 +11,12 @@ import matplotlib as mpl
 import matplotlib.pyplot as plt
 import json
 from tensorflow import keras
+from DDQN import DoubleDeepQNetwork
+from antiJamEnv import AntiJamEnv
 
-jammerTypes = ['dynamic_pattern', 'combined', 'sweeping', 'random']
-jammerType = jammerTypes[0]
-network = 'FNN'
-cscs = [0, 0.1, 0.2, 0.3, 0.4]  # Channel switching cost
 
+def test(jammer_type, channel_switching_cost):
+    env = AntiJamEnv(jammer_type, channel_switching_cost)
     ob_space = env.observation_space
     ac_space = env.action_space
     print("Observation space: ", ob_space, ob_space.dtype)
@@ -30,9 +26,7 @@ for csc in cscs:
     a_size = ac_space.n
     total_episodes = 200
     max_env_steps = 100
-    TRAIN_Episodes = 100
-    remaining_Episodes = 0
+    TEST_Episodes = 100
     env._max_episode_steps = max_env_steps
 
     epsilon = 1.0  # exploration rate
@@ -42,69 +36,17 @@ for csc in cscs:
     lr = 0.001
     batch_size = 32
 
+    agentName = f'savedAgents/DDQNAgent_{jammer_type}_csc_{channel_switching_cost}'
     DDQN_agent = DoubleDeepQNetwork(s_size, a_size, lr, discount_rate, epsilon, epsilon_min, epsilon_decay)
+    DDQN_agent.model = DDQN_agent.load_saved_model(agentName)
     rewards = []  # Store rewards for graphing
     epsilons = []  # Store the Explore/Exploit
 
-    for e in range(TRAIN_Episodes):
-        state = env.reset()
-        # print(f"Initial state is: {state}")
-        state = np.reshape(state, [1, s_size])  # Resize to store in memory to pass to .predict
-        tot_rewards = 0
-        previous_action = 0
-        for time in range(max_env_steps):  # 200 is when you "solve" the game. This can continue forever as far as I know
-            action = DDQN_agent.action(state)
-            next_state, reward, done, _ = env.step(action)
-            # print(f'The next state is: {next_state}')
-            # done: Three collisions occurred in the last 10 steps.
-            # time == max_env_steps - 1 : No collisions occurred
-            if done or time == max_env_steps - 1:
-                rewards.append(tot_rewards)
-                epsilons.append(DDQN_agent.epsilon)
-                print("episode: {}/{}, score: {}, e: {}"
-                      .format(e, TRAIN_Episodes, tot_rewards, DDQN_agent.epsilon))
-                break
-            # Applying channel switching cost
-            if action != previous_action:
-                reward -= csc
-            next_state = np.reshape(next_state, [1, s_size])
-            tot_rewards += reward
-            DDQN_agent.store(state, action, reward, next_state, done)  # Resize to store in memory to pass to .predict
-            state = next_state
-            previous_action = action
-
-        # Experience Replay
-        if len(DDQN_agent.memory) > batch_size:
-            DDQN_agent.experience_replay(batch_size)
-        # Update the weights after each episode (You can configure this for x steps as well
-        DDQN_agent.update_target_from_model()
-        # If our current NN passes we are done
-        # Early stopping criteria: I am going to use the last 10 runs within 1% of the max
-        if len(rewards) > 10 and np.average(rewards[-10:]) >= max_env_steps - 0.10 * max_env_steps:
-            # Set the rest of the episodes for testing
-            remaining_Episodes = total_episodes - e
-            train_end = e
-            break
-
-    # Testing
-    print('Training complete. Testing started...')
-    # TEST Time
-    # In this section we ALWAYS use exploit as we don't train anymore
-    total_transmissions = 0
-    successful_transmissions = 0
-    if remaining_Episodes == 0:
-        train_end = TRAIN_Episodes
-        TEST_Episodes = 100
-    else:
-        TEST_Episodes = total_episodes - train_end
-    # Testing Loop
-    n_channel_switches = 0
+    # Testing agent
     for e_test in range(TEST_Episodes):
         state = env.reset()
         state = np.reshape(state, [1, s_size])
         tot_rewards = 0
-        previous_channel = 0
         for t_test in range(max_env_steps):
             action = DDQN_agent.test_action(state)
             next_state, reward, done, _ = env.step(action)
@@ -116,19 +58,11 @@ for csc in cscs:
                 break
             next_state = np.reshape(next_state, [1, s_size])
             tot_rewards += reward
-            if action != previous_channel:
-                n_channel_switches += 1
-            if reward == 1:
-                successful_transmissions += 1
             # DON'T STORE ANYTHING DURING TESTING
             state = next_state
-            previous_channel = action
-            # done: More than 3 collisions occurred in the last 10 steps.
-            # t_test == max_env_steps - 1: No collisions occurred
-            total_transmissions += 1
 
     # Plotting
+    plotName = f'results/test/rewards_{jammer_type}_csc_{channel_switching_cost}.png'
     rolling_average = np.convolve(rewards, np.ones(10) / 10)
     plt.plot(rewards)
     plt.plot(rolling_average, color='black')
@@ -136,32 +70,13 @@ for csc in cscs:
     # Scale Epsilon (0.001 - 1.0) to match reward (0 - 200) range
     eps_graph = [200 * x for x in epsilons]
     plt.plot(eps_graph, color='g', linestyle='-')
-    # Plot the line where TESTING begins
-    plt.axvline(x=train_end, color='y', linestyle='-')
-    plt.xlim((0, train_end+TEST_Episodes))
-    plt.ylim((0, max_env_steps))
     plt.xlabel('Episodes')
     plt.ylabel('Rewards')
     plt.savefig(plotName, bbox_inches='tight')
+    plt.show()
 
     # Save Results
     # Rewards
+    fileName = f'results/test/rewards_{jammer_type}_csc_{channel_switching_cost}.json'
     with open(fileName, 'w') as f:
         json.dump(rewards, f)
-    # Normalized throughput
-    normalizedThroughput = successful_transmissions / (TEST_Episodes*(max_env_steps-2))
-    print(f'The normalized throughput is: {normalizedThroughput}')
-    fileName = f'results/{network}/throughput_{jammerType}_csc_{csc}.json'
-    with open(fileName, 'w') as f:
-        json.dump(normalizedThroughput, f)
-    # Channel switching times
-    normalized_cst = n_channel_switches / (TEST_Episodes*(max_env_steps-2))
-    print(f'The normalized channel switching times is: {normalized_cst}')
-    fileName = f'results/{network}/times_{jammerType}_csc_{csc}.json'
-    with open(fileName, 'w') as f:
-        json.dump(normalized_cst, f)
-    # Save the agent as a SavedAgent.
-    agentName = f'savedAgents/{network}/DDQNAgent_{jammerType}_csc_{csc}'
-    DDQN_agent.save_model(agentName)
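tester.test can also be called outside Streamlit once a matching agent exists under savedAgents/. A short sketch with example arguments, reading back the rewards file that test() writes:

# Example arguments; requires savedAgents/DDQNAgent_constant_csc_0.1 from a prior training run.
import json
import tester

tester.test('constant', 0.1)
with open('results/test/rewards_constant_csc_0.1.json') as f:
    rewards = json.load(f)
print(f"{len(rewards)} test episodes, mean reward {sum(rewards) / len(rewards):.2f}")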
trainer.py
CHANGED
@@ -12,14 +12,11 @@ import matplotlib.pyplot as plt
 import json
 from tensorflow import keras
 from DDQN import DoubleDeepQNetwork
+from antiJamEnv import AntiJamEnv
 
-jammerTypes = ['dynamic_pattern', 'combined', 'sweeping', 'random']
-jammerType = jammerTypes[0]
-network = 'FNN'
-cscs = [0, 0.05, 0.1, 0.15, 0.2]  # Channel switching cost
 
+def train(jammer_type, channel_switching_cost):
+    env = AntiJamEnv(jammer_type, channel_switching_cost)
     ob_space = env.observation_space
     ac_space = env.action_space
     print("Observation space: ", ob_space, ob_space.dtype)
@@ -27,11 +24,8 @@ for csc in cscs:
 
     s_size = ob_space.shape[0]
     a_size = ac_space.n
-    total_episodes = 200
     max_env_steps = 100
-    train_end = 0
     TRAIN_Episodes = 100
-    remaining_Episodes = 0
     env._max_episode_steps = max_env_steps
 
     epsilon = 1.0  # exploration rate
@@ -65,13 +59,10 @@ for csc in cscs:
                       .format(e, TRAIN_Episodes, tot_rewards, DDQN_agent.epsilon))
                 break
             # Applying channel switching cost
-            if action != previous_action:
-                reward -= csc
             next_state = np.reshape(next_state, [1, s_size])
             tot_rewards += reward
             DDQN_agent.store(state, action, reward, next_state, done)  # Resize to store in memory to pass to .predict
             state = next_state
-            previous_action = action
 
         # Experience Replay
         if len(DDQN_agent.memory) > batch_size:
@@ -81,86 +72,28 @@ for csc in cscs:
         # If our current NN passes we are done
         # Early stopping criteria: I am going to use the last 10 runs within 1% of the max
         if len(rewards) > 10 and np.average(rewards[-10:]) >= max_env_steps - 0.10 * max_env_steps:
-            # Set the rest of the episodes for testing
-            remaining_Episodes = total_episodes - e
-            train_end = e
             break
 
-    # Testing
-    print('Training complete. Testing started...')
-    # TEST Time
-    # In this section we ALWAYS use exploit as we don't train anymore
-    total_transmissions = 0
-    successful_transmissions = 0
-    if remaining_Episodes == 0:
-        train_end = TRAIN_Episodes
-        TEST_Episodes = 100
-    else:
-        TEST_Episodes = total_episodes - train_end
-    # Testing Loop
-    n_channel_switches = 0
-    for e_test in range(TEST_Episodes):
-        state = env.reset()
-        state = np.reshape(state, [1, s_size])
-        tot_rewards = 0
-        previous_channel = 0
-        for t_test in range(max_env_steps):
-            action = DDQN_agent.test_action(state)
-            next_state, reward, done, _ = env.step(action)
-            if done or t_test == max_env_steps - 1:
-                rewards.append(tot_rewards)
-                epsilons.append(0)  # We are doing full exploit
-                print("episode: {}/{}, score: {}, e: {}"
-                      .format(e_test, TEST_Episodes, tot_rewards, 0))
-                break
-            next_state = np.reshape(next_state, [1, s_size])
-            tot_rewards += reward
-            if action != previous_channel:
-                n_channel_switches += 1
-            if reward == 1:
-                successful_transmissions += 1
-            # DON'T STORE ANYTHING DURING TESTING
-            state = next_state
-            previous_channel = action
-            # done: More than 3 collisions occurred in the last 10 steps.
-            # t_test == max_env_steps - 1: No collisions occurred
-            total_transmissions += 1
-
     # Plotting
+    plotName = f'results/train/rewards_{jammer_type}_csc_{channel_switching_cost}.png'
     rolling_average = np.convolve(rewards, np.ones(10) / 10)
     plt.plot(rewards)
     plt.plot(rolling_average, color='black')
     plt.axhline(y=max_env_steps - 0.10 * max_env_steps, color='r', linestyle='-')  # Solved Line
+    # Scale Epsilon (0.001 - 1.0) to match reward (0 - 100) range
+    eps_graph = [100 * x for x in epsilons]
     plt.plot(eps_graph, color='g', linestyle='-')
-    # Plot the line where TESTING begins
-    plt.axvline(x=train_end, color='y', linestyle='-')
-    plt.xlim((0, train_end+TEST_Episodes))
-    plt.ylim((0, max_env_steps))
     plt.xlabel('Episodes')
     plt.ylabel('Rewards')
     plt.savefig(plotName, bbox_inches='tight')
+    plt.show()
 
     # Save Results
     # Rewards
+    fileName = f'results/train/rewards_{jammer_type}_csc_{channel_switching_cost}.json'
     with open(fileName, 'w') as f:
         json.dump(rewards, f)
-    normalizedThroughput = successful_transmissions / (TEST_Episodes*(max_env_steps-2))
-    print(f'The normalized throughput is: {normalizedThroughput}')
-    fileName = f'results/{network}/throughput_{jammerType}_csc_{csc}.json'
-    with open(fileName, 'w') as f:
-        json.dump(normalizedThroughput, f)
-    # Channel switching times
-    normalized_cst = n_channel_switches / (TEST_Episodes*(max_env_steps-2))
-    print(f'The normalized channel switching times is: {normalized_cst}')
-    fileName = f'results/{network}/times_{jammerType}_csc_{csc}.json'
-    with open(fileName, 'w') as f:
-        json.dump(normalized_cst, f)
+
     # Save the agent as a SavedAgent.
+    agentName = f'savedAgents/DDQNAgent_{jammer_type}_csc_{channel_switching_cost}'
     DDQN_agent.save_model(agentName)
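Likewise, trainer.train can be invoked directly (this is the same call the Streamlit Train button makes). With the example arguments 'constant' and 0.1, the artifacts land at the paths built from the f-strings above:

# Example arguments; the results/train/ and savedAgents/ directories are assumed to exist.
import trainer

trainer.train('constant', 0.1)
# Produces:
#   results/train/rewards_constant_csc_0.1.png   (reward curve)
#   results/train/rewards_constant_csc_0.1.json  (per-episode rewards)
#   savedAgents/DDQNAgent_constant_csc_0.1       (saved model, later loaded by tester.py)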