asataura committed
Commit c4ae7c9 · 1 Parent(s): 9c64fc0

Updating the AntiJamEnv

Files changed (5):
  1. DDQN.py +5 -6
  2. antiJamEnv.py +34 -38
  3. app.py +22 -5
  4. tester.py +11 -96
  5. trainer.py +10 -77
DDQN.py CHANGED
@@ -1,6 +1,7 @@
 import numpy as np
 from collections import deque
 from tensorflow import keras
+from tensorflow.keras.models import load_model
 import random
 
 
@@ -24,14 +25,9 @@ class DoubleDeepQNetwork:
     def build_model(self):
         model = keras.Sequential()  # linear stack of layers https://keras.io/models/sequential/
         model.add(keras.layers.Dense(24, input_dim=self.history * self.nS, activation='relu'))  # [Input] -> Layer 1
-        # Dense: Densely connected layer https://keras.io/layers/core/
-        # 24: Number of neurons
-        # input_dim: Number of input variables
-        # activation: Rectified Linear Unit (relu) ranges >= 0
         model.add(keras.layers.Dense(24, activation='relu'))  # Layer 2 -> 3
         model.add(keras.layers.Dense(self.nA, activation='linear'))  # Layer 3 -> [output]
-        # Size has to match the output (different actions)
-        # Linear activation on the last layer
+
         model.compile(loss='mean_squared_error',  # Loss function: Mean Squared Error
                       optimizer=keras.optimizers.Adam(
                           lr=self.alpha))  # Optimizer: Adam (Feel free to check other options)
@@ -59,6 +55,9 @@ class DoubleDeepQNetwork:
         # Save the agent model weights in a file
         self.model.save(agentName)
 
+    def load_saved_model(self, agent_name):
+        return load_model(agent_name)
+
     def experience_replay(self, batch_size):
         # Execute the experience replay
         minibatch = random.sample(self.memory, batch_size)  # Randomly sample from memory
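
For reference, a minimal sketch of the save/load round trip that the new load_saved_model helper enables. The constructor call mirrors the ones in trainer.py and tester.py; the hyperparameter values and the agent path are illustrative assumptions, not taken from this commit.

    from DDQN import DoubleDeepQNetwork

    # Illustrative sizes and hyperparameters (assumed, not from this commit).
    s_size, a_size = 8, 8
    lr, discount_rate = 0.001, 0.95
    epsilon, epsilon_min, epsilon_decay = 1.0, 0.01, 0.999

    # Train-side agent: build, (train), then persist the underlying Keras model.
    agent = DoubleDeepQNetwork(s_size, a_size, lr, discount_rate, epsilon, epsilon_min, epsilon_decay)
    agent.save_model('savedAgents/DDQNAgent_constant_csc_0.0')  # illustrative path in the format used by this commit

    # Test-side agent: rebuild the wrapper and swap in the restored Keras model.
    restored = DoubleDeepQNetwork(s_size, a_size, lr, discount_rate, epsilon, epsilon_min, epsilon_decay)
    restored.model = restored.load_saved_model('savedAgents/DDQNAgent_constant_csc_0.0')
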
antiJamEnv.py CHANGED
@@ -16,47 +16,49 @@ import numpy as np
 
 
 class AntiJamEnv(gym.Env):
-    def __init__(self):
+    def __init__(self, jammer_type, channel_switching_cost):
         super(AntiJamEnv, self).__init__()
 
+        self.observation = None
+        self.jammer_frequency = None
+        self.jammer_mode = None
+        self.current_channel = None
         self.num_channels = 8
         self.channel_bandwidth = 20  # MHz
         self.frequency_range = [5180, 5320]  # MHz
+        self.frequency_lists = range(5180, 5340, 20)  # MHz
 
         self.observation_space = spaces.Box(low=-30, high=40, shape=(self.num_channels,), dtype=np.float32)
         self.action_space = spaces.Discrete(self.num_channels)
-
-        self.current_channel = np.random.randint(self.num_channels)
+        self.jammer_type = jammer_type
         self.jammer_modes = ['constant', 'random', 'sweeping']
-        self.jammer_mode = np.random.choice(self.jammer_modes)
-        self.jammer_frequency = np.random.uniform(self.frequency_range[0], self.frequency_range[1])
+        self.csc = channel_switching_cost
 
-    def _get_received_power(self, channel_idx):
-        # Simulate received jamming power using normal distribution
-        jammed_power = np.random.normal(loc=30, scale=5)
-        adjacent_power = np.random.normal(loc=13, scale=3)
-        far_away_power = np.random.normal(loc=-7, scale=1)
+        self._max_episode_steps = None
 
-        if channel_idx == self.current_channel:
-            return jammed_power
-        elif abs(channel_idx - self.current_channel) == 1:
-            return adjacent_power
-        elif abs(channel_idx - self.current_channel) >= 3:
-            return far_away_power
+    def reset(self):
+        self.current_channel = np.random.randint(self.num_channels)
+
+        if self.jammer_type == 'dynamic':
+            self.jammer_mode = np.random.choice(self.jammer_modes)
         else:
-            return -30  # Unjammed
+            self.jammer_mode = self.jammer_type
+        self.jammer_frequency = self.frequency_lists[self.current_channel]
+
+        self.observation = np.array([self._get_received_power(i) for i in range(self.num_channels)])
+        return self.observation
 
     def step(self, action):
         assert self.action_space.contains(action), "Invalid action"
 
         received_power = self._get_received_power(action)
         if received_power >= 0:
-            reward = 1.0
-        else:
             reward = 0.0
+        else:
+            reward = 1.0
 
         if self.current_channel != action:
-            reward *= 0.9  # Channel switching cost
+            reward -= self.csc  # Channel switching cost
 
         self.current_channel = action
 
@@ -71,27 +73,21 @@ class AntiJamEnv(gym.Env):
 
         return self.observation, reward, False, {}
 
-    def reset(self):
-        self.current_channel = np.random.randint(self.num_channels)
-        self.jammer_mode = np.random.choice(self.jammer_modes)
-        self.jammer_frequency = np.random.uniform(self.frequency_range[0], self.frequency_range[1])
+    def _get_received_power(self, channel_idx):
+        # Simulate received jamming power using normal distribution
+        jammed_power = np.random.normal(loc=30, scale=5)
+        adjacent_power = np.random.normal(loc=13, scale=3)
+        far_away_power = np.random.normal(loc=-7, scale=1)
 
-        self.observation = np.array([self._get_received_power(i) for i in range(self.num_channels)])
-        return self.observation
+        if channel_idx == self.current_channel:
+            return jammed_power
+        elif abs(channel_idx - self.current_channel) == 1:
+            return adjacent_power
+        else:
+            return far_away_power
 
     def render(self, mode='human'):
         pass
 
     def close(self):
-        pass
-
-
-# Test the environment
-env = AntiJamEnv()
-observation = env.reset()
-for _ in range(10):
-    action = env.action_space.sample()
-    observation, reward, done, _ = env.step(action)
-    print("Action:", action, "Reward:", reward, "Observation:", observation)
-    if done:
-        break
+        pass
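
The inline smoke test that previously sat at the bottom of antiJamEnv.py is gone; an equivalent stand-alone check against the new constructor signature could look like this (the jammer type and switching cost are illustrative choices):

    from antiJamEnv import AntiJamEnv

    # Drive the environment for a few steps with random actions, mirroring the removed test.
    env = AntiJamEnv(jammer_type='constant', channel_switching_cost=0.1)
    observation = env.reset()
    for _ in range(10):
        action = env.action_space.sample()
        observation, reward, done, _ = env.step(action)
        print("Action:", action, "Reward:", reward, "Observation:", observation)
        if done:  # step() currently always returns done=False, so this check is only defensive
            break
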
 
 
 
 
 
 
 
 
 
 
 
app.py CHANGED
@@ -1,4 +1,8 @@
 import streamlit as st
+import trainer
+import tester
+import os
+
 
 def main():
     st.title("Beyond the Anti-Jam: Integration of DRL with LLM")
@@ -16,11 +20,24 @@ def main():
     st.write(f"Jammer Type: {jammer_type}")
     st.write(f"Channel Switching Cost: {channel_switching_cost}")
 
-    st.write("==================================================")
-    st.write("Training Starting")
-    st.write("Training completed")
-    st.write("==================================================")
-    st.write("")
+    if st.button('Train'):
+        st.write("==================================================")
+        st.write('Training Starting')
+        trainer.train(jammer_type, channel_switching_cost)
+        st.write("Training completed")
+        st.write("==================================================")
+
+    if st.button('Test'):
+        st.write("==================================================")
+        st.write('Testing Starting')
+        agentName = f'savedAgents/DDQNAgent_{jammer_type}_csc_{channel_switching_cost}'
+        if os.path.exists(agentName):
+            tester.test(jammer_type, channel_switching_cost)
+            st.write("Testing completed")
+            st.write("==================================================")
+        else:
+            st.write("Agent has not been trained yet. Click Train First!!!")
+
 
 if __name__ == "__main__":
     main()
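
The app is launched with "streamlit run app.py". For a quick check outside the UI, the same train-then-test flow the two buttons drive can be exercised directly; a sketch, with illustrative values for the inputs the app collects from its widgets:

    import os
    import trainer
    import tester

    jammer_type = 'constant'          # illustrative; app.py takes this from the UI
    channel_switching_cost = 0.1      # illustrative; app.py takes this from the UI

    trainer.train(jammer_type, channel_switching_cost)

    agentName = f'savedAgents/DDQNAgent_{jammer_type}_csc_{channel_switching_cost}'
    if os.path.exists(agentName):     # same guard app.py applies before testing
        tester.test(jammer_type, channel_switching_cost)
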
tester.py CHANGED
@@ -11,16 +11,12 @@ import matplotlib as mpl
 import matplotlib.pyplot as plt
 import json
 from tensorflow import keras
-from ns3gym import ns3env
-from DDQN_FNN import DoubleDeepQNetwork
+from DDQN import DoubleDeepQNetwork
+from antiJamEnv import AntiJamEnv
 
-jammerTypes = ['dynamic_pattern', 'combined', 'sweeping', 'random']
-jammerType = jammerTypes[0]
-network = 'FNN'
-cscs = [0, 0.1, 0.2, 0.3, 0.4]  # Channel switching cost
 
-for csc in cscs:
-    env = gym.make('ns3-v0')
+def test(jammer_type, channel_switching_cost):
+    env = AntiJamEnv(jammer_type, channel_switching_cost)
     ob_space = env.observation_space
     ac_space = env.action_space
     print("Observation space: ", ob_space, ob_space.dtype)
@@ -30,9 +26,7 @@ for csc in cscs:
     a_size = ac_space.n
     total_episodes = 200
     max_env_steps = 100
-    train_end = 0
-    TRAIN_Episodes = 100
-    remaining_Episodes = 0
+    TEST_Episodes = 100
    env._max_episode_steps = max_env_steps
 
     epsilon = 1.0  # exploration rate
@@ -42,69 +36,17 @@ for csc in cscs:
     lr = 0.001
     batch_size = 32
 
+    agentName = f'savedAgents/DDQNAgent_{jammer_type}_csc_{channel_switching_cost}'
     DDQN_agent = DoubleDeepQNetwork(s_size, a_size, lr, discount_rate, epsilon, epsilon_min, epsilon_decay)
+    DDQN_agent.model = DDQN_agent.load_saved_model(agentName)
     rewards = []  # Store rewards for graphing
     epsilons = []  # Store the Explore/Exploit
 
-    # Training agent
-    for e in range(TRAIN_Episodes):
-        state = env.reset()
-        # print(f"Initial state is: {state}")
-        state = np.reshape(state, [1, s_size])  # Resize to store in memory to pass to .predict
-        tot_rewards = 0
-        previous_action = 0
-        for time in range(max_env_steps):  # 200 is when you "solve" the game. This can continue forever as far as I know
-            action = DDQN_agent.action(state)
-            next_state, reward, done, _ = env.step(action)
-            # print(f'The next state is: {next_state}')
-            # done: Three collisions occurred in the last 10 steps.
-            # time == max_env_steps - 1 : No collisions occurred
-            if done or time == max_env_steps - 1:
-                rewards.append(tot_rewards)
-                epsilons.append(DDQN_agent.epsilon)
-                print("episode: {}/{}, score: {}, e: {}"
-                      .format(e, TRAIN_Episodes, tot_rewards, DDQN_agent.epsilon))
-                break
-            # Applying channel switching cost
-            if action != previous_action:
-                reward -= csc
-            next_state = np.reshape(next_state, [1, s_size])
-            tot_rewards += reward
-            DDQN_agent.store(state, action, reward, next_state, done)  # Resize to store in memory to pass to .predict
-            state = next_state
-            previous_action = action
-
-        # Experience Replay
-        if len(DDQN_agent.memory) > batch_size:
-            DDQN_agent.experience_replay(batch_size)
-        # Update the weights after each episode (You can configure this for x steps as well
-        DDQN_agent.update_target_from_model()
-        # If our current NN passes we are done
-        # Early stopping criteria: I am going to use the last 10 runs within 1% of the max
-        if len(rewards) > 10 and np.average(rewards[-10:]) >= max_env_steps - 0.10 * max_env_steps:
-            # Set the rest of the episodes for testing
-            remaining_Episodes = total_episodes - e
-            train_end = e
-            break
-
-    # Testing
-    print('Training complete. Testing started...')
-    # TEST Time
-    # In this section we ALWAYS use exploit as we don't train anymore
-    total_transmissions = 0
-    successful_transmissions = 0
-    if remaining_Episodes == 0:
-        train_end = TRAIN_Episodes
-        TEST_Episodes = 100
-    else:
-        TEST_Episodes = total_episodes - train_end
-    # Testing Loop
-    n_channel_switches = 0
+    # Testing agent
     for e_test in range(TEST_Episodes):
         state = env.reset()
         state = np.reshape(state, [1, s_size])
         tot_rewards = 0
-        previous_channel = 0
         for t_test in range(max_env_steps):
             action = DDQN_agent.test_action(state)
             next_state, reward, done, _ = env.step(action)
@@ -116,19 +58,11 @@ for csc in cscs:
                 break
             next_state = np.reshape(next_state, [1, s_size])
             tot_rewards += reward
-            if action != previous_channel:
-                n_channel_switches += 1
-            if reward == 1:
-                successful_transmissions += 1
             # DON'T STORE ANYTHING DURING TESTING
             state = next_state
-            previous_channel = action
-            # done: More than 3 collisions occurred in the last 10 steps.
-            # t_test == max_env_steps - 1: No collisions occurred
-        total_transmissions += 1
 
     # Plotting
-    plotName = f'results/{network}/{jammerType}_csc_{csc}.png'
+    plotName = f'results/test/rewards_{jammer_type}_csc_{channel_switching_cost}.png'
     rolling_average = np.convolve(rewards, np.ones(10) / 10)
     plt.plot(rewards)
     plt.plot(rolling_average, color='black')
@@ -136,32 +70,13 @@ for csc in cscs:
     # Scale Epsilon (0.001 - 1.0) to match reward (0 - 200) range
     eps_graph = [200 * x for x in epsilons]
     plt.plot(eps_graph, color='g', linestyle='-')
-    # Plot the line where TESTING begins
-    plt.axvline(x=train_end, color='y', linestyle='-')
-    plt.xlim((0, train_end+TEST_Episodes))
-    plt.ylim((0, max_env_steps))
     plt.xlabel('Episodes')
     plt.ylabel('Rewards')
     plt.savefig(plotName, bbox_inches='tight')
-    # plt.show()
+    plt.show()
 
     # Save Results
     # Rewards
-    fileName = f'results/{network}/rewards_{jammerType}_csc_{csc}.json'
+    fileName = f'results/test/rewards_{jammer_type}_csc_{channel_switching_cost}.json'
     with open(fileName, 'w') as f:
         json.dump(rewards, f)
-    # Normalized throughput
-    normalizedThroughput = successful_transmissions / (TEST_Episodes*(max_env_steps-2))
-    print(f'The normalized throughput is: {normalizedThroughput}')
-    fileName = f'results/{network}/throughput_{jammerType}_csc_{csc}.json'
-    with open(fileName, 'w') as f:
-        json.dump(normalizedThroughput, f)
-    # Channel switching times
-    normalized_cst = n_channel_switches / (TEST_Episodes*(max_env_steps-2))
-    print(f'The normalized channel switching times is: {normalized_cst}')
-    fileName = f'results/{network}/times_{jammerType}_csc_{csc}.json'
-    with open(fileName, 'w') as f:
-        json.dump(normalized_cst, f)
-    # Save the agent as a SavedAgent.
-    agentName = f'savedAgents/{network}/DDQNAgent_{jammerType}_csc_{csc}'
-    DDQN_agent.save_model(agentName)
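
The evaluation loop relies on DDQN_agent.test_action, which is not part of this diff. In a typical Double DQN implementation it is a pure greedy pick over the online network's Q-values; a sketch of that idea, assuming the reshaped [1, s_size] state used above (names here are hypothetical, not the repository's API):

    import numpy as np

    def greedy_action(model, state):
        # Exploit-only selection for evaluation: no epsilon, no replay-memory writes.
        q_values = model.predict(state, verbose=0)  # shape (1, a_size)
        return int(np.argmax(q_values[0]))
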
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
trainer.py CHANGED
@@ -12,14 +12,11 @@ import matplotlib.pyplot as plt
 import json
 from tensorflow import keras
 from DDQN import DoubleDeepQNetwork
+from antiJamEnv import AntiJamEnv
 
-jammerTypes = ['dynamic_pattern', 'combined', 'sweeping', 'random']
-jammerType = jammerTypes[0]
-network = 'FNN'
-cscs = [0, 0.05, 0.1, 0.15, 0.2]  # Channel switching cost
 
-for csc in cscs:
-    env = gym.make('ns3-v0')
+def train(jammer_type, channel_switching_cost):
+    env = AntiJamEnv(jammer_type, channel_switching_cost)
     ob_space = env.observation_space
     ac_space = env.action_space
     print("Observation space: ", ob_space, ob_space.dtype)
@@ -27,11 +24,8 @@ for csc in cscs:
 
     s_size = ob_space.shape[0]
     a_size = ac_space.n
-    total_episodes = 200
     max_env_steps = 100
-    train_end = 0
     TRAIN_Episodes = 100
-    remaining_Episodes = 0
    env._max_episode_steps = max_env_steps
 
     epsilon = 1.0  # exploration rate
@@ -65,13 +59,10 @@ for csc in cscs:
                       .format(e, TRAIN_Episodes, tot_rewards, DDQN_agent.epsilon))
                 break
             # Applying channel switching cost
-            if action != previous_action:
-                reward -= csc
             next_state = np.reshape(next_state, [1, s_size])
             tot_rewards += reward
             DDQN_agent.store(state, action, reward, next_state, done)  # Resize to store in memory to pass to .predict
             state = next_state
-            previous_action = action
 
         # Experience Replay
         if len(DDQN_agent.memory) > batch_size:
@@ -81,86 +72,28 @@ for csc in cscs:
         # If our current NN passes we are done
         # Early stopping criteria: I am going to use the last 10 runs within 1% of the max
         if len(rewards) > 10 and np.average(rewards[-10:]) >= max_env_steps - 0.10 * max_env_steps:
-            # Set the rest of the episodes for testing
-            remaining_Episodes = total_episodes - e
-            train_end = e
             break
 
-    # Testing
-    print('Training complete. Testing started...')
-    # TEST Time
-    # In this section we ALWAYS use exploit as we don't train anymore
-    total_transmissions = 0
-    successful_transmissions = 0
-    if remaining_Episodes == 0:
-        train_end = TRAIN_Episodes
-        TEST_Episodes = 100
-    else:
-        TEST_Episodes = total_episodes - train_end
-    # Testing Loop
-    n_channel_switches = 0
-    for e_test in range(TEST_Episodes):
-        state = env.reset()
-        state = np.reshape(state, [1, s_size])
-        tot_rewards = 0
-        previous_channel = 0
-        for t_test in range(max_env_steps):
-            action = DDQN_agent.test_action(state)
-            next_state, reward, done, _ = env.step(action)
-            if done or t_test == max_env_steps - 1:
-                rewards.append(tot_rewards)
-                epsilons.append(0)  # We are doing full exploit
-                print("episode: {}/{}, score: {}, e: {}"
-                      .format(e_test, TEST_Episodes, tot_rewards, 0))
-                break
-            next_state = np.reshape(next_state, [1, s_size])
-            tot_rewards += reward
-            if action != previous_channel:
-                n_channel_switches += 1
-            if reward == 1:
-                successful_transmissions += 1
-            # DON'T STORE ANYTHING DURING TESTING
-            state = next_state
-            previous_channel = action
-            # done: More than 3 collisions occurred in the last 10 steps.
-            # t_test == max_env_steps - 1: No collisions occurred
-        total_transmissions += 1
-
     # Plotting
-    plotName = f'results/{network}/{jammerType}_csc_{csc}.png'
+    plotName = f'results/train/rewards_{jammer_type}_csc_{channel_switching_cost}.png'
     rolling_average = np.convolve(rewards, np.ones(10) / 10)
     plt.plot(rewards)
     plt.plot(rolling_average, color='black')
     plt.axhline(y=max_env_steps - 0.10 * max_env_steps, color='r', linestyle='-')  # Solved Line
-    # Scale Epsilon (0.001 - 1.0) to match reward (0 - 200) range
-    eps_graph = [200 * x for x in epsilons]
+    # Scale Epsilon (0.001 - 1.0) to match reward (0 - 100) range
+    eps_graph = [100 * x for x in epsilons]
     plt.plot(eps_graph, color='g', linestyle='-')
-    # Plot the line where TESTING begins
-    plt.axvline(x=train_end, color='y', linestyle='-')
-    plt.xlim((0, train_end+TEST_Episodes))
-    plt.ylim((0, max_env_steps))
     plt.xlabel('Episodes')
     plt.ylabel('Rewards')
     plt.savefig(plotName, bbox_inches='tight')
-    # plt.show()
+    plt.show()
 
     # Save Results
     # Rewards
-    fileName = f'results/{network}/rewards_{jammerType}_csc_{csc}.json'
+    fileName = f'results/train/rewards_{jammer_type}_csc_{channel_switching_cost}.json'
     with open(fileName, 'w') as f:
         json.dump(rewards, f)
-    # Normalized throughput
-    normalizedThroughput = successful_transmissions / (TEST_Episodes*(max_env_steps-2))
-    print(f'The normalized throughput is: {normalizedThroughput}')
-    fileName = f'results/{network}/throughput_{jammerType}_csc_{csc}.json'
-    with open(fileName, 'w') as f:
-        json.dump(normalizedThroughput, f)
-    # Channel switching times
-    normalized_cst = n_channel_switches / (TEST_Episodes*(max_env_steps-2))
-    print(f'The normalized channel switching times is: {normalized_cst}')
-    fileName = f'results/{network}/times_{jammerType}_csc_{csc}.json'
-    with open(fileName, 'w') as f:
-        json.dump(normalized_cst, f)
+
     # Save the agent as a SavedAgent.
-    agentName = f'savedAgents/{network}/DDQNAgent_{jammerType}_csc_{csc}'
+    agentName = f'savedAgents/DDQNAgent_{jammer_type}_csc_{channel_switching_cost}'
     DDQN_agent.save_model(agentName)
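
The early-stopping rule keeps training until the last ten episode scores average at least 90% of max_env_steps (90 with the values above). A small self-contained illustration of that check, with made-up scores:

    import numpy as np

    max_env_steps = 100
    solved_threshold = max_env_steps - 0.10 * max_env_steps  # 90.0

    rewards = [62, 75, 88, 90, 91, 93, 94, 95, 96, 97, 98, 99]  # toy episode totals
    if len(rewards) > 10 and np.average(rewards[-10:]) >= solved_threshold:
        print("early stop: last-10 average =", np.average(rewards[-10:]))  # 94.1 here, so training would stop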