Updating the AntiJamEnv
- DDQN.py +5 -6
- antiJamEnv.py +34 -38
- app.py +22 -5
- tester.py +11 -96
- trainer.py +10 -77
DDQN.py
CHANGED
@@ -1,6 +1,7 @@
 import numpy as np
 from collections import deque
 from tensorflow import keras
+from tensorflow.keras.models import load_model
 import random
 
 
@@ -24,14 +25,9 @@ class DoubleDeepQNetwork:
     def build_model(self):
         model = keras.Sequential()  # linear stack of layers https://keras.io/models/sequential/
         model.add(keras.layers.Dense(24, input_dim=self.history * self.nS, activation='relu'))  # [Input] -> Layer 1
-        # Dense: Densely connected layer https://keras.io/layers/core/
-        # 24: Number of neurons
-        # input_dim: Number of input variables
-        # activation: Rectified Linear Unit (relu) ranges >= 0
         model.add(keras.layers.Dense(24, activation='relu'))  # Layer 2 -> 3
         model.add(keras.layers.Dense(self.nA, activation='linear'))  # Layer 3 -> [output]
-
-        # Linear activation on the last layer
+
         model.compile(loss='mean_squared_error',  # Loss function: Mean Squared Error
                       optimizer=keras.optimizers.Adam(
                           lr=self.alpha))  # Optimizer: Adam (Feel free to check other options)
@@ -59,6 +55,9 @@ class DoubleDeepQNetwork:
         # Save the agent model weights in a file
         self.model.save(agentName)
 
+    def load_saved_model(self, agent_name):
+        return load_model(agent_name)
+
     def experience_replay(self, batch_size):
         # Execute the experience replay
         minibatch = random.sample(self.memory, batch_size)  # Randomly sample from memory
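The new load_saved_model helper is a thin wrapper around Keras's load_model, pairing with the existing save_model. A minimal round-trip sketch, assuming the constructor signature used in trainer.py/tester.py; the hyperparameter values are illustrative only:

# Round-trip sketch: save the online network, then reload it into a fresh agent.
# Hyperparameter values are illustrative assumptions, not taken from this commit.
from DDQN import DoubleDeepQNetwork

s_size, a_size = 8, 8  # AntiJamEnv exposes 8 channels: 8 observations, 8 actions
agent = DoubleDeepQNetwork(s_size, a_size, 0.001, 0.95, 1.0, 0.01, 0.999)
agent.save_model('savedAgents/DDQNAgent_demo')                      # keras model.save under the hood
agent.model = agent.load_saved_model('savedAgents/DDQNAgent_demo')  # keras load_model under the hood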
antiJamEnv.py
CHANGED
@@ -16,47 +16,49 @@ import numpy as np
 
 
 class AntiJamEnv(gym.Env):
-    def __init__(self):
+    def __init__(self, jammer_type, channel_switching_cost):
         super(AntiJamEnv, self).__init__()
 
+        self.observation = None
+        self.jammer_frequency = None
+        self.jammer_mode = None
+        self.current_channel = None
         self.num_channels = 8
         self.channel_bandwidth = 20  # MHz
         self.frequency_range = [5180, 5320]  # MHz
+        self.frequency_lists = range(5180, 5340, 20)  # MHz
 
         self.observation_space = spaces.Box(low=-30, high=40, shape=(self.num_channels,), dtype=np.float32)
         self.action_space = spaces.Discrete(self.num_channels)
-
-        self.current_channel = np.random.randint(self.num_channels)
+        self.jammer_type = jammer_type
         self.jammer_modes = ['constant', 'random', 'sweeping']
-        self.jammer_frequency = np.random.uniform(self.frequency_range[0], self.frequency_range[1])
+        self.csc = channel_switching_cost
 
-        # Simulate received jamming power using normal distribution
-        jammed_power = np.random.normal(loc=30, scale=5)
-        adjacent_power = np.random.normal(loc=13, scale=3)
-        far_away_power = np.random.normal(loc=-7, scale=1)
-        return far_away_power
+        self._max_episode_steps = None
+
+    def reset(self):
+        self.current_channel = np.random.randint(self.num_channels)
+
+        if self.jammer_type == 'dynamic':
+            self.jammer_mode = np.random.choice(self.jammer_modes)
         else:
+            self.jammer_mode = self.jammer_type
+        self.jammer_frequency = self.frequency_lists[self.current_channel]
+
+        self.observation = np.array([self._get_received_power(i) for i in range(self.num_channels)])
+        return self.observation
 
     def step(self, action):
         assert self.action_space.contains(action), "Invalid action"
 
         received_power = self._get_received_power(action)
         if received_power >= 0:
-            reward = 1.0
-        else:
             reward = 0.0
+        else:
+            reward = 1.0
 
         if self.current_channel != action:
+            reward -= self.csc  # Channel switching cost
 
         self.current_channel = action
 
@@ -71,27 +73,21 @@ class AntiJamEnv(gym.Env):
 
         return self.observation, reward, False, {}
 
+    def _get_received_power(self, channel_idx):
+        # Simulate received jamming power using normal distribution
+        jammed_power = np.random.normal(loc=30, scale=5)
+        adjacent_power = np.random.normal(loc=13, scale=3)
+        far_away_power = np.random.normal(loc=-7, scale=1)
+
+        if channel_idx == self.current_channel:
+            return jammed_power
+        elif abs(channel_idx - self.current_channel) == 1:
+            return adjacent_power
+        else:
+            return far_away_power
 
     def render(self, mode='human'):
         pass
 
     def close(self):
         pass
-
-
-# Test the environment
-env = AntiJamEnv()
-observation = env.reset()
-for _ in range(10):
-    action = env.action_space.sample()
-    observation, reward, done, _ = env.step(action)
-    print("Action:", action, "Reward:", reward, "Observation:", observation)
-    if done:
-        break
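The module-level smoke test that previously ran on import has been removed. An equivalent check can still be run from a separate script; the sketch below adapts the removed loop to the new constructor, with 'constant' and 0.1 as example arguments:

# Quick smoke test for the refactored environment (example arguments).
from antiJamEnv import AntiJamEnv

env = AntiJamEnv(jammer_type='constant', channel_switching_cost=0.1)
observation = env.reset()
for _ in range(10):
    action = env.action_space.sample()
    observation, reward, done, _ = env.step(action)
    print("Action:", action, "Reward:", reward, "Observation:", observation)
    if done:
        break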
app.py
CHANGED
@@ -1,4 +1,8 @@
 import streamlit as st
+import trainer
+import tester
+import os
+
 
 def main():
     st.title("Beyond the Anti-Jam: Integration of DRL with LLM")
@@ -16,11 +20,24 @@ def main():
     st.write(f"Jammer Type: {jammer_type}")
     st.write(f"Channel Switching Cost: {channel_switching_cost}")
 
+    if st.button('Train'):
+        st.write("==================================================")
+        st.write('Training Starting')
+        trainer.train(jammer_type, channel_switching_cost)
+        st.write("Training completed")
+        st.write("==================================================")
+
+    if st.button('Test'):
+        st.write("==================================================")
+        st.write('Testing Starting')
+        agentName = f'savedAgents/DDQNAgent_{jammer_type}_csc_{channel_switching_cost}'
+        if os.path.exists(agentName):
+            tester.test(jammer_type, channel_switching_cost)
+            st.write("Testing completed")
+            st.write("==================================================")
+        else:
+            st.write("Agent has not been trained yet. Click Train First!!!")
+
 
 if __name__ == "__main__":
     main()
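The Train/Test buttons rely on jammer_type and channel_switching_cost being set earlier in main(); those widget lines are outside this hunk. A hypothetical sketch of such a selection block (the widget choices and option values are assumptions, not part of this commit):

# Hypothetical selection widgets feeding the Train/Test buttons shown above.
import streamlit as st

jammer_type = st.selectbox("Jammer Type", ['constant', 'sweeping', 'random', 'dynamic'])
channel_switching_cost = st.select_slider("Channel Switching Cost", options=[0, 0.05, 0.1, 0.15, 0.2])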
tester.py
CHANGED
@@ -11,16 +11,12 @@ import matplotlib as mpl
 import matplotlib.pyplot as plt
 import json
 from tensorflow import keras
+from DDQN import DoubleDeepQNetwork
+from antiJamEnv import AntiJamEnv
 
-jammerTypes = ['dynamic_pattern', 'combined', 'sweeping', 'random']
-jammerType = jammerTypes[0]
-network = 'FNN'
-cscs = [0, 0.1, 0.2, 0.3, 0.4]  # Channel switching cost
 
+def test(jammer_type, channel_switching_cost):
+    env = AntiJamEnv(jammer_type, channel_switching_cost)
     ob_space = env.observation_space
     ac_space = env.action_space
     print("Observation space: ", ob_space, ob_space.dtype)
@@ -30,9 +26,7 @@ for csc in cscs:
     a_size = ac_space.n
     total_episodes = 200
     max_env_steps = 100
-    TRAIN_Episodes = 100
-    remaining_Episodes = 0
+    TEST_Episodes = 100
     env._max_episode_steps = max_env_steps
 
     epsilon = 1.0  # exploration rate
@@ -42,69 +36,17 @@ for csc in cscs:
     lr = 0.001
     batch_size = 32
 
+    agentName = f'savedAgents/DDQNAgent_{jammer_type}_csc_{channel_switching_cost}'
     DDQN_agent = DoubleDeepQNetwork(s_size, a_size, lr, discount_rate, epsilon, epsilon_min, epsilon_decay)
+    DDQN_agent.model = DDQN_agent.load_saved_model(agentName)
     rewards = []  # Store rewards for graphing
     epsilons = []  # Store the Explore/Exploit
 
-    for e in range(TRAIN_Episodes):
-        state = env.reset()
-        # print(f"Initial state is: {state}")
-        state = np.reshape(state, [1, s_size])  # Resize to store in memory to pass to .predict
-        tot_rewards = 0
-        previous_action = 0
-        for time in range(max_env_steps):  # 200 is when you "solve" the game. This can continue forever as far as I know
-            action = DDQN_agent.action(state)
-            next_state, reward, done, _ = env.step(action)
-            # print(f'The next state is: {next_state}')
-            # done: Three collisions occurred in the last 10 steps.
-            # time == max_env_steps - 1 : No collisions occurred
-            if done or time == max_env_steps - 1:
-                rewards.append(tot_rewards)
-                epsilons.append(DDQN_agent.epsilon)
-                print("episode: {}/{}, score: {}, e: {}"
-                      .format(e, TRAIN_Episodes, tot_rewards, DDQN_agent.epsilon))
-                break
-            # Applying channel switching cost
-            if action != previous_action:
-                reward -= csc
-            next_state = np.reshape(next_state, [1, s_size])
-            tot_rewards += reward
-            DDQN_agent.store(state, action, reward, next_state, done)  # Resize to store in memory to pass to .predict
-            state = next_state
-            previous_action = action
-
-        # Experience Replay
-        if len(DDQN_agent.memory) > batch_size:
-            DDQN_agent.experience_replay(batch_size)
-        # Update the weights after each episode (You can configure this for x steps as well
-        DDQN_agent.update_target_from_model()
-        # If our current NN passes we are done
-        # Early stopping criteria: I am going to use the last 10 runs within 1% of the max
-        if len(rewards) > 10 and np.average(rewards[-10:]) >= max_env_steps - 0.10 * max_env_steps:
-            # Set the rest of the episodes for testing
-            remaining_Episodes = total_episodes - e
-            train_end = e
-            break
-
-    # Testing
-    print('Training complete. Testing started...')
-    # TEST Time
-    # In this section we ALWAYS use exploit as we don't train anymore
-    total_transmissions = 0
-    successful_transmissions = 0
-    if remaining_Episodes == 0:
-        train_end = TRAIN_Episodes
-        TEST_Episodes = 100
-    else:
-        TEST_Episodes = total_episodes - train_end
-    # Testing Loop
-    n_channel_switches = 0
+    # Testing agent
     for e_test in range(TEST_Episodes):
         state = env.reset()
         state = np.reshape(state, [1, s_size])
         tot_rewards = 0
-        previous_channel = 0
         for t_test in range(max_env_steps):
             action = DDQN_agent.test_action(state)
             next_state, reward, done, _ = env.step(action)
@@ -116,19 +58,11 @@ for csc in cscs:
                 break
             next_state = np.reshape(next_state, [1, s_size])
             tot_rewards += reward
-            if action != previous_channel:
-                n_channel_switches += 1
-            if reward == 1:
-                successful_transmissions += 1
             # DON'T STORE ANYTHING DURING TESTING
             state = next_state
-            previous_channel = action
-            # done: More than 3 collisions occurred in the last 10 steps.
-            # t_test == max_env_steps - 1: No collisions occurred
-            total_transmissions += 1
 
     # Plotting
+    plotName = f'results/test/rewards_{jammer_type}_csc_{channel_switching_cost}.png'
     rolling_average = np.convolve(rewards, np.ones(10) / 10)
     plt.plot(rewards)
     plt.plot(rolling_average, color='black')
@@ -136,32 +70,13 @@ for csc in cscs:
     # Scale Epsilon (0.001 - 1.0) to match reward (0 - 200) range
     eps_graph = [200 * x for x in epsilons]
     plt.plot(eps_graph, color='g', linestyle='-')
-    # Plot the line where TESTING begins
-    plt.axvline(x=train_end, color='y', linestyle='-')
-    plt.xlim((0, train_end+TEST_Episodes))
-    plt.ylim((0, max_env_steps))
     plt.xlabel('Episodes')
     plt.ylabel('Rewards')
     plt.savefig(plotName, bbox_inches='tight')
+    plt.show()
 
     # Save Results
     # Rewards
+    fileName = f'results/test/rewards_{jammer_type}_csc_{channel_switching_cost}.json'
     with open(fileName, 'w') as f:
         json.dump(rewards, f)
-    # Normalized throughput
-    normalizedThroughput = successful_transmissions / (TEST_Episodes*(max_env_steps-2))
-    print(f'The normalized throughput is: {normalizedThroughput}')
-    fileName = f'results/{network}/throughput_{jammerType}_csc_{csc}.json'
-    with open(fileName, 'w') as f:
-        json.dump(normalizedThroughput, f)
-    # Channel switching times
-    normalized_cst = n_channel_switches / (TEST_Episodes*(max_env_steps-2))
-    print(f'The normalized channel switching times is: {normalized_cst}')
-    fileName = f'results/{network}/times_{jammerType}_csc_{csc}.json'
-    with open(fileName, 'w') as f:
-        json.dump(normalized_cst, f)
-    # Save the agent as a SavedAgent.
-    agentName = f'savedAgents/{network}/DDQNAgent_{jammerType}_csc_{csc}'
-    DDQN_agent.save_model(agentName)
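tester.test can also be called outside Streamlit once a matching agent exists under savedAgents/. A short sketch with example arguments, reading back the rewards file that test() writes:

# Example arguments; requires savedAgents/DDQNAgent_constant_csc_0.1 from a prior training run.
import json
import tester

tester.test('constant', 0.1)
with open('results/test/rewards_constant_csc_0.1.json') as f:
    rewards = json.load(f)
print(f"{len(rewards)} test episodes, mean reward {sum(rewards) / len(rewards):.2f}")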
trainer.py
CHANGED
@@ -12,14 +12,11 @@ import matplotlib.pyplot as plt
 import json
 from tensorflow import keras
 from DDQN import DoubleDeepQNetwork
+from antiJamEnv import AntiJamEnv
 
-jammerTypes = ['dynamic_pattern', 'combined', 'sweeping', 'random']
-jammerType = jammerTypes[0]
-network = 'FNN'
-cscs = [0, 0.05, 0.1, 0.15, 0.2]  # Channel switching cost
 
+def train(jammer_type, channel_switching_cost):
+    env = AntiJamEnv(jammer_type, channel_switching_cost)
     ob_space = env.observation_space
     ac_space = env.action_space
     print("Observation space: ", ob_space, ob_space.dtype)
@@ -27,11 +24,8 @@ for csc in cscs:
 
     s_size = ob_space.shape[0]
     a_size = ac_space.n
-    total_episodes = 200
     max_env_steps = 100
-    train_end = 0
     TRAIN_Episodes = 100
-    remaining_Episodes = 0
     env._max_episode_steps = max_env_steps
 
     epsilon = 1.0  # exploration rate
@@ -65,13 +59,10 @@ for csc in cscs:
                       .format(e, TRAIN_Episodes, tot_rewards, DDQN_agent.epsilon))
                 break
             # Applying channel switching cost
-            if action != previous_action:
-                reward -= csc
             next_state = np.reshape(next_state, [1, s_size])
             tot_rewards += reward
             DDQN_agent.store(state, action, reward, next_state, done)  # Resize to store in memory to pass to .predict
             state = next_state
-            previous_action = action
 
         # Experience Replay
         if len(DDQN_agent.memory) > batch_size:
@@ -81,86 +72,28 @@ for csc in cscs:
         # If our current NN passes we are done
         # Early stopping criteria: I am going to use the last 10 runs within 1% of the max
         if len(rewards) > 10 and np.average(rewards[-10:]) >= max_env_steps - 0.10 * max_env_steps:
-            # Set the rest of the episodes for testing
-            remaining_Episodes = total_episodes - e
-            train_end = e
             break
 
-    # Testing
-    print('Training complete. Testing started...')
-    # TEST Time
-    # In this section we ALWAYS use exploit as we don't train anymore
-    total_transmissions = 0
-    successful_transmissions = 0
-    if remaining_Episodes == 0:
-        train_end = TRAIN_Episodes
-        TEST_Episodes = 100
-    else:
-        TEST_Episodes = total_episodes - train_end
-    # Testing Loop
-    n_channel_switches = 0
-    for e_test in range(TEST_Episodes):
-        state = env.reset()
-        state = np.reshape(state, [1, s_size])
-        tot_rewards = 0
-        previous_channel = 0
-        for t_test in range(max_env_steps):
-            action = DDQN_agent.test_action(state)
-            next_state, reward, done, _ = env.step(action)
-            if done or t_test == max_env_steps - 1:
-                rewards.append(tot_rewards)
-                epsilons.append(0)  # We are doing full exploit
-                print("episode: {}/{}, score: {}, e: {}"
-                      .format(e_test, TEST_Episodes, tot_rewards, 0))
-                break
-            next_state = np.reshape(next_state, [1, s_size])
-            tot_rewards += reward
-            if action != previous_channel:
-                n_channel_switches += 1
-            if reward == 1:
-                successful_transmissions += 1
-            # DON'T STORE ANYTHING DURING TESTING
-            state = next_state
-            previous_channel = action
-            # done: More than 3 collisions occurred in the last 10 steps.
-            # t_test == max_env_steps - 1: No collisions occurred
-            total_transmissions += 1
-
     # Plotting
+    plotName = f'results/train/rewards_{jammer_type}_csc_{channel_switching_cost}.png'
     rolling_average = np.convolve(rewards, np.ones(10) / 10)
     plt.plot(rewards)
     plt.plot(rolling_average, color='black')
     plt.axhline(y=max_env_steps - 0.10 * max_env_steps, color='r', linestyle='-')  # Solved Line
+    # Scale Epsilon (0.001 - 1.0) to match reward (0 - 100) range
+    eps_graph = [100 * x for x in epsilons]
     plt.plot(eps_graph, color='g', linestyle='-')
-    # Plot the line where TESTING begins
-    plt.axvline(x=train_end, color='y', linestyle='-')
-    plt.xlim((0, train_end+TEST_Episodes))
-    plt.ylim((0, max_env_steps))
     plt.xlabel('Episodes')
     plt.ylabel('Rewards')
     plt.savefig(plotName, bbox_inches='tight')
+    plt.show()
 
     # Save Results
     # Rewards
+    fileName = f'results/train/rewards_{jammer_type}_csc_{channel_switching_cost}.json'
     with open(fileName, 'w') as f:
         json.dump(rewards, f)
-    normalizedThroughput = successful_transmissions / (TEST_Episodes*(max_env_steps-2))
-    print(f'The normalized throughput is: {normalizedThroughput}')
-    fileName = f'results/{network}/throughput_{jammerType}_csc_{csc}.json'
-    with open(fileName, 'w') as f:
-        json.dump(normalizedThroughput, f)
-    # Channel switching times
-    normalized_cst = n_channel_switches / (TEST_Episodes*(max_env_steps-2))
-    print(f'The normalized channel switching times is: {normalized_cst}')
-    fileName = f'results/{network}/times_{jammerType}_csc_{csc}.json'
-    with open(fileName, 'w') as f:
-        json.dump(normalized_cst, f)
+
     # Save the agent as a SavedAgent.
+    agentName = f'savedAgents/DDQNAgent_{jammer_type}_csc_{channel_switching_cost}'
     DDQN_agent.save_model(agentName)
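Likewise, trainer.train can be invoked directly (this is the same call the Streamlit Train button makes). With the example arguments 'constant' and 0.1, the artifacts land at the paths built from the f-strings above:

# Example arguments; the results/train/ and savedAgents/ directories are assumed to exist.
import trainer

trainer.train('constant', 0.1)
# Produces:
#   results/train/rewards_constant_csc_0.1.png   (reward curve)
#   results/train/rewards_constant_csc_0.1.json  (per-episode rewards)
#   savedAgents/DDQNAgent_constant_csc_0.1       (saved model, later loaded by tester.py)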