#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import os
import json

import gym
import numpy as np
import matplotlib.pyplot as plt

from ns3gym import ns3env  # noqa: F401  (importing ns3gym registers the 'ns3-v0' environment with gym)
from DDQN_FNN import DoubleDeepQNetwork

jammerTypes = ['dynamic_pattern', 'combined', 'sweeping', 'random']
jammerType = jammerTypes[0]
network = 'FNN'
cscs = [0, 0.1, 0.2, 0.3, 0.4]  # Channel switching costs

# Make sure the output directories exist before the sweep starts.
os.makedirs(f'results/{network}', exist_ok=True)
os.makedirs(f'savedAgents/{network}', exist_ok=True)

for csc in cscs:
    env = gym.make('ns3-v0')
    ob_space = env.observation_space
    ac_space = env.action_space
    print("Observation space: ", ob_space, ob_space.dtype)
    print("Action space: ", ac_space, ac_space.n)

    s_size = ob_space.shape[0]
    a_size = ac_space.n
    total_episodes = 200
    max_env_steps = 100
    train_end = 0
    TRAIN_Episodes = 100
    remaining_Episodes = 0
    env._max_episode_steps = max_env_steps

    epsilon = 1.0        # exploration rate
    epsilon_min = 0.01
    epsilon_decay = 0.999
    discount_rate = 0.95
    lr = 0.001
    batch_size = 32

    DDQN_agent = DoubleDeepQNetwork(s_size, a_size, lr, discount_rate, epsilon, epsilon_min, epsilon_decay)
    rewards = []   # Store episode rewards for graphing
    epsilons = []  # Store the exploration rate (explore/exploit balance)

    # Training agent
    for e in range(TRAIN_Episodes):
        state = env.reset()
        # print(f"Initial state is: {state}")
        state = np.reshape(state, [1, s_size])  # Reshape so it can be stored in memory and passed to .predict
        tot_rewards = 0
        previous_action = 0
        for time in range(max_env_steps):  # An episode is "solved" when it runs for max_env_steps without ending early
            action = DDQN_agent.action(state)
            next_state, reward, done, _ = env.step(action)
            # print(f'The next state is: {next_state}')
            # done: three collisions occurred in the last 10 steps.
            # time == max_env_steps - 1: the episode finished with no collisions.
            if done or time == max_env_steps - 1:
                rewards.append(tot_rewards)
                epsilons.append(DDQN_agent.epsilon)
                print("episode: {}/{}, score: {}, e: {}"
                      .format(e, TRAIN_Episodes, tot_rewards, DDQN_agent.epsilon))
                break
            # Apply the channel switching cost
            if action != previous_action:
                reward -= csc
            next_state = np.reshape(next_state, [1, s_size])
            tot_rewards += reward
            DDQN_agent.store(state, action, reward, next_state, done)  # Store the transition in replay memory
            state = next_state
            previous_action = action

            # Experience replay
            if len(DDQN_agent.memory) > batch_size:
                DDQN_agent.experience_replay(batch_size)

        # Update the target network after each episode (this could also be done every x steps)
        DDQN_agent.update_target_from_model()

        # Early stopping criterion: the average of the last 10 episodes is within 10% of the maximum score
        if len(rewards) > 10 and np.average(rewards[-10:]) >= max_env_steps - 0.10 * max_env_steps:
            # Use the remaining episodes for testing
            remaining_Episodes = total_episodes - e
            train_end = e
            break

    # Testing
    print('Training complete.\nTesting started...')
    # TEST time
    # During testing the agent always exploits; no further training takes place.
    total_transmissions = 0
    successful_transmissions = 0
    if remaining_Episodes == 0:
        train_end = TRAIN_Episodes
        TEST_Episodes = 100
    else:
        TEST_Episodes = total_episodes - train_end

    # Testing loop
    n_channel_switches = 0
    for e_test in range(TEST_Episodes):
        state = env.reset()
        state = np.reshape(state, [1, s_size])
        tot_rewards = 0
        previous_channel = 0
        for t_test in range(max_env_steps):
            action = DDQN_agent.test_action(state)
            next_state, reward, done, _ = env.step(action)
            # done: more than three collisions occurred in the last 10 steps.
            # t_test == max_env_steps - 1: the episode finished with no collisions.
            if done or t_test == max_env_steps - 1:
                rewards.append(tot_rewards)
                epsilons.append(0)  # Full exploitation
                print("episode: {}/{}, score: {}, e: {}"
                      .format(e_test, TEST_Episodes, tot_rewards, 0))
                break
            next_state = np.reshape(next_state, [1, s_size])
            tot_rewards += reward
            if action != previous_channel:
                n_channel_switches += 1
            if reward == 1:
                successful_transmissions += 1
            # Nothing is stored in replay memory during testing
            state = next_state
            previous_channel = action
            total_transmissions += 1

    # Plotting
    plotName = f'results/{network}/{jammerType}_csc_{csc}.png'
    rolling_average = np.convolve(rewards, np.ones(10) / 10)
    plt.figure()  # Start a fresh figure for each csc value so the curves do not overlap across runs
    plt.plot(rewards)
    plt.plot(rolling_average, color='black')
    plt.axhline(y=max_env_steps - 0.10 * max_env_steps, color='r', linestyle='-')  # "Solved" line
    # Scale epsilon (0 - 1.0) to match the reward range (0 - max_env_steps)
    eps_graph = [max_env_steps * x for x in epsilons]
    plt.plot(eps_graph, color='g', linestyle='-')
    # Mark the episode where testing begins
    plt.axvline(x=train_end, color='y', linestyle='-')
    plt.xlim((0, train_end + TEST_Episodes))
    plt.ylim((0, max_env_steps))
    plt.xlabel('Episodes')
    plt.ylabel('Rewards')
    plt.savefig(plotName, bbox_inches='tight')
    plt.close()
    # plt.show()

    # Save results
    # Rewards
    fileName = f'results/{network}/rewards_{jammerType}_csc_{csc}.json'
    with open(fileName, 'w') as f:
        json.dump(rewards, f)

    # Normalized throughput
    normalizedThroughput = successful_transmissions / (TEST_Episodes * (max_env_steps - 2))
    print(f'The normalized throughput is: {normalizedThroughput}')
    fileName = f'results/{network}/throughput_{jammerType}_csc_{csc}.json'
    with open(fileName, 'w') as f:
        json.dump(normalizedThroughput, f)

    # Normalized number of channel switches
    normalized_cst = n_channel_switches / (TEST_Episodes * (max_env_steps - 2))
    print(f'The normalized channel switching times is: {normalized_cst}')
    fileName = f'results/{network}/times_{jammerType}_csc_{csc}.json'
    with open(fileName, 'w') as f:
        json.dump(normalized_cst, f)

    # Save the trained agent.
    agentName = f'savedAgents/{network}/DDQNAgent_{jammerType}_csc_{csc}'
    DDQN_agent.save_model(agentName)
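
# ---------------------------------------------------------------------------
# Reference sketch (assumption): the loops above rely only on the agent
# interface used there (constructor signature, action, test_action, store,
# experience_replay, update_target_from_model, save_model, .memory, .epsilon).
# The class below is NOT the actual DDQN_FNN.DoubleDeepQNetwork; it is a
# minimal Keras-based Double DQN with the same method names, kept here purely
# to document the assumed API. Buffer size and layer widths are guesses.
# ---------------------------------------------------------------------------
import random
from collections import deque

from tensorflow import keras


class _ReferenceDoubleDeepQNetwork:
    def __init__(self, state_size, action_size, lr, gamma,
                 epsilon, epsilon_min, epsilon_decay):
        self.state_size = state_size
        self.action_size = action_size
        self.gamma = gamma
        self.epsilon = epsilon
        self.epsilon_min = epsilon_min
        self.epsilon_decay = epsilon_decay
        self.memory = deque(maxlen=5000)           # replay buffer (size assumed)
        self.model = self._build_model(lr)         # online network
        self.target_model = self._build_model(lr)  # target network
        self.update_target_from_model()

    def _build_model(self, lr):
        # Small fully connected network; the layer sizes are assumptions.
        model = keras.Sequential([
            keras.layers.Input(shape=(self.state_size,)),
            keras.layers.Dense(64, activation='relu'),
            keras.layers.Dense(64, activation='relu'),
            keras.layers.Dense(self.action_size, activation='linear'),
        ])
        model.compile(loss='mse', optimizer=keras.optimizers.Adam(learning_rate=lr))
        return model

    def action(self, state):
        # Epsilon-greedy action selection used during training.
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        return int(np.argmax(self.model.predict(state, verbose=0)[0]))

    def test_action(self, state):
        # Greedy (pure exploitation) action selection used during testing.
        return int(np.argmax(self.model.predict(state, verbose=0)[0]))

    def store(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def experience_replay(self, batch_size):
        # Double DQN target: the online network selects the next action,
        # the target network evaluates it.
        minibatch = random.sample(self.memory, batch_size)
        states = np.vstack([m[0] for m in minibatch])
        next_states = np.vstack([m[3] for m in minibatch])
        q_values = self.model.predict(states, verbose=0)
        q_next_online = self.model.predict(next_states, verbose=0)
        q_next_target = self.target_model.predict(next_states, verbose=0)
        for i, (_, action, reward, _, done) in enumerate(minibatch):
            target = reward
            if not done:
                best_next = np.argmax(q_next_online[i])
                target += self.gamma * q_next_target[i][best_next]
            q_values[i][action] = target
        self.model.fit(states, q_values, epochs=1, verbose=0)
        # Decay exploration after each replay step.
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def update_target_from_model(self):
        # Copy the online network weights into the target network.
        self.target_model.set_weights(self.model.get_weights())

    def save_model(self, path):
        self.model.save(path)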