#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import json
import os

import numpy as np
import matplotlib.pyplot as plt

from DDQN import DoubleDeepQNetwork
from antiJamEnv import AntiJamEnv


def test(jammer_type, channel_switching_cost):
    env = AntiJamEnv(jammer_type, channel_switching_cost)
    ob_space = env.observation_space
    ac_space = env.action_space
    print("Observation space: ", ob_space, ob_space.dtype)
    print("Action space: ", ac_space, ac_space.n)

    s_size = ob_space.shape[0]
    a_size = ac_space.n
    max_env_steps = 100
    TEST_Episodes = 100
    env._max_episode_steps = max_env_steps  # cap episode length (private gym attribute)

    # These hyperparameters must match the ones used during training so the
    # saved weights load into an identically configured agent. Epsilon is
    # irrelevant here: test_action() acts greedily, with no exploration.
    epsilon = 1.0        # exploration rate (unused during testing)
    epsilon_min = 0.01
    epsilon_decay = 0.999
    discount_rate = 0.95
    lr = 0.001

    agentName = f'savedAgents/DDQNAgent_{jammer_type}_csc_{channel_switching_cost}'
    DDQN_agent = DoubleDeepQNetwork(s_size, a_size, lr, discount_rate,
                                    epsilon, epsilon_min, epsilon_decay)
    DDQN_agent.model = DDQN_agent.load_saved_model(agentName)

    rewards = []   # Store rewards for graphing
    epsilons = []  # Store the Explore/Exploit

    # Testing agent: greedy policy only, no replay-memory updates.
    for e_test in range(TEST_Episodes):
        state = env.reset()
        state = np.reshape(state, [1, s_size])
        tot_rewards = 0
        for t_test in range(max_env_steps):
            action = DDQN_agent.test_action(state)
            next_state, reward, done, _ = env.step(action)
            # Accumulate before the terminal check so the final step's
            # reward is not dropped from the episode score.
            tot_rewards += reward
            if done or t_test == max_env_steps - 1:
                rewards.append(tot_rewards)
                epsilons.append(0)  # full exploitation during testing
                print("episode: {}/{}, score: {}, e: {}"
                      .format(e_test, TEST_Episodes, tot_rewards, 0))
                break
            # Nothing is stored in replay memory during testing.
            state = np.reshape(next_state, [1, s_size])

    # Plotting
    os.makedirs('results/test', exist_ok=True)  # ensure the output directory exists
    plotName = f'results/test/rewards_{jammer_type}_csc_{channel_switching_cost}.png'
    # mode='valid' avoids the ramp-up/ramp-down edge artifacts of the default 'full' mode.
    rolling_average = np.convolve(rewards, np.ones(10) / 10, mode='valid')
    plt.plot(rewards)
    plt.plot(rolling_average, color='black')
    plt.axhline(y=max_env_steps - 0.10 * max_env_steps, color='r', linestyle='-')  # Solved line
    # Scale epsilon (0.0 - 1.0) to the reward range (0 - max_env_steps); during
    # testing every entry is 0, so this draws a flat line at zero.
    eps_graph = [max_env_steps * x for x in epsilons]
    plt.plot(eps_graph, color='g', linestyle='-')
    plt.xlabel('Episodes')
    plt.ylabel('Rewards')
    plt.savefig(plotName, bbox_inches='tight')
    plt.show()

    # Save rewards
    fileName = f'results/test/rewards_{jammer_type}_csc_{channel_switching_cost}.json'
    with open(fileName, 'w') as f:
        json.dump(rewards, f)
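

# A minimal usage sketch: test() is never invoked in this module, so a
# __main__ guard like the one below is one way to run it directly. The
# jammer_type string and channel_switching_cost value here are illustrative
# placeholders; the values actually accepted are defined by AntiJamEnv and
# by which trained agents exist under savedAgents/.
if __name__ == '__main__':
    test(jammer_type='constant', channel_switching_cost=0.1)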