Spaces:
Sleeping
Sleeping
File size: 1,455 Bytes
d67dca9 a5f2527 c4ae7c9 d67dca9 897f151 c4ae7c9 d67dca9 897f151 d67dca9 897f151 d67dca9 c4ae7c9 d67dca9 a5f2527 d67dca9 897f151 d67dca9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 |
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import numpy as np
import streamlit as st
from antiJamEnv import AntiJamEnv
def test(agent, jammer_type, channel_switching_cost):
    """Run a short evaluation of a trained DDQN agent on the anti-jamming env.

    Plays ``TEST_Episodes`` episodes of at most ``max_env_steps`` steps each,
    acting greedily via ``agent.test_action`` (full exploit, no exploration),
    and logs per-step and per-episode results to the Streamlit UI.

    Args:
        agent: trained agent exposing ``test_action(state)`` and ``epsilon``.
        jammer_type: jammer configuration forwarded to ``AntiJamEnv``.
        channel_switching_cost: switching-cost penalty forwarded to ``AntiJamEnv``.

    Returns:
        tuple[list, list]: ``(rewards, epsilons)`` — total reward per episode
        and the (constant 0) epsilon recorded per episode.
    """
    env = AntiJamEnv(jammer_type, channel_switching_cost)
    ob_space = env.observation_space
    s_size = ob_space.shape[0]  # flattened observation length for reshaping

    max_env_steps = 3
    TEST_Episodes = 1
    # NOTE(review): _max_episode_steps is a private gym attribute — relied on
    # here to cap episode length; confirm the env honors it.
    env._max_episode_steps = max_env_steps

    DDQN_agent = agent
    rewards = []   # total reward per test episode (for graphing)
    epsilons = []  # exploration rate per episode (always 0: full exploit)

    for e_test in range(TEST_Episodes):
        state = env.reset()
        state = np.reshape(state, [1, s_size])  # batch dim for the network
        tot_rewards = 0
        for t_test in range(max_env_steps):
            action = DDQN_agent.test_action(state)
            next_state, reward, done, _ = env.step(action)
            if done or t_test == max_env_steps - 1:
                # Bug fix: include the reward earned on the terminal/final
                # step before recording the episode total (the original
                # dropped it).
                tot_rewards += reward
                rewards.append(tot_rewards)
                epsilons.append(0)  # We are doing full exploit
                st.write(f"episode: {e_test}/{TEST_Episodes}, score: {tot_rewards}, e: {DDQN_agent.epsilon}")
                break
            next_state = np.reshape(next_state, [1, s_size])
            tot_rewards += reward
            st.write(f"The state is: {state}, action taken is: {action}, obtained reward is: {reward}")
            # DON'T STORE ANYTHING DURING TESTING
            state = next_state

    # Return the collected series so callers can plot them (previously the
    # lists were built but discarded).
    return rewards, epsilons
|