# nStepTDAgent.py — tabular n-step TD (Sarsa-style) agent demo
import sys | |
from tabulate import tabulate | |
import numpy as np | |
# Recorded trajectory: one [state, action, reward R_{t+1}] triple per step.
# The final entry is the terminal state and carries no action or reward.
episode = [
    ["s1", "E", 0],
    ["s2", "E", 1],
    ["s3", "N", 2],
    ["s3", "N", 3],
    ["s3", "S", 4],
    ["s6", "S", 5],
    ["s9", None, None],
]
# Map state labels ("s1".."s9") and action labels (N/E/S/W) to the row and
# column indices of the Q table, respectively.
index_map = {f"s{i + 1}": i for i in range(9)}
index_map.update({"N": 0, "E": 1, "S": 2, "W": 3})
class nStepTDAgent():
    """Tabular agent that learns Q(s, a) with n-step TD updates from a
    recorded episode (n-step Sarsa evaluation, Sutton & Barto Ch. 7)."""

    def __init__(self, alpha, gamma, num_state, num_actions):
        """
        Constructor

        Args:
            alpha: The learning rate (step size)
            gamma: The discount factor
            num_state: The number of states
            num_actions: The number of actions
        """
        self.alpha = alpha
        self.gamma = gamma
        self.num_state = num_state
        self.num_actions = num_actions
        # Action-value table, initialised to zero.
        self.Q = np.zeros((self.num_state, self.num_actions))

    def run_episode(self, n, episode):
        """
        Update the action-value function using the n-step TD update.

        Args:
            n: Number of reward steps used in each n-step return.
            episode: List of [state, action, reward] triples where the
                reward is R_{t+1}; the final entry is the terminal state
                with action and reward set to None.
        """
        # rew[t + 1] holds R_{t+1}; index 0 is a placeholder so reward
        # indices line up with the textbook's 1-based notation.
        rew = [0]
        bigT = sys.maxsize  # episode length T; unknown until terminal is seen
        print("T: ", bigT)
        # BUG FIX 1: the original iterated enumerate(episode.reverse());
        # list.reverse() returns None, so that raised TypeError (and the
        # algorithm must walk the episode forward anyway).
        # BUG FIX 2: the original loop stopped at len(episode)-1, so the
        # final updates for tau = T-n+1 .. T-1 never ran; the algorithm
        # needs t to run on until tau reaches T-1.
        t = 0
        while True:
            print("=" * 80)
            print("Step: ", t)
            if t < bigT:
                s_t, a_t, r_t1 = episode[t]
                print(f"  s_t: {s_t}, a_t: {a_t}, r_t1: {r_t1}")
                rew.append(r_t1)
                # The next entry is terminal when it carries no reward.
                if episode[t + 1][2] is None:
                    bigT = t + 1
                    print("TERMINAL => T: ", bigT)
            Tt = t - n + 1  # tau: the time whose estimate is updated now
            print(f"  Tt: {Tt}")
            if Tt >= 0:
                print(f'  ==============')
                # n-step return:
                #   G = sum_{i=tau+1}^{min(tau+n, T)} gamma^(i-tau-1) * R_i
                bigG = 0
                for i in range(Tt + 1, min(Tt + n, bigT) + 1):
                    print(f"    i: {i}")
                    print(f"    {bigG} += {self.gamma}^{i - Tt - 1} * {rew[i]}")
                    bigG += self.gamma**(i - Tt - 1) * rew[i]
                    print(f"    G: {bigG}")
                print(f'  --------------')
                # Bootstrap with Q(S_{tau+n}, A_{tau+n}) unless the n-step
                # window already reaches the terminal state.
                if Tt + n < bigT:
                    s_Tn, a_Tn = episode[Tt + n][0], episode[Tt + n][1]
                    print(f"    s_Tn: {s_Tn}, a_Tn: {a_Tn}")
                    s_Tni, a_Tni = index_map[s_Tn], index_map[a_Tn]
                    print(f"    {bigG} += {self.gamma}^{n} * {self.Q[s_Tni, a_Tni]}")
                    bigG += (self.gamma**n) * self.Q[s_Tni, a_Tni]
                    print(f"    G: {bigG}")
                print(f'  ==============')
                s_Tt, a_Tt = episode[Tt][0], episode[Tt][1]
                print(f"  => Update Q[{s_Tt}, {a_Tt}]")
                s_Tti, a_Tti = index_map[s_Tt], index_map[a_Tt]
                print(f"  Q[{s_Tt}, {a_Tt}] = {self.Q[s_Tti, a_Tti]}")
                # Move Q(S_tau, A_tau) toward the n-step return G.
                self.Q[s_Tti, a_Tti] += self.alpha * (bigG - self.Q[s_Tti, a_Tti])
                print(f"  Q[{s_Tt}, {a_Tt}] = {self.Q[s_Tti, a_Tti]}")
                print(f"Q:")
                print(tabulate(self.Q, tablefmt="fancy_grid"))
            # Done once the last reachable time step (tau = T-1) is updated.
            if Tt == bigT - 1:
                break
            t += 1
def main_r():
    """Demo entry point: build an agent and replay the recorded episode."""
    print("# nStepTDAgent.py")
    alpha, gamma, n_states, n_actions = 0.1, 0.9, 9, 4
    agent = nStepTDAgent(alpha, gamma, n_states, n_actions)
    print(agent.Q)
    agent.run_episode(3, episode)


if __name__ == "__main__":
    main_r()