Spaces:

NCTCMumbai
/

HS_Code_AI-Explanability

Running

App Files Files Community

HS_Code_AI-Explanability / models /research /efficient-hrl /context /configs /point_maze.gin

NCTCMumbai

Upload 2583 files

97b6013 verified about 1 year ago

raw

history blame contribute delete

2.42 kB

	#-*-Python-*-
	# NOTE: For best training, low-level exploration (uvf_add_noise_fn.stddev)
	# should be reduced to around 0.1.
	# Environment selection: create_maze_env is asked for the environment
	# named "PointMaze".
	create_maze_env.env_name = "PointMaze"
	# Scalar bounds for the low-level (agent) context. They are combined into a
	# single (min, max) tuple macro that later bindings read as %context_range.
	context_range_min = -10
	context_range_max = 10
	context_range = (%context_range_min, %context_range_max)
	# Bounds for the meta-level context, written as a pair of 2-element tuples.
	# NOTE(review): presumably (per-dimension lower bounds, per-dimension upper
	# bounds) -- confirm against the Context / sampler implementation.
	meta_context_range = ((-2, -2), (10, 10))

	# Period macros shared by the reset conditions below, so the values only
	# need to be changed in one place.
	RESET_EPISODE_PERIOD = 500
	RESET_ENV_PERIOD = 1
	# End episode every N steps
	UvfAgent.reset_episode_cond_fn = @every_n_steps
	every_n_steps.n = %RESET_EPISODE_PERIOD
	# train_uvf.max_steps_per_episode reads the same macro as every_n_steps.n,
	# so the two values cannot drift apart.
	train_uvf.max_steps_per_episode = %RESET_EPISODE_PERIOD
	# Do a manual reset every N episodes
	UvfAgent.reset_env_cond_fn = @every_n_episodes
	every_n_episodes.n = %RESET_ENV_PERIOD
	# every_n_episodes is also given the episode length (same macro as above).
	every_n_episodes.steps_per_episode = %RESET_EPISODE_PERIOD

	## Config defaults
	# Names of the evaluation modes. The same three strings are used as keys in
	# the agent/Context.samplers and meta/Context.samplers dictionaries in this
	# file.
	EVAL_MODES = ["eval1", "eval2", "eval3"]

	## Config agent
	# Macros pointing at the two scoped Context configurables: the "agent" scope
	# and the "meta" scope, each configured separately in this file.
	# NOTE(review): these macros are not read anywhere in the visible part of
	# this file -- presumably consumed by another config; confirm.
	CONTEXT = @agent/Context
	META_CONTEXT = @meta/Context

	## Config agent context
	# Bindings for the Context configurable under the "agent" scope.
	# The context range comes from the %context_range macro, i.e. (-10, 10).
	agent/Context.context_ranges = [%context_range]
	# %SUBGOAL_DIM is not defined in the visible part of this file; it must be
	# supplied by another config file or binding before this config is used.
	agent/Context.context_shapes = [%SUBGOAL_DIM]
	# NOTE(review): presumably the number of low-level steps between meta-level
	# actions -- confirm against the Context implementation.
	agent/Context.meta_action_every_n = 10
	# One sampler list per mode. "train" and "explore" share the same scoped
	# DirectionSampler; each eval mode gets its own uvf_evalN-scoped
	# ConstantSampler. No value is bound for the uvf_evalN/ConstantSampler
	# scopes in the visible part of this file (only evalN/ConstantSampler is
	# bound below).
	agent/Context.samplers = {
	"train": [@train/DirectionSampler],
	"explore": [@train/DirectionSampler],
	"eval1": [@uvf_eval1/ConstantSampler],
	"eval2": [@uvf_eval2/ConstantSampler],
	"eval3": [@uvf_eval3/ConstantSampler],
	}

	# Context transition functions; both use the "relative" variants, whose `k`
	# parameter is bound to %SUBGOAL_DIM elsewhere in this file.
	agent/Context.context_transition_fn = @relative_context_transition_fn
	agent/Context.context_multi_transition_fn = @relative_context_multi_transition_fn

	# Reward for the agent context: negative_distance under the "uvf" scope.
	# That scope's parameters are not configured in the visible part of this
	# file.
	agent/Context.reward_fn = @uvf/negative_distance

	## Config meta context
	# Bindings for the Context configurable under the "meta" scope.
	# The range comes from the %meta_context_range macro, ((-2, -2), (10, 10)).
	meta/Context.context_ranges = [%meta_context_range]
	# A single context of size 2, matching the 2-element tuples in
	# %meta_context_range and the 2-element evalN/ConstantSampler values.
	meta/Context.context_shapes = [2]
	# One sampler list per mode. "train" and "explore" share the same scoped
	# RandomSampler; each eval mode uses the evalN-scoped ConstantSampler whose
	# value is bound at the end of this file.
	meta/Context.samplers = {
	"train": [@train/RandomSampler],
	"explore": [@train/RandomSampler],
	"eval1": [@eval1/ConstantSampler],
	"eval2": [@eval2/ConstantSampler],
	"eval3": [@eval3/ConstantSampler],
	}
	# Reward for the meta context: negative_distance under the "task" scope,
	# configured in the rewards section of this file.
	meta/Context.reward_fn = @task/negative_distance

	## Config rewards
	# Parameters for negative_distance under the "task" scope, which is the
	# reward function bound to meta/Context.reward_fn.
	# NOTE(review): state_indices [0, 1] presumably selects the first two state
	# entries (likely the x/y position) -- confirm against the reward function.
	task/negative_distance.state_indices = [0, 1]
	# The relative-context and diff options are both switched off, and no
	# offset is applied.
	task/negative_distance.relative_context = False
	task/negative_distance.diff = False
	task/negative_distance.offset = 0.0

	## Config samplers
	# The training samplers reuse the range macros defined at the top of this
	# file: the meta range for RandomSampler, the agent range for
	# DirectionSampler.
	train/RandomSampler.context_range = %meta_context_range
	train/DirectionSampler.context_range = %context_range
	# The same %SUBGOAL_DIM macro is bound to `k` on the sampler, both relative
	# transition functions and MetaAgent, keeping them consistent.
	# %SUBGOAL_DIM is not defined in the visible part of this file.
	train/DirectionSampler.k = %SUBGOAL_DIM
	relative_context_transition_fn.k = %SUBGOAL_DIM
	relative_context_multi_transition_fn.k = %SUBGOAL_DIM
	MetaAgent.k = %SUBGOAL_DIM

	# Fixed values returned by the evalN-scoped ConstantSamplers used in
	# meta/Context.samplers. All three lie inside %meta_context_range,
	# ((-2, -2), (10, 10)).
	# NOTE(review): presumably target (x, y) positions in the maze -- confirm.
	eval1/ConstantSampler.value = [8, 0]
	eval2/ConstantSampler.value = [8, 8]
	eval3/ConstantSampler.value = [0, 8]