Spaces:

NCTCMumbai
/

AdvaitBERT-AI_Explanability

Sleeping

App Files Files Community

AdvaitBERT-AI_Explanability / models /research /efficient-hrl /context /configs /ant_push_single.gin

NCTCMumbai

Upload 2583 files

18ddfe2 verified over 1 year ago

raw

history blame contribute delete

1.97 kB

	#-*-Python-*-
	# Environment selection and the context-range macros reused further down.
	# Name handed to create_maze_env.
	create_maze_env.env_name = "AntPush"
	# Pairs two macros that are not defined in the visible part of this file;
	# another gin file or binding has to provide CONTEXT_RANGE_MIN/MAX.
	context_range = (%CONTEXT_RANGE_MIN, %CONTEXT_RANGE_MAX)
	# NOTE(review): presumably ((min_0, min_1), (max_0, max_1)) for the
	# 2-element meta context -- confirm against the Context implementation.
	meta_context_range = ((-16, -4), (16, 20))

	# Reset schedule. Both periods are macros so that every binding below
	# that refers to them stays in sync when a value is changed.
	RESET_EPISODE_PERIOD = 500
	RESET_ENV_PERIOD = 1
	# End episode every N steps
	UvfAgent.reset_episode_cond_fn = @every_n_steps
	every_n_steps.n = %RESET_EPISODE_PERIOD
	# Same macro as every_n_steps.n above.
	train_uvf.max_steps_per_episode = %RESET_EPISODE_PERIOD
	# Do a manual reset every N episodes
	UvfAgent.reset_env_cond_fn = @every_n_episodes
	every_n_episodes.n = %RESET_ENV_PERIOD
	# every_n_episodes is also given the episode length macro.
	every_n_episodes.steps_per_episode = %RESET_EPISODE_PERIOD

	## Config defaults
	# Single mode "eval2"; the same string is used as a key in
	# meta/Context.samplers and as the scope of eval2/ConstantSampler.
	EVAL_MODES = ["eval2"]

	## Config agent
	# Macros referencing the two scoped Context configurables set up below.
	# The consumers of these macros are not shown in this file.
	CONTEXT = @agent/Context
	META_CONTEXT = @meta/Context

	## Config agent context
	# Bindings for Context under the "agent" scope.
	# SUBGOAL_DIM is not defined in the visible part of this file; it must be
	# provided by another gin file or binding.
	agent/Context.context_ranges = [%context_range]
	agent/Context.context_shapes = [%SUBGOAL_DIM]
	agent/Context.meta_action_every_n = 10
	# "train" and "explore" share the same sampler; unlike meta/Context, no
	# "eval2" entry is given here.
	agent/Context.samplers = {
	"train": [@train/DirectionSampler],
	"explore": [@train/DirectionSampler],
	}

	# Single-step and multi-step transition functions; both receive
	# k = %SUBGOAL_DIM in the samplers section of this file.
	agent/Context.context_transition_fn = @relative_context_transition_fn
	agent/Context.context_multi_transition_fn = @relative_context_multi_transition_fn

	# negative_distance under the "uvf" scope; that scope's parameters are
	# not bound in this file.
	agent/Context.reward_fn = @uvf/negative_distance

	## Config meta context
	# Bindings for Context under the "meta" scope.
	meta/Context.context_ranges = [%meta_context_range]
	meta/Context.context_shapes = [2]
	# All three modes use the same eval2/ConstantSampler, whose value is
	# bound on the last line of this file.
	meta/Context.samplers = {
	"train": [@eval2/ConstantSampler],
	"explore": [@eval2/ConstantSampler],
	"eval2": [@eval2/ConstantSampler],
	}
	# negative_distance under the "task" scope, configured in the rewards
	# section of this file.
	meta/Context.reward_fn = @task/negative_distance

	## Config rewards
	# Parameters for the "task"-scoped negative_distance used as
	# meta/Context.reward_fn.
	# NOTE(review): indices 0 and 1 are presumably the agent's x/y position
	# in the state vector -- confirm against the environment.
	task/negative_distance.state_indices = [0, 1]
	task/negative_distance.relative_context = False
	task/negative_distance.diff = False
	task/negative_distance.offset = 0.0

	## Config samplers
	# NOTE(review): train/RandomSampler is bound here but is not referenced
	# by any samplers table in this file -- confirm whether it is still needed.
	train/RandomSampler.context_range = %meta_context_range
	train/DirectionSampler.context_range = %context_range
	# The same SUBGOAL_DIM macro is passed as k to the sampler, both
	# transition functions and MetaAgent.
	train/DirectionSampler.k = %SUBGOAL_DIM
	relative_context_transition_fn.k = %SUBGOAL_DIM
	relative_context_multi_transition_fn.k = %SUBGOAL_DIM
	MetaAgent.k = %SUBGOAL_DIM

	# Constant value for every meta/Context sampler mode; it has two
	# elements, matching meta/Context.context_shapes = [2].
	eval2/ConstantSampler.value = [0, 19]