Spaces:

MAIL-CS-ECNU
/

Text-Gym-Agents

Runtime error

App Files Files Community

Text-Gym-Agents / envs /mujoco /reacher_translator.py

CharlesZhang

add reacher env and all mujoco envs now support COT, SPP, SELF-REFLEXION, EXE methods under L1&L3 setting.

8f842da over 1 year ago

raw

history blame contribute delete

3.35 kB

	'''Reacher
	Action Space Box(-1.0, 1.0, (2,), float32)

	Observation Space Box(-inf, inf, (11,), float64)
	'''
	class BasicLevelTranslator:
	def __init__(self):
	pass

	def translate(self, state):
	(cos_angle_arm1, cos_angle_arm2, sin_angle_arm1, sin_angle_arm2,
	target_x, target_y, angular_vel_arm1, angular_vel_arm2,
	diff_x, diff_y, diff_z) = state

	res = (f"Arm1 has a cosine angle of {cos_angle_arm1:.2f} and a sine angle of {sin_angle_arm1:.2f}. "\
	f"Arm2 has a cosine angle of {cos_angle_arm2:.2f} and a sine angle of {sin_angle_arm2:.2f}. "\
	f"Target position is at ({target_x:.2f}, {target_y:.2f}). "\
	f"Arm1's angular velocity is {angular_vel_arm1:.2f} rad/s, and Arm2's is {angular_vel_arm2:.2f} rad/s. "\
	f"Vector difference between fingertip and target is ({diff_x:.2f}, {diff_y:.2f}, {diff_z:.2f}).")
	return res

	class GameDescriber:
	def __init__(self, args):
	self.is_only_local_obs = args.is_only_local_obs == 1
	self.max_episode_len = args.max_episode_len
	self.action_desc_dict = {
	}
	self.reward_desc_dict = {
	}

	def translate_terminate_state(self, state, episode_len, max_episode_len):
	return ""

	def translate_potential_next_state(self, state, action):
	return ""

	def describe_goal(self):
	return "The goal is to control a two-jointed robot arm to move its end effector (fingertip) close to a randomly spawned target."

	def describe_game(self):
	return ("In the Reacher game, you control a two-jointed robot arm. The objective is to maneuver the arm's fingertip close to a target. "\
	"The observation space includes the cosine and sine of the arm angles, coordinates of the target, angular velocities of the arms, "\
	"and the vector from the fingertip to the target. The episode ends after 50 timesteps or if any state space value becomes non-finite. "\
	"Rewards are given based on the distance of the fingertip from the target and the magnitude of actions applied.")

	def describe_action(self):
	return ("Your next move: \n Please provide two numerical values representing the torques applied at the two hinge joints. "\
	"Each value should be within the range of [-1, 1].")

	class BasicStateSequenceTranslator(BasicLevelTranslator):
	def translate(self, infos, is_current=False):
	descriptions = []
	if is_current:
	state_desc = BasicLevelTranslator().translate(infos[-1]['state'])
	return state_desc
	for i, info in enumerate(infos):
	assert 'state' in info, "info should contain state information"

	state_desc = BasicLevelTranslator().translate(info['state'])
	action_desc = ("Take Action: Apply Torque at Joint 1: {:.2f}, "
	"Joint 2 Torque: {:.2f}"
	).format(info['action'][0], info['action'][1])

	reward_desc = f"Result: Reward of {info['reward']:.2f}, "
	next_state_desc = BasicLevelTranslator().translate(info['next_state'])
	descriptions.append(f"{state_desc}.\\n {action_desc} \\n {reward_desc} \\n Transit to {next_state_desc}")
	return descriptions