Jarvis-K committed · Commit 83494ad · Parent: 5f98914
update naive actor, ant-v4 expert result

Files changed:
- deciders/act.py +8 -33
- main_reflexion.py +1 -0
- record_reflexion.csv +1 -1
deciders/act.py
CHANGED
@@ -209,41 +209,16 @@ class NaiveAct(gpt):
         my_mem += f"\nBelow are the latest {min(self.mem_num, len(self.env_history))} historical data entries:\n"
         my_mem += f"{self.env_history.get_histories(self.mem_num)}"
 
-
-
-
-
-
-                if "Continuous" in self.args.env_name:
-                    action = float(re.findall(r"[-+]?\d*\.\d+", action_str)[0])
-
-                else:
-                    action = int(re.findall(r"\d+", action_str)[0])
-            except:
-                action = None
-                asking_round += 1
-                continue
-
-            if "Continuous" not in self.args.env_name:
-                if (action-1) in self.action_space:
-                    break
-                else:
-                    asking_round += 1
-                    action = None
-            else:
-                if action >= self.action_space.low and action <= self.action_space.high:
-                    break
-                else:
-                    asking_round += 1
-                    action = None
-
-        if action is None:
-            print('err on selecting action')
-            action = self.default_action
+
+        prompt, res = self.response(state_description, action_description, env_info, game_description, goal_description, my_mem)
+        action_str = res.choices[0].text.strip()
+        print(f'my anwser is {action_str}')
+        action = self.parser.parse(response).action
         self._add_history_after_action(action)
-        self.logger.info(f'
-        self.logger.info(f'The GPT response is: {res}.')
+        self.logger.info(f'The GPT response is: {response}.')
         self.logger.info(f'The optimal action is: {action}.')
+        if env_info.get('history'):
+            self.logger.info(f'History: {history_to_str(env_info["history"])}')
         return action, prompt, res, 0, 0
 
     def _read_mem(self, ):
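Note on the new hunk: `self.response(...)` returns `(prompt, res)`, yet the committed lines parse and log a variable named `response` that is never assigned in this hunk, and `action_str` is computed but never handed to the parser. A minimal, self-contained sketch of how the simplified path presumably fits together; every class below is a hypothetical stub, and treating `response` as a slip for `res` (and feeding `action_str` to the parser) is an assumption, not what the commit literally does:

    # Hypothetical stubs standing in for the OpenAI completion object and the
    # repo's parser; only the control flow mirrors the committed hunk.
    class FakeChoice:
        text = " 3 "                          # completion text, with whitespace

    class FakeCompletion:
        choices = [FakeChoice()]

    class FakeParser:
        class Parsed:
            action = 3
        def parse(self, text):
            # Assumption: the parser consumes the raw completion text and
            # exposes the chosen action as an `.action` attribute.
            return self.Parsed()

    def act(res=FakeCompletion(), parser=FakeParser()):
        action_str = res.choices[0].text.strip()
        print(f"my answer is {action_str}")
        # The commit calls self.parser.parse(response); `action_str` is the
        # plausible intended input here.
        action = parser.parse(action_str).action
        return action

    print(act())  # -> 3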
main_reflexion.py
CHANGED
@@ -137,6 +137,7 @@ def _run(translator, environment, decider, max_episode_len, logfile, args, trail
                 logger.debug(f"Error: {e}, Retry! ({error_i+1}/{retry_num})")
                 continue
         if error_flag:
+            action = decider.default_action
         state_description, reward, termination, truncation, env_info = environment.step_llm(
             action
         )
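The one-line addition closes a gap in the retry loop: if every retry fails and `error_flag` stays set, `action` could be unset (or stale) by the time `environment.step_llm(action)` runs. A self-contained sketch of the retry-with-fallback flow, with hypothetical stubs in place of the real decider and environment:

    # Minimal sketch of the pattern this hunk completes; Decider and the
    # values below are illustrative stand-ins, not the repo's classes.
    import logging

    logging.basicConfig(level=logging.DEBUG)
    logger = logging.getLogger(__name__)

    class Decider:
        default_action = 1                          # safe fallback action
        def act(self):
            raise RuntimeError("LLM call failed")   # force the fallback path

    decider, retry_num = Decider(), 3
    action, error_flag = None, True

    for error_i in range(retry_num):
        try:
            action = decider.act()
            error_flag = False
            break
        except Exception as e:
            logger.debug(f"Error: {e}, Retry! ({error_i+1}/{retry_num})")
            continue

    if error_flag:
        action = decider.default_action             # the committed line's effect

    print(action)  # -> 1, the default, so step_llm never receives None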
record_reflexion.csv
CHANGED
@@ -10,5 +10,5 @@ FrozenLake-v1,1,expert,200.0
 MountainCarContinuous-v0,1,expert,200.0
 RepresentedBoxing-v0,1,expert,200.0
 RepresentedPong-v0,1,expert,200.0
-Ant-v4,1,expert,
+Ant-v4,1,expert,5000
 