Jarvis-K committed
Commit 83494ad · 1 parent: 5f98914

update naive actor, Ant-v4 expert result

Files changed (3):
  1. deciders/act.py +8 -33
  2. main_reflexion.py +1 -0
  3. record_reflexion.csv +1 -1
deciders/act.py CHANGED
@@ -209,41 +209,16 @@ class NaiveAct(gpt):
         my_mem += f"\nBelow are the latest {min(self.mem_num, len(self.env_history))} historical data entries:\n"
         my_mem += f"{self.env_history.get_histories(self.mem_num)}"
 
-        while asking_round < 3:
-            prompt, res = self.response(state_description, action_description, env_info, game_description, goal_description, my_mem)
-            action_str = res.choices[0].text.strip()
-            print(f'my answer is {action_str}')
-            try:
-                if "Continuous" in self.args.env_name:
-                    action = float(re.findall(r"[-+]?\d*\.\d+", action_str)[0])
-
-                else:
-                    action = int(re.findall(r"\d+", action_str)[0])
-            except:
-                action = None
-                asking_round += 1
-                continue
-
-            if "Continuous" not in self.args.env_name:
-                if (action-1) in self.action_space:
-                    break
-                else:
-                    asking_round += 1
-                    action = None
-            else:
-                if action >= self.action_space.low and action <= self.action_space.high:
-                    break
-                else:
-                    asking_round += 1
-                    action = None
-
-        if action is None:
-            print('err on selecting action')
-            action = self.default_action
+
+        prompt, res = self.response(state_description, action_description, env_info, game_description, goal_description, my_mem)
+        action_str = res.choices[0].text.strip()
+        print(f'my answer is {action_str}')
+        action = self.parser.parse(action_str).action
         self._add_history_after_action(action)
-        self.logger.info(f'\n{prompt}')
         self.logger.info(f'The GPT response is: {res}.')
         self.logger.info(f'The optimal action is: {action}.')
+        if env_info.get('history'):
+            self.logger.info(f'History: {history_to_str(env_info["history"])}')
         return action, prompt, res, 0, 0
 
     def _read_mem(self, ):
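
The deleted block retried the model up to three times and validated the regex-parsed action inline; the commit collapses all of that into a single self.parser.parse(...) call. The parser itself is not shown in this hunk, so the following is only a sketch of the contract it would need, reconstructed from the deleted validation logic. The class name NaiveActionParser, the ParsedAction container, and the constructor arguments are assumptions for illustration, not the repository's actual implementation:

    import re
    from dataclasses import dataclass

    @dataclass
    class ParsedAction:
        action: object  # int for discrete envs, float for continuous ones

    class NaiveActionParser:
        """Hypothetical stand-in for self.parser (not the repo's class)."""

        def __init__(self, env_name, action_space, default_action):
            self.env_name = env_name
            self.action_space = action_space
            self.default_action = default_action

        def parse(self, text):
            # Mirror the deleted logic: regex-extract, then range-check.
            try:
                if "Continuous" in self.env_name:
                    action = float(re.findall(r"[-+]?\d*\.\d+", text)[0])
                    if not (float(self.action_space.low) <= action <= float(self.action_space.high)):
                        raise ValueError(action)
                else:
                    # Replies are 1-indexed, as in the deleted check.
                    action = int(re.findall(r"\d+", text)[0])
                    if (action - 1) not in self.action_space:
                        raise ValueError(action)
            except (IndexError, ValueError):
                # Fall back instead of re-asking, replacing the retry loop.
                action = self.default_action
            return ParsedAction(action)

Under that assumption, parse(action_str).action always yields a usable action, which is what lets the caller drop the asking_round loop and the action-is-None fallback.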
main_reflexion.py CHANGED
@@ -137,6 +137,7 @@ def _run(translator, environment, decider, max_episode_len, logfile, args, trail
             logger.debug(f"Error: {e}, Retry! ({error_i+1}/{retry_num})")
             continue
         if error_flag:
+            action = decider.default_action
             state_description, reward, termination, truncation, env_info = environment.step_llm(
                 action
             )
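
Only the tail of the retry logic is visible in this hunk. Below is a sketch of the loop it implies; error_flag, decider.default_action, and environment.step_llm are taken from the hunk, while the function wrapper, the retry_num default, and the simplified decider.act signature are assumptions for illustration. The added line ensures the environment is never stepped with a stale or undefined action once every retry has failed:

    def step_with_fallback(decider, environment, state_description, logger, retry_num=3):
        # Illustrative wrapper; not the repo's actual _run loop.
        error_flag = True
        action = None
        for error_i in range(retry_num):
            try:
                action, prompt, res, _, _ = decider.act(state_description)
                error_flag = False
                break
            except Exception as e:
                logger.debug(f"Error: {e}, Retry! ({error_i+1}/{retry_num})")
                continue
        if error_flag:
            # New in this commit: fall back to a safe default action.
            action = decider.default_action
        return environment.step_llm(action)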
record_reflexion.csv CHANGED
@@ -10,5 +10,5 @@ FrozenLake-v1,1,expert,200.0
 MountainCarContinuous-v0,1,expert,200.0
 RepresentedBoxing-v0,1,expert,200.0
 RepresentedPong-v0,1,expert,200.0
-Ant-v4,1,expert,100
+Ant-v4,1,expert,5000
 
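
record_reflexion.csv appears to store one reference score per environment, with the visible rows reading as env name, trial, policy label, and score; the commit replaces the Ant-v4 expert placeholder of 100 with 5000. A minimal, hypothetical lookup over that inferred layout (the helper is illustrative, not part of the repo):

    import csv

    def expert_score(path, env_name):
        # Column layout (env, trial, policy, score) inferred from the rows.
        with open(path, newline="") as f:
            for row in csv.reader(f):
                if len(row) < 4:  # tolerate the trailing blank line
                    continue
                env, _trial, policy, score = row[:4]
                if env == env_name and policy == "expert":
                    return float(score)
        return None

    # e.g. expert_score("record_reflexion.csv", "Ant-v4") -> 5000.0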