santit96 committed on
Commit
79febd9
·
1 Parent(s): e300cfa

Fix styles and delete unused code in wordle_env

Browse files
wordle_env/__init__.py CHANGED
@@ -7,9 +7,6 @@ from gym.envs.registration import (
7
  )
8
  import os
9
  from . import wordle
10
- # Classic
11
- # ----------------------------------------
12
- current_dir = os.path.realpath(os.path.dirname(__file__))
13
 
14
 
15
  register(
 
7
  )
8
  import os
9
  from . import wordle
 
 
 
10
 
11
 
12
  register(
wordle_env/state.py CHANGED
@@ -2,13 +2,11 @@
2
  Keep the state in a 1D int array
3
 
4
  index[0] = remaining steps
5
- Rest of data is laid out as binary array
6
-
7
- [1..27] = whether char has been guessed or not
8
 
9
  [[status, status, status, status, status]
10
  for _ in "ABCD..."]
11
  where status has codes
 
12
  [1, 0, 0] - char is definitely not in this spot
13
  [0, 1, 0] - char is maybe in this spot
14
  [0, 0, 1] - char is definitely in this spot
@@ -126,6 +124,7 @@ def get_mask(word: str, goal_word: str) -> List[int]:
126
 
127
  return mask
128
 
 
129
  def update_mask(state: WordleState, word: str, goal_word: str) -> WordleState:
130
  """
131
  return a copy of state that has been updated to new state
@@ -171,4 +170,3 @@ def update(state: WordleState, word: str, goal_word: str) -> WordleState:
171
  state[offset:offset + 3 * WORDLE_N] = [1, 0, 0] * WORDLE_N
172
  processed_letters.append(c)
173
  return state, reward
174
-
 
2
  Keep the state in a 1D int array
3
 
4
  index[0] = remaining steps
 
 
 
5
 
6
  [[status, status, status, status, status]
7
  for _ in "ABCD..."]
8
  where status has codes
9
+ [0, 0, 0] - no information about the char
10
  [1, 0, 0] - char is definitely not in this spot
11
  [0, 1, 0] - char is maybe in this spot
12
  [0, 0, 1] - char is definitely in this spot
 
124
 
125
  return mask
126
 
127
+
128
  def update_mask(state: WordleState, word: str, goal_word: str) -> WordleState:
129
  """
130
  return a copy of state that has been updated to new state
 
170
  state[offset:offset + 3 * WORDLE_N] = [1, 0, 0] * WORDLE_N
171
  processed_letters.append(c)
172
  return state, reward
 
wordle_env/wordle.py CHANGED
@@ -12,9 +12,11 @@ from .words import complete_vocabulary, target_vocabulary
12
 
13
  import random
14
 
15
- def _load_words(limit: Optional[int]=None, complete: Optional[bool]=False) -> List[str]:
16
- words = complete_vocabulary if complete else target_vocabulary
17
- return words if not limit else words[:limit]
 
 
18
 
19
  class WordleEnvBase(gym.Env):
20
  """
@@ -23,21 +25,21 @@ class WordleEnvBase(gym.Env):
23
  * 13k for full vocab
24
  State space is defined as:
25
  * 6 possibilities for turns (WORDLE_TURNS)
26
- * Each VALID_CHAR has a state of 0/1 for whether it's been guessed before
27
  * For each in VALID_CHARS [A-Z] can be in one of 3^WORDLE_N states: (No, Maybe, Yes)
28
  for full game, this is (3^5)^26
29
  Each state has 1 + 5*26 possibilities
30
  Reward:
31
  Reward is 10 for guessing the right word, -10 for not guessing the right word after 6 guesses.
 
32
  Starting State:
33
  Random goal word
34
- Initial state with turn 0, all chars Unvisited + Maybe
35
  """
 
36
  def __init__(self, words: List[str],
37
- max_turns: int=6,
38
- allowable_words: Optional[int]=None,
39
- frequencies: Optional[List[float]]=None,
40
- mask_based_state_updates: bool=False):
41
  assert all(len(w) == WORDLE_N for w in words), f'Not all words of length {WORDLE_N}, {words}'
42
  self.words = words
43
  self.max_turns = max_turns
@@ -46,16 +48,11 @@ class WordleEnvBase(gym.Env):
46
  if not self.allowable_words:
47
  self.allowable_words = len(self.words)
48
 
49
- self.frequencies = None
50
- if frequencies:
51
- assert len(words) == len(frequencies), f'{len(words), len(frequencies)}'
52
- self.frequencies = np.array(frequencies, dtype=np.float32) / sum(frequencies)
53
-
54
  self.action_space = spaces.Discrete(self.words_as_action_space())
55
  self.observation_space = spaces.MultiDiscrete(state.get_nvec(self.max_turns))
56
 
57
  self.done = True
58
- self.goal_word: Tuple = tuple(tuple([tuple([-1]) * WORDLE_N]) *len(WORDLE_CHARS))
59
 
60
  self.state: state.WordleState = None
61
  self.state_updater = state.update
@@ -74,17 +71,17 @@ class WordleEnvBase(gym.Env):
74
  goal_word = self.words[self.goal_word]
75
  # assert word in self.words, f'{word} not in words list'
76
  self.state, r = self.state_updater(state=self.state,
77
- word=word,
78
- goal_word=goal_word)
79
 
80
  reward = r
81
  if action == self.goal_word:
82
  self.done = True
83
- #reward = REWARD
84
  if state.remaining_steps(self.state) == self.max_turns-1:
85
- reward = 0#-10*REWARD # No reward for guessing off the bat
86
  else:
87
- #reward = REWARD*(self.state.remaining_steps() + 1) / self.max_turns
88
  reward = REWARD
89
  elif state.remaining_steps(self.state) == 0:
90
  self.done = True
@@ -108,25 +105,6 @@ class WordleEnvBase(gym.Env):
108
  def words_as_action_space(self):
109
  return len(self.words)
110
 
111
- def encode_word(self, word):
112
- encoded_word = np.array(
113
- [[0] * WORDLE_N] * len(WORDLE_CHARS),
114
- dtype=np.int32
115
- )
116
- for index, letter in enumerate(word):
117
- cint = WORDLE_CHARS.index(letter)
118
- encoded_word[cint][index] = 1
119
- return encoded_word
120
-
121
- def decode_word(self, action):
122
- word = [''] * WORDLE_N
123
- for index, letter_vec in enumerate(action):
124
- if 1 in letter_vec:
125
- for i, j in enumerate(letter_vec):
126
- if j == 1:
127
- word[i] = WORDLE_CHARS[index]
128
- return ''.join(word)
129
-
130
 
131
  class WordleEnv100OneAction(WordleEnvBase):
132
  def __init__(self):
 
12
 
13
  import random
14
 
15
+
16
+ def _load_words(limit: Optional[int] = None, complete: Optional[bool] = False) -> List[str]:
17
+ words = complete_vocabulary if complete else target_vocabulary
18
+ return words if not limit else words[:limit]
19
+
20
 
21
  class WordleEnvBase(gym.Env):
22
  """
 
25
  * 13k for full vocab
26
  State space is defined as:
27
  * 6 possibilities for turns (WORDLE_TURNS)
 
28
  * For each in VALID_CHARS [A-Z] can be in one of 3^WORDLE_N states: (No, Maybe, Yes)
29
  for full game, this is (3^5)^26
30
  Each state has 1 + 5*26 possibilities
31
  Reward:
32
  Reward is 10 for guessing the right word, -10 for not guessing the right word after 6 guesses.
33
+ 1 for every letter correctly guessed on each try
34
  Starting State:
35
  Random goal word
36
+ Initial state with turn 0, all chars Unvisited
37
  """
38
+
39
  def __init__(self, words: List[str],
40
+ max_turns: int = 6,
41
+ allowable_words: Optional[int] = None,
42
+ mask_based_state_updates: bool = False):
 
43
  assert all(len(w) == WORDLE_N for w in words), f'Not all words of length {WORDLE_N}, {words}'
44
  self.words = words
45
  self.max_turns = max_turns
 
48
  if not self.allowable_words:
49
  self.allowable_words = len(self.words)
50
 
 
 
 
 
 
51
  self.action_space = spaces.Discrete(self.words_as_action_space())
52
  self.observation_space = spaces.MultiDiscrete(state.get_nvec(self.max_turns))
53
 
54
  self.done = True
55
+ self.goal_word: int = -1
56
 
57
  self.state: state.WordleState = None
58
  self.state_updater = state.update
 
71
  goal_word = self.words[self.goal_word]
72
  # assert word in self.words, f'{word} not in words list'
73
  self.state, r = self.state_updater(state=self.state,
74
+ word=word,
75
+ goal_word=goal_word)
76
 
77
  reward = r
78
  if action == self.goal_word:
79
  self.done = True
80
+ # reward = REWARD
81
  if state.remaining_steps(self.state) == self.max_turns-1:
82
+ reward = 0 # -10*REWARD # No reward for guessing off the bat
83
  else:
84
+ # reward = REWARD*(self.state.remaining_steps() + 1) / self.max_turns
85
  reward = REWARD
86
  elif state.remaining_steps(self.state) == 0:
87
  self.done = True
 
105
  def words_as_action_space(self):
106
  return len(self.words)
107
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
 
109
  class WordleEnv100OneAction(WordleEnvBase):
110
  def __init__(self):