Spaces:
Sleeping
Sleeping
Fix styles and delete unused code in wordle_env
Browse files
- wordle_env/__init__.py +0 -3
- wordle_env/state.py +2 -4
- wordle_env/wordle.py +17 -39
wordle_env/__init__.py
CHANGED
@@ -7,9 +7,6 @@ from gym.envs.registration import (
|
|
7 |
)
|
8 |
import os
|
9 |
from . import wordle
|
10 |
-
# Classic
|
11 |
-
# ----------------------------------------
|
12 |
-
current_dir = os.path.realpath(os.path.dirname(__file__))
|
13 |
|
14 |
|
15 |
register(
|
|
|
7 |
)
|
8 |
import os
|
9 |
from . import wordle
|
|
|
|
|
|
|
10 |
|
11 |
|
12 |
register(
|
wordle_env/state.py
CHANGED
@@ -2,13 +2,11 @@
|
|
2 |
Keep the state in a 1D int array
|
3 |
|
4 |
index[0] = remaining steps
|
5 |
-
Rest of data is laid out as binary array
|
6 |
-
|
7 |
-
[1..27] = whether char has been guessed or not
|
8 |
|
9 |
[[status, status, status, status, status]
|
10 |
for _ in "ABCD..."]
|
11 |
where status has codes
|
|
|
12 |
[1, 0, 0] - char is definitely not in this spot
|
13 |
[0, 1, 0] - char is maybe in this spot
|
14 |
[0, 0, 1] - char is definitely in this spot
|
@@ -126,6 +124,7 @@ def get_mask(word: str, goal_word: str) -> List[int]:
|
|
126 |
|
127 |
return mask
|
128 |
|
|
|
129 |
def update_mask(state: WordleState, word: str, goal_word: str) -> WordleState:
|
130 |
"""
|
131 |
return a copy of state that has been updated to new state
|
@@ -171,4 +170,3 @@ def update(state: WordleState, word: str, goal_word: str) -> WordleState:
|
|
171 |
state[offset:offset + 3 * WORDLE_N] = [1, 0, 0] * WORDLE_N
|
172 |
processed_letters.append(c)
|
173 |
return state, reward
|
174 |
-
|
|
|
2 |
Keep the state in a 1D int array
|
3 |
|
4 |
index[0] = remaining steps
|
|
|
|
|
|
|
5 |
|
6 |
[[status, status, status, status, status]
|
7 |
for _ in "ABCD..."]
|
8 |
where status has codes
|
9 |
+
[0, 0, 0] - no information about the char
|
10 |
[1, 0, 0] - char is definitely not in this spot
|
11 |
[0, 1, 0] - char is maybe in this spot
|
12 |
[0, 0, 1] - char is definitely in this spot
|
|
|
124 |
|
125 |
return mask
|
126 |
|
127 |
+
|
128 |
def update_mask(state: WordleState, word: str, goal_word: str) -> WordleState:
|
129 |
"""
|
130 |
return a copy of state that has been updated to new state
|
|
|
170 |
state[offset:offset + 3 * WORDLE_N] = [1, 0, 0] * WORDLE_N
|
171 |
processed_letters.append(c)
|
172 |
return state, reward
|
|
wordle_env/wordle.py
CHANGED
@@ -12,9 +12,11 @@ from .words import complete_vocabulary, target_vocabulary
|
|
12 |
|
13 |
import random
|
14 |
|
15 |
-
|
16 |
-
|
17 |
-
|
|
|
|
|
18 |
|
19 |
class WordleEnvBase(gym.Env):
|
20 |
"""
|
@@ -23,21 +25,21 @@ class WordleEnvBase(gym.Env):
|
|
23 |
* 13k for full vocab
|
24 |
State space is defined as:
|
25 |
* 6 possibilities for turns (WORDLE_TURNS)
|
26 |
-
* Each VALID_CHAR has a state of 0/1 for whether it's been guessed before
|
27 |
* For each in VALID_CHARS [A-Z] can be in one of 3^WORDLE_N states: (No, Maybe, Yes)
|
28 |
for full game, this is (3^5)^26
|
29 |
Each state has 1 + 5*26 possibilities
|
30 |
Reward:
|
31 |
Reward is 10 for guessing the right word, -10 for not guessing the right word after 6 guesses.
|
|
|
32 |
Starting State:
|
33 |
Random goal word
|
34 |
-
Initial state with turn 0, all chars Unvisited
|
35 |
"""
|
|
|
36 |
def __init__(self, words: List[str],
|
37 |
-
max_turns: int=6,
|
38 |
-
allowable_words: Optional[int]=None,
|
39 |
-
|
40 |
-
mask_based_state_updates: bool=False):
|
41 |
assert all(len(w) == WORDLE_N for w in words), f'Not all words of length {WORDLE_N}, {words}'
|
42 |
self.words = words
|
43 |
self.max_turns = max_turns
|
@@ -46,16 +48,11 @@ class WordleEnvBase(gym.Env):
|
|
46 |
if not self.allowable_words:
|
47 |
self.allowable_words = len(self.words)
|
48 |
|
49 |
-
self.frequencies = None
|
50 |
-
if frequencies:
|
51 |
-
assert len(words) == len(frequencies), f'{len(words), len(frequencies)}'
|
52 |
-
self.frequencies = np.array(frequencies, dtype=np.float32) / sum(frequencies)
|
53 |
-
|
54 |
self.action_space = spaces.Discrete(self.words_as_action_space())
|
55 |
self.observation_space = spaces.MultiDiscrete(state.get_nvec(self.max_turns))
|
56 |
|
57 |
self.done = True
|
58 |
-
self.goal_word:
|
59 |
|
60 |
self.state: state.WordleState = None
|
61 |
self.state_updater = state.update
|
@@ -74,17 +71,17 @@ class WordleEnvBase(gym.Env):
|
|
74 |
goal_word = self.words[self.goal_word]
|
75 |
# assert word in self.words, f'{word} not in words list'
|
76 |
self.state, r = self.state_updater(state=self.state,
|
77 |
-
|
78 |
-
|
79 |
|
80 |
reward = r
|
81 |
if action == self.goal_word:
|
82 |
self.done = True
|
83 |
-
#reward = REWARD
|
84 |
if state.remaining_steps(self.state) == self.max_turns-1:
|
85 |
-
reward = 0
|
86 |
else:
|
87 |
-
#reward = REWARD*(self.state.remaining_steps() + 1) / self.max_turns
|
88 |
reward = REWARD
|
89 |
elif state.remaining_steps(self.state) == 0:
|
90 |
self.done = True
|
@@ -108,25 +105,6 @@ class WordleEnvBase(gym.Env):
|
|
108 |
def words_as_action_space(self):
|
109 |
return len(self.words)
|
110 |
|
111 |
-
def encode_word(self, word):
|
112 |
-
encoded_word = np.array(
|
113 |
-
[[0] * WORDLE_N] * len(WORDLE_CHARS),
|
114 |
-
dtype=np.int32
|
115 |
-
)
|
116 |
-
for index, letter in enumerate(word):
|
117 |
-
cint = WORDLE_CHARS.index(letter)
|
118 |
-
encoded_word[cint][index] = 1
|
119 |
-
return encoded_word
|
120 |
-
|
121 |
-
def decode_word(self, action):
|
122 |
-
word = [''] * WORDLE_N
|
123 |
-
for index, letter_vec in enumerate(action):
|
124 |
-
if 1 in letter_vec:
|
125 |
-
for i, j in enumerate(letter_vec):
|
126 |
-
if j == 1:
|
127 |
-
word[i] = WORDLE_CHARS[index]
|
128 |
-
return ''.join(word)
|
129 |
-
|
130 |
|
131 |
class WordleEnv100OneAction(WordleEnvBase):
|
132 |
def __init__(self):
|
|
|
12 |
|
13 |
import random
|
14 |
|
15 |
+
|
16 |
+
def _load_words(limit: Optional[int] = None, complete: Optional[bool] = False) -> List[str]:
|
17 |
+
words = complete_vocabulary if complete else target_vocabulary
|
18 |
+
return words if not limit else words[:limit]
|
19 |
+
|
20 |
|
21 |
class WordleEnvBase(gym.Env):
|
22 |
"""
|
|
|
25 |
* 13k for full vocab
|
26 |
State space is defined as:
|
27 |
* 6 possibilities for turns (WORDLE_TURNS)
|
|
|
28 |
* For each in VALID_CHARS [A-Z] can be in one of 3^WORDLE_N states: (No, Maybe, Yes)
|
29 |
for full game, this is (3^5)^26
|
30 |
Each state has 1 + 5*26 possibilities
|
31 |
Reward:
|
32 |
Reward is 10 for guessing the right word, -10 for not guessing the right word after 6 guesses.
|
33 |
+
1 from every letter correctly guessed on each try
|
34 |
Starting State:
|
35 |
Random goal word
|
36 |
+
Initial state with turn 0, all chars Unvisited
|
37 |
"""
|
38 |
+
|
39 |
def __init__(self, words: List[str],
|
40 |
+
max_turns: int = 6,
|
41 |
+
allowable_words: Optional[int] = None,
|
42 |
+
mask_based_state_updates: bool = False):
|
|
|
43 |
assert all(len(w) == WORDLE_N for w in words), f'Not all words of length {WORDLE_N}, {words}'
|
44 |
self.words = words
|
45 |
self.max_turns = max_turns
|
|
|
48 |
if not self.allowable_words:
|
49 |
self.allowable_words = len(self.words)
|
50 |
|
|
|
|
|
|
|
|
|
|
|
51 |
self.action_space = spaces.Discrete(self.words_as_action_space())
|
52 |
self.observation_space = spaces.MultiDiscrete(state.get_nvec(self.max_turns))
|
53 |
|
54 |
self.done = True
|
55 |
+
self.goal_word: int = -1
|
56 |
|
57 |
self.state: state.WordleState = None
|
58 |
self.state_updater = state.update
|
|
|
71 |
goal_word = self.words[self.goal_word]
|
72 |
# assert word in self.words, f'{word} not in words list'
|
73 |
self.state, r = self.state_updater(state=self.state,
|
74 |
+
word=word,
|
75 |
+
goal_word=goal_word)
|
76 |
|
77 |
reward = r
|
78 |
if action == self.goal_word:
|
79 |
self.done = True
|
80 |
+
# reward = REWARD
|
81 |
if state.remaining_steps(self.state) == self.max_turns-1:
|
82 |
+
reward = 0 # -10*REWARD # No reward for guessing off the bat
|
83 |
else:
|
84 |
+
# reward = REWARD*(self.state.remaining_steps() + 1) / self.max_turns
|
85 |
reward = REWARD
|
86 |
elif state.remaining_steps(self.state) == 0:
|
87 |
self.done = True
|
|
|
105 |
def words_as_action_space(self):
|
106 |
return len(self.words)
|
107 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
108 |
|
109 |
class WordleEnv100OneAction(WordleEnvBase):
|
110 |
def __init__(self):
|