Add play mode
Given a word, a state, and a saved model, the model returns the probable goal word.
- a3c/eval.py +1 -15
- a3c/play.py +48 -0
- main.py +21 -1
a3c/eval.py
CHANGED

@@ -2,6 +2,7 @@ import os
 import torch

 from .net import GreedyNet
+from .play import play
 from .utils import v_wrap


@@ -38,18 +39,3 @@ def evaluate(net, env):
     print(f"Evaluation complete, won {n_wins/N*100}% and took {n_win_guesses/n_wins} guesses per win, "
           f"{n_guesses / N} including losses.")
     return n_wins/N*100, n_win_guesses/n_wins
-
-
-def play(net, env):
-    state = env.reset()
-    outcomes = []
-    win = False
-    for i in range(env.max_turns):
-        action = net.choose_action(v_wrap(state[None, :]))
-        state, reward, done, _ = env.step(action)
-        outcomes.append((env.words[action], reward))
-        if done:
-            if reward >= 0:
-                win = True
-            break
-    return win, outcomes
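
Note: evaluate() keeps calling the same play() helper; only its home module changes, since play() now lives in a3c/play.py and is imported back here. A minimal sketch of driving the relocated helper on its own, assuming a gym-registered Wordle environment id and a checkpoint path that are not part of this commit:

# Hypothetical driver for the relocated play() helper.
# The environment id and checkpoint filename are assumptions, not taken from this diff.
import gym
import torch

from a3c.net import GreedyNet
from a3c.play import play

env = gym.make("WordleEnv100-v0")        # assumed environment id
n_s = env.observation_space.shape[0]
n_a = env.action_space.n
env = env.unwrapped                      # same pattern suggest() uses below
net = GreedyNet(n_s, n_a, env.words, len(env.words[0]))
net.load_state_dict(torch.load("checkpoints/model.pth"))  # assumed path

won, outcomes = play(net, env)           # win flag plus list of (guess, reward) pairs
print("won" if won else "lost", outcomes)
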
a3c/play.py
ADDED

@@ -0,0 +1,48 @@
+import torch
+
+from .net import GreedyNet
+from .utils import v_wrap
+from wordle_env.state import update_from_mask
+
+
+def suggest(
+    env,
+    words,
+    states,
+    pretrained_model_path
+) -> str:
+    """
+    Given a list of words and masks, return the next suggested word
+
+    :param env: Wordle gym environment
+    :param words: Words played so far
+    :param states: Mask returned for each played word
+    :return: The suggested next word
+    """
+    n_s = env.observation_space.shape[0]
+    n_a = env.action_space.n
+    env = env.unwrapped
+    state = env.reset()
+    words_list = env.words
+    word_width = len(env.words[0])
+    net = GreedyNet(n_s, n_a, words_list, word_width)
+    net.load_state_dict(torch.load(pretrained_model_path))
+    for word, mask in zip(words, states):
+        word = word.upper()
+        mask = list(map(int, mask))
+        state = update_from_mask(state, word, mask)
+    return env.words[net.choose_action(v_wrap(state[None, :]))]
+
+
+def play(net, env):
+    state = env.reset()
+    outcomes = []
+    win = False
+    for i in range(env.max_turns):
+        action = net.choose_action(v_wrap(state[None, :]))
+        state, reward, done, _ = env.step(action)
+        outcomes.append((env.words[action], reward))
+        if done:
+            if reward >= 0:
+                win = True
+            break
+    return win, outcomes
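
A quick sketch of calling suggest() directly from Python. The environment id, the checkpoint path, and the exact digit encoding of each mask string (one digit per letter, as implied by list(map(int, mask)) and update_from_mask) are assumptions, not something this diff pins down:

# Hypothetical call to suggest(); env id, checkpoint path and mask digits are assumptions.
import gym

from a3c.play import suggest

env = gym.make("WordleEnv100-v0")   # assumed environment id
words = ["arise"]                   # guesses already played
states = ["01020"]                  # one digit per letter of the corresponding guess
print(suggest(env, words, states, "checkpoints/model.pth"))
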
main.py
CHANGED

@@ -8,6 +8,7 @@ import time
 import matplotlib.pyplot as plt
 from a3c.train import train
 from a3c.eval import evaluate, evaluate_checkpoints
+from a3c.play import suggest
 from wordle_env.wordle import WordleEnvBase


@@ -27,6 +28,15 @@ def evaluation_mode(args, env, model_checkpoint_dir):
     print(results)


+def play_mode(args, env, model_checkpoint_dir):
+    print("Play mode")
+    words = [word.strip() for word in args.words.split(',')]
+    states = [state.strip() for state in args.states.split(',')]
+    pretrained_model_path = os.path.join(model_checkpoint_dir, args.model_name)
+    word = suggest(env, words, states, pretrained_model_path)
+    print(word)
+
+
 def print_results(global_ep, win_ep, res):
     print("Jugadas:", global_ep.value)
     print("Ganadas:", win_ep.value)
@@ -49,7 +59,7 @@ if __name__ == "__main__":
     parser_train.add_argument(
         "--games", "-g", help="Number of games to train", type=int, required=True)
     parser_train.add_argument(
-        "--model_name", "-
+        "--model_name", "-m", help="If you want to train from a pretrained model, the name of the pretrained model file")
     parser_train.add_argument(
         "--gamma", help="Gamma hyperparameter (discount factor) value", type=float, default=0.)
     parser_train.add_argument(
@@ -64,6 +74,16 @@ if __name__ == "__main__":
         'eval', help='Evaluate saved models for the enviroment')
     parser_eval.set_defaults(func=evaluation_mode)

+    parser_play = subparsers.add_parser(
+        'play', help='Give the model a word and the state result and the model will try to predict the goal word')
+    parser_play.add_argument(
+        "--words", "-w", help="List of words played in the wordle game", required=True)
+    parser_play.add_argument(
+        "--states", "-st", help="List of states returned by playing each of the words", required=True)
+    parser_play.add_argument(
+        "--model_name", "-m", help="Name of the pretrained model file which will play the game", required=True)
+    parser_play.set_defaults(func=play_mode)
+
     args = parser.parse_args()
     env_id = args.enviroment
     env = gym.make(env_id)
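
With the subparser wired up, play mode would be invoked along these lines; the environment id, words, mask digits and model filename are placeholders, and the placement of the positional enviroment argument relative to the play subcommand is inferred from args.enviroment rather than shown in this diff:

python main.py WordleEnv100-v0 play --words arise,pilot --states 01020,00000 --model_name model.pth

play_mode() then joins the model path, forwards everything to suggest(), and prints the suggested goal word.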