Spaces:

Jsevisal
/

semantrix

Paused

Javierss committed on Oct 24, 2024

Commit

5c1251d

1 Parent(s): e180617

Rm external word files

Files changed (5) hide show

__pycache__/game_transformer.cpython-311.pyc CHANGED Viewed

Binary files a/__pycache__/game_transformer.cpython-311.pyc and b/__pycache__/game_transformer.cpython-311.pyc differ

config/possible_words_part1.gz DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:3cdee4ac8ccd290a1e2b62762a88183132d82a4a20cca4c98cdd2aead3d2b8b0
-size 3040618

config/possible_words_part2.gz DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:57d2bb64bff51827be8bf24d5689f3f49670fe556010d8cb3f2cc98875c155f2
-size 3040822

data/ranking.txt CHANGED Viewed

@@ -1,3 +1,9 @@
-['#1', 'a', 6.34]
 ---------------------------
-['#1', 'a', 6.34]

+['#7', 'luz', 6.99]
 ---------------------------
+['#1', 'amigo', 7.99]
+['#2', 'persona', 7.9]
+['#7', 'luz', 6.99]
+['#6', 'amistad', 6.36]
+['#4', 'familiar', 6.32]
+['#5', 'hermano', 5.84]
+['#3', 'familia', 4.77]

game_transformer.py CHANGED Viewed

@@ -1,6 +1,6 @@
 # %%
 import json
-import gzip
 import random
 from datetime import datetime
 import numpy as np
@@ -24,8 +24,6 @@ class Semantrix:
     config_file_path = "config/lang.json"
     secret_file_path = "config/secret.json"
-    possible_words_file_path_1 = "config/possible_words_part1.gz"
-    possible_words_file_path_2 = "config/possible_words_part2.gz"
     data_path = "data/"
     class DictWrapper:
@@ -55,12 +53,16 @@ class Semantrix:
             file.write("---------------------------")
         self.possible_words = []
-        with gzip.open(self.possible_words_file_path_1, "rt", encoding="utf-8") as f1:
-            self.possible_words.extend(f1.read().splitlines())
-        # Load the second part
-        with gzip.open(self.possible_words_file_path_2, "rt", encoding="utf-8") as f2:
-            self.possible_words.extend(f2.read().splitlines())
     def prepare_game(self, difficulty):

 # %%
 import json
+import re
 import random
 from datetime import datetime
 import numpy as np
     config_file_path = "config/lang.json"
     secret_file_path = "config/secret.json"
     data_path = "data/"
     class DictWrapper:
             file.write("---------------------------")
         self.possible_words = []
+        model_eng = KeyedVectors.load("config/w2v_models/eng_w2v_model", mmap="r")
+        self.possible_words.extend(list(model_eng.key_to_index.keys()))
+        model_esp = KeyedVectors.load("config/w2v_models/esp_w2v_model", mmap="r")
+        self.possible_words.extend(list(model_esp.key_to_index.keys()))
+        del model_eng, model_esp
+        pattern = re.compile(r"^[a-zA-Z0-9áéíóúÁÉÍÓÚñÑüÜ]+$")
+        filtered_words = [word for word in self.possible_words if pattern.match(word)]
+        unique_words = set(word.lower() for word in filtered_words)
+        self.possible_words = list(unique_words)
     def prepare_game(self, difficulty):