HaileyStorm committed
Commit 07f1096
1 Parent(s): 5e634b7
Upload chess-gpt-eval/mamba_module.py with huggingface_hub
chess-gpt-eval/mamba_module.py
CHANGED
@@ -1,7 +1,8 @@
 import os
 import pickle
 import torch
-from mamba_lm import
+from mamba_lm import MambaLMConfig, from_pretrained
+from mamba_ssm import MambaLMHeadModel
 from contextlib import nullcontext

 BASE_DIR = "mamba/"
@@ -41,10 +42,10 @@ class MambaPlayer:
         # Model initialization
         if init_from == "resume":
             #ckpt_path = os.path.join(BASE_DIR, out_dir, self.model_name)
-            ckpt_path = os.path.normpath(f"
+            ckpt_path = os.path.normpath(f"../chess-mamba-vs-xformer/out/Mamba/{self.model_name}")
             checkpoint = torch.load(ckpt_path, map_location=device)
             model_config = checkpoint["model_args"]
-            model =
+            model = MambaLMHeadModel(model_config)
             model.load_state_dict(checkpoint['model'])
         elif init_from.startswith('state-spaces'):
             model = from_pretrained(init_from).to(device)
@@ -96,7 +97,7 @@ class MambaPlayer:
         with torch.no_grad():
             have_non_space = False
             for _ in range(max_new_tokens):
-                logits = self.model(input_ids)[0, -1, :] # Get logits for the last token
+                logits = self.model(input_ids).logits[0, -1, :] # Get logits for the last token

                 # Apply temperature scaling and optionally sample from top k tokens
                 logits = logits / temperature
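For context, the substantive change in the resume branch is that the model is now rebuilt with mamba_ssm's MambaLMHeadModel from the checkpoint's saved model_args, replacing the previous mamba_lm construction (truncated in the diff above). A minimal sketch of that flow, assuming model_args holds the MambaConfig that MambaLMHeadModel expects; the checkpoint filename and device handling below are illustrative, not the module's exact code:

import torch
from mamba_ssm import MambaLMHeadModel

# Illustrative values; mamba_module.py derives ckpt_path from self.model_name.
ckpt_path = "../chess-mamba-vs-xformer/out/Mamba/model.pt"  # hypothetical filename
device = "cuda" if torch.cuda.is_available() else "cpu"

checkpoint = torch.load(ckpt_path, map_location=device)
model_config = checkpoint["model_args"]     # assumed: a mamba_ssm MambaConfig
model = MambaLMHeadModel(model_config)      # rebuild the architecture from the saved config
model.load_state_dict(checkpoint["model"])  # restore the trained weights
model.to(device)
model.eval()

Constructing the module from the stored config before load_state_dict keeps the instantiated architecture in sync with whatever shape was actually trained.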
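The generation-loop edit follows from that model swap: mamba_ssm's MambaLMHeadModel returns a namedtuple-style output from forward rather than a raw tensor, so the last-position logits are read via .logits instead of indexing the return value directly. A short sketch of one sampling step under that assumption; the function name and the nanoGPT-style top-k masking are illustrative rather than this module's exact code:

import torch
import torch.nn.functional as F

def sample_next_token(model, input_ids, temperature=0.7, top_k=200):
    # model(input_ids) returns an output object; .logits is (batch, seq, vocab).
    with torch.no_grad():
        logits = model(input_ids).logits[0, -1, :]   # logits for the last token
        logits = logits / temperature                # temperature scaling
        if top_k is not None:
            v, _ = torch.topk(logits, min(top_k, logits.size(-1)))
            logits[logits < v[-1]] = -float("inf")   # mask below the k-th largest logit
        probs = F.softmax(logits, dim=-1)
        return torch.multinomial(probs, num_samples=1)  # sampled token id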