Commit 8ba60a0 (verified) by KittyCat00
Parent(s): 6fa3bf4

Update app.py

Files changed (1):
  1. app.py +20 -12
app.py CHANGED
@@ -485,19 +485,27 @@ def main(input_text, max_new_tokens):
     else:
         device = torch.device("cpu")
 
-    # weights = torch.load("model_and_optimizer.pth", map_location=torch.device(device))
-    weights = torch.load("model_and_optimizer.pth", weights_only=False)
-
-    model = GPTModel({
-        "vocab_size": 50257,      # Vocabulary size
-        "context_length": 512,    # Shortened context length (orig: 1024)
-        "emb_dim": 768,           # Embedding dimension
-        "n_heads": 12,            # Number of attention heads
-        "n_layers": 12,           # Number of layers
-        "drop_rate": 0.3,         # Dropout rate
-        "qkv_bias": False         # Query-key-value bias
-    }).to(device)
-    model.load_state_dict(weights['model_state_dict'])
+    checkpoint = torch.load("model_and_optimizer.pth", weights_only=True)
+
+    model = GPTModel(GPT_CONFIG_124M)
+    model.load_state_dict(checkpoint["model_state_dict"])
+
+    optimizer = torch.optim.AdamW(model.parameters(), lr=0.0005, weight_decay=0.1)
+    optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
+
+    # weights = torch.load("model_and_optimizer.pth", map_location=torch.device(device))
+    # weights = torch.load("model_and_optimizer.pth", weights_only=False)
+
+    # model = GPTModel({
+    #     "vocab_size": 50257,      # Vocabulary size
+    #     "context_length": 512,    # Shortened context length (orig: 1024)
+    #     "emb_dim": 768,           # Embedding dimension
+    #     "n_heads": 12,            # Number of attention heads
+    #     "n_layers": 12,           # Number of layers
+    #     "drop_rate": 0.3,         # Dropout rate
+    #     "qkv_bias": False         # Query-key-value bias
+    # }).to(device)
+    # model.load_state_dict(weights['model_state_dict'])
     model.eval()
 
     context_size = model.pos_emb.weight.shape[0]
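
A note on the new load path (commentary, not part of the commit): weights_only=True restricts torch.load to deserializing plain tensors and standard containers, which avoids arbitrary code execution from a pickled checkpoint. The new call also drops the previously commented-out map_location argument, so a checkpoint saved on a GPU machine can fail to load on the CPU branch above. A minimal sketch that keeps both safeguards, assuming the device variable selected earlier in main:

    # Sketch only, not from this commit: combine weights_only with map_location
    # so a GPU-saved checkpoint still loads when device is "cpu".
    checkpoint = torch.load(
        "model_and_optimizer.pth",
        map_location=device,   # remap tensor storages to the chosen device
        weights_only=True,     # restrict unpickling to tensors/containers
    )

Note also that the new version omits the old .to(device) call, so the loaded model stays on the CPU unless it is moved explicitly.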