Update app.py
app.py CHANGED
@@ -1,12 +1,37 @@
 import gradio as gr
 import torch
-from transformers import AutoTokenizer, AutoModelForCausalLM, AutoConfig
+from transformers import AutoTokenizer, AutoModelForCausalLM, AutoConfig, AutoModel
 from huggingface_hub import hf_hub_download
 import os
+import torch.nn as nn
+
+# ----- Model Definition -----
+class CustomDialoGPT(nn.Module):
+    def __init__(self, vocab_size, n_embd=768, n_head=12, n_layer=12): # <---- FORCE n_embd, n_head, n_layer to match DialoGPT-medium
+        super().__init__()
+
+        config = AutoConfig.from_pretrained("microsoft/DialoGPT-medium",
+                                            vocab_size=vocab_size,
+                                            n_embd=n_embd,
+                                            n_head=n_head,
+                                            n_layer=n_layer,
+                                            bos_token_id=50256,
+                                            eos_token_id=50256,
+                                            pad_token_id=50256
+                                            )
+        self.transformer = AutoModelForCausalLM.from_config(config) # Use AutoModelForCausalLM here
+        self.lm_head = nn.Linear(n_embd, vocab_size, bias=False) # Keep lm_head
+
+    def forward(self, input_ids):
+        transformer_outputs = self.transformer(input_ids=input_ids, output_hidden_states=True)
+        hidden_states = transformer_outputs.hidden_states[-1] # get last hidden state
+        logits = self.lm_head(hidden_states)
+        return logits
+

 # Model and tokenizer details
 model_repo = "elapt1c/ElapticAI-1a"
-model_filename = "model.pth" #
+model_filename = "model.pth" # <--- CHECK FILENAME ON HF HUB, UPDATE IF NEEDED!
 tokenizer_name = "microsoft/DialoGPT-medium"

 # Device configuration
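Note on the class added above: CustomDialoGPT wraps a complete AutoModelForCausalLM (which already contains its own language-model head) and then applies a second, separate lm_head to the last hidden state, presumably so the module's parameter names line up with the keys stored in model.pth. A minimal sanity check of the forward pass, hypothetical and not part of app.py, assuming the class exactly as defined in this diff:

    import torch

    # 50257 is the GPT-2/DialoGPT vocabulary size
    model = CustomDialoGPT(vocab_size=50257)
    dummy_ids = torch.randint(0, 50257, (1, 8))  # batch of 1, sequence of 8 tokens
    logits = model(dummy_ids)
    print(logits.shape)  # expected: torch.Size([1, 8, 50257])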
@@ -14,11 +39,14 @@ device = "cuda" if torch.cuda.is_available() else "cpu"

 # Load tokenizer
 tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
+vocab_size = len(tokenizer)
+
+# Initialize model with fixed parameters to match checkpoint
+n_embd = 768 # <---- FORCE n_embd to 768
+n_head = 12 # <---- FORCE n_head to 12
+n_layer = 12 # <---- FORCE n_layer to 12
+model = CustomDialoGPT(vocab_size, n_embd, n_head, n_layer)

-# Load model configuration
-config = AutoConfig.from_pretrained("microsoft/DialoGPT-medium")
-# Initialize model from config (important to use the same architecture)
-model = AutoModelForCausalLM.from_config(config)

 # Download and load model weights
 try:
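The body of this try block (old lines 24-37) is unchanged, so the diff collapses it. A plausible sketch of the elided download-and-load step, relying on the hf_hub_download and torch imports already at the top of app.py, and assuming model.pth is a raw state_dict saved with torch.save (a wrapped checkpoint such as {"model_state_dict": ...} would need unwrapping first):

    # hypothetical reconstruction of the collapsed lines inside the try block
    checkpoint_path = hf_hub_download(repo_id=model_repo, filename=model_filename)
    state_dict = torch.load(checkpoint_path, map_location=device)  # load onto the configured device
    model.load_state_dict(state_dict)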
@@ -38,7 +66,7 @@ try:
     print(f"Successfully loaded model weights from {model_repo}/{model_filename}")
 except Exception as e:
     print(f"Error loading model: {e}")
-    print("Please ensure the model repository and filename are correct.")
+    print("Please ensure the model repository and filename are correct and that the model architecture in app.py matches the checkpoint.")
     raise e # It's better to raise the error in a Space, so it's visible.

 model.to(device)
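A small idiomatic note on the raise e line kept above: inside an except block, a bare raise re-raises the current exception as-is and is the conventional alternative to raise e, for example:

    except Exception as e:
        print(f"Error loading model: {e}")
        raise  # bare raise keeps the original traceback visible in the Space logs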
@@ -52,8 +80,8 @@ def chat_with_model(user_input, history=[]):
     input_ids = tokenizer.encode(input_text, return_tensors='pt').to(device)

     with torch.no_grad():
-        output = model.generate(
-            input_ids,
+        output = model.transformer.generate( # Use model.transformer.generate here
+            inputs=input_ids, # Use inputs instead of input_ids
             max_length=1000, # Adjust as needed
             pad_token_id=tokenizer.eos_token_id,
             temperature=0.7,
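Two caveats on this generation call, hedged because only part of chat_with_model is visible in the diff: in transformers, temperature (like top_k and top_p) only takes effect when do_sample=True is also passed, so as written generate may fall back to greedy decoding and warn that the setting is ignored; and because generation goes through the inner model.transformer, the custom lm_head is bypassed at inference time and the stock GPT-2 head produces the tokens. A hypothetical decode step that would follow the call, assuming output holds the prompt followed by the reply, as is usual for GPT-2-style models:

    response = tokenizer.decode(
        output[0][input_ids.shape[-1]:],  # drop the echoed prompt tokens
        skip_special_tokens=True,
    )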