jatingocodeo committed on
Commit
c88c76b
·
verified ·
1 Parent(s): cd9eb25

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -25
app.py CHANGED
@@ -3,6 +3,9 @@ import gradio as gr
3
  from transformers import AutoModelForCausalLM, AutoTokenizer, PreTrainedModel, PretrainedConfig
4
  from huggingface_hub import hf_hub_download
5
  import json
 
 
 
6
 
7
  # Define the model architecture
8
  class SmolLM2Config(PretrainedConfig):
@@ -56,26 +59,35 @@ class SmolLM2ForCausalLM(PreTrainedModel):
56
  def __init__(self, config):
57
  super().__init__(config)
58
  self.config = config
 
 
 
 
59
 
60
- # Initialize model weights from your checkpoint
61
- self.model = AutoModelForCausalLM.from_pretrained(
62
- "jatingocodeo/SmolLM2",
63
- config=config,
64
- torch_dtype=torch.float16,
65
- low_cpu_mem_usage=True,
66
- trust_remote_code=True
67
- )
68
-
69
  def forward(self, input_ids=None, attention_mask=None, labels=None, **kwargs):
70
- return self.model(
71
- input_ids=input_ids,
72
- attention_mask=attention_mask,
73
- labels=labels,
74
- **kwargs
75
- )
 
 
 
 
 
 
 
 
76
 
77
  def prepare_inputs_for_generation(self, input_ids, **kwargs):
78
- return self.model.prepare_inputs_for_generation(input_ids, **kwargs)
 
 
 
79
 
80
  # Register the model
81
  AutoModelForCausalLM.register(SmolLM2Config, SmolLM2ForCausalLM)
@@ -111,19 +123,20 @@ def initialize():
111
  }
112
  TOKENIZER.add_special_tokens(special_tokens)
113
 
114
- # Load model
115
  print("Loading model...")
116
- MODEL = SmolLM2ForCausalLM.from_pretrained(
117
- model_id,
118
- config=config,
119
- torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
120
- trust_remote_code=True,
121
- low_cpu_mem_usage=True
122
- )
 
123
 
124
  # Move model to device
125
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
126
- MODEL.to(device)
127
 
128
  print(f"Model loaded successfully on {device}")
129
 
 
3
  from transformers import AutoModelForCausalLM, AutoTokenizer, PreTrainedModel, PretrainedConfig
4
  from huggingface_hub import hf_hub_download
5
  import json
6
+ import torch.nn as nn
7
+ import torch.nn.functional as F
8
+ import math
9
 
10
  # Define the model architecture
11
  class SmolLM2Config(PretrainedConfig):
 
59
  def __init__(self, config):
60
  super().__init__(config)
61
  self.config = config
62
+ self.embed_tokens = nn.Embedding(config.vocab_size, config.hidden_size)
63
+ self.layers = nn.ModuleList([LlamaDecoderLayer(config) for _ in range(config.num_hidden_layers)])
64
+ self.norm = RMSNorm(config.hidden_size, config.rms_norm_eps)
65
+ self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False)
66
 
67
+ if config.tie_word_embeddings:
68
+ self.lm_head.weight = self.embed_tokens.weight
69
+
 
 
 
 
 
 
70
  def forward(self, input_ids=None, attention_mask=None, labels=None, **kwargs):
71
+ hidden_states = self.embed_tokens(input_ids)
72
+
73
+ # Process through layers
74
+ for layer in self.layers:
75
+ hidden_states = layer(hidden_states, attention_mask)
76
+
77
+ hidden_states = self.norm(hidden_states)
78
+ logits = self.lm_head(hidden_states)
79
+
80
+ loss = None
81
+ if labels is not None:
82
+ loss = F.cross_entropy(logits.view(-1, logits.size(-1)), labels.view(-1))
83
+
84
+ return logits if loss is None else (loss, logits)
85
 
86
  def prepare_inputs_for_generation(self, input_ids, **kwargs):
87
+ return {
88
+ "input_ids": input_ids,
89
+ "attention_mask": kwargs.get("attention_mask", None)
90
+ }
91
 
92
  # Register the model
93
  AutoModelForCausalLM.register(SmolLM2Config, SmolLM2ForCausalLM)
 
123
  }
124
  TOKENIZER.add_special_tokens(special_tokens)
125
 
126
+ # Load model weights
127
  print("Loading model...")
128
+ weights_path = hf_hub_download(repo_id=model_id, filename="pytorch_model.bin")
129
+
130
+ # Initialize model
131
+ MODEL = SmolLM2ForCausalLM(config)
132
+
133
+ # Load state dict
134
+ state_dict = torch.load(weights_path, map_location="cpu")
135
+ MODEL.load_state_dict(state_dict)
136
 
137
  # Move model to device
138
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
139
+ MODEL = MODEL.to(device)
140
 
141
  print(f"Model loaded successfully on {device}")
142