jatingocodeo committed on
Commit
c88c76b
·
verified ·
1 Parent(s): cd9eb25

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -25
app.py CHANGED
@@ -3,6 +3,9 @@ import gradio as gr
3
  from transformers import AutoModelForCausalLM, AutoTokenizer, PreTrainedModel, PretrainedConfig
4
  from huggingface_hub import hf_hub_download
5
  import json
 
 
 
6
 
7
  # Define the model architecture
8
  class SmolLM2Config(PretrainedConfig):
@@ -56,26 +59,35 @@ class SmolLM2ForCausalLM(PreTrainedModel):
56
  def __init__(self, config):
57
  super().__init__(config)
58
  self.config = config
 
 
 
 
59
 
60
- # Initialize model weights from your checkpoint
61
- self.model = AutoModelForCausalLM.from_pretrained(
62
- "jatingocodeo/SmolLM2",
63
- config=config,
64
- torch_dtype=torch.float16,
65
- low_cpu_mem_usage=True,
66
- trust_remote_code=True
67
- )
68
-
69
  def forward(self, input_ids=None, attention_mask=None, labels=None, **kwargs):
70
- return self.model(
71
- input_ids=input_ids,
72
- attention_mask=attention_mask,
73
- labels=labels,
74
- **kwargs
75
- )
 
 
 
 
 
 
 
 
76
 
77
  def prepare_inputs_for_generation(self, input_ids, **kwargs):
78
- return self.model.prepare_inputs_for_generation(input_ids, **kwargs)
 
 
 
79
 
80
  # Register the model
81
  AutoModelForCausalLM.register(SmolLM2Config, SmolLM2ForCausalLM)
@@ -111,19 +123,20 @@ def initialize():
111
  }
112
  TOKENIZER.add_special_tokens(special_tokens)
113
 
114
- # Load model
115
  print("Loading model...")
116
- MODEL = SmolLM2ForCausalLM.from_pretrained(
117
- model_id,
118
- config=config,
119
- torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
120
- trust_remote_code=True,
121
- low_cpu_mem_usage=True
122
- )
 
123
 
124
  # Move model to device
125
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
126
- MODEL.to(device)
127
 
128
  print(f"Model loaded successfully on {device}")
129
 
 
3
  from transformers import AutoModelForCausalLM, AutoTokenizer, PreTrainedModel, PretrainedConfig
4
  from huggingface_hub import hf_hub_download
5
  import json
6
+ import torch.nn as nn
7
+ import torch.nn.functional as F
8
+ import math
9
 
10
  # Define the model architecture
11
  class SmolLM2Config(PretrainedConfig):
 
59
  def __init__(self, config):
60
  super().__init__(config)
61
  self.config = config
62
+ self.embed_tokens = nn.Embedding(config.vocab_size, config.hidden_size)
63
+ self.layers = nn.ModuleList([LlamaDecoderLayer(config) for _ in range(config.num_hidden_layers)])
64
+ self.norm = RMSNorm(config.hidden_size, config.rms_norm_eps)
65
+ self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False)
66
 
67
+ if config.tie_word_embeddings:
68
+ self.lm_head.weight = self.embed_tokens.weight
69
+
 
 
 
 
 
 
70
  def forward(self, input_ids=None, attention_mask=None, labels=None, **kwargs):
71
+ hidden_states = self.embed_tokens(input_ids)
72
+
73
+ # Process through layers
74
+ for layer in self.layers:
75
+ hidden_states = layer(hidden_states, attention_mask)
76
+
77
+ hidden_states = self.norm(hidden_states)
78
+ logits = self.lm_head(hidden_states)
79
+
80
+ loss = None
81
+ if labels is not None:
82
+ loss = F.cross_entropy(logits.view(-1, logits.size(-1)), labels.view(-1))
83
+
84
+ return logits if loss is None else (loss, logits)
85
 
86
  def prepare_inputs_for_generation(self, input_ids, **kwargs):
87
+ return {
88
+ "input_ids": input_ids,
89
+ "attention_mask": kwargs.get("attention_mask", None)
90
+ }
91
 
92
  # Register the model
93
  AutoModelForCausalLM.register(SmolLM2Config, SmolLM2ForCausalLM)
 
123
  }
124
  TOKENIZER.add_special_tokens(special_tokens)
125
 
126
+ # Load model weights
127
  print("Loading model...")
128
+ weights_path = hf_hub_download(repo_id=model_id, filename="pytorch_model.bin")
129
+
130
+ # Initialize model
131
+ MODEL = SmolLM2ForCausalLM(config)
132
+
133
+ # Load state dict
134
+ state_dict = torch.load(weights_path, map_location="cpu")
135
+ MODEL.load_state_dict(state_dict)
136
 
137
  # Move model to device
138
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
139
+ MODEL = MODEL.to(device)
140
 
141
  print(f"Model loaded successfully on {device}")
142