Spaces: Runtime error
Update app.py
app.py
CHANGED
@@ -3,6 +3,9 @@ import gradio as gr
 from transformers import AutoModelForCausalLM, AutoTokenizer, PreTrainedModel, PretrainedConfig
 from huggingface_hub import hf_hub_download
 import json
+import torch.nn as nn
+import torch.nn.functional as F
+import math
 
 # Define the model architecture
 class SmolLM2Config(PretrainedConfig):
@@ -56,26 +59,35 @@ class SmolLM2ForCausalLM(PreTrainedModel):
     def __init__(self, config):
         super().__init__(config)
         self.config = config
+        self.embed_tokens = nn.Embedding(config.vocab_size, config.hidden_size)
+        self.layers = nn.ModuleList([LlamaDecoderLayer(config) for _ in range(config.num_hidden_layers)])
+        self.norm = RMSNorm(config.hidden_size, config.rms_norm_eps)
+        self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False)
 
-            config=config,
-            torch_dtype=torch.float16,
-            low_cpu_mem_usage=True,
-            trust_remote_code=True
-        )
-
+        if config.tie_word_embeddings:
+            self.lm_head.weight = self.embed_tokens.weight
+
     def forward(self, input_ids=None, attention_mask=None, labels=None, **kwargs):
+        hidden_states = self.embed_tokens(input_ids)
+
+        # Process through layers
+        for layer in self.layers:
+            hidden_states = layer(hidden_states, attention_mask)
+
+        hidden_states = self.norm(hidden_states)
+        logits = self.lm_head(hidden_states)
+
+        loss = None
+        if labels is not None:
+            loss = F.cross_entropy(logits.view(-1, logits.size(-1)), labels.view(-1))
+
+        return logits if loss is None else (loss, logits)
 
     def prepare_inputs_for_generation(self, input_ids, **kwargs):
-        return
+        return {
+            "input_ids": input_ids,
+            "attention_mask": kwargs.get("attention_mask", None)
+        }
 
 # Register the model
 AutoModelForCausalLM.register(SmolLM2Config, SmolLM2ForCausalLM)
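Note: the new __init__ wires the decoder together from LlamaDecoderLayer and RMSNorm, neither of which appears in this hunk; both are presumably defined in the unchanged portion of app.py. For reference, a minimal RMSNorm consistent with the RMSNorm(config.hidden_size, config.rms_norm_eps) call above could look like the following sketch (an assumption, not the Space's actual implementation):

import torch
import torch.nn as nn

class RMSNorm(nn.Module):
    # Llama-style RMS layer norm: scale activations by the reciprocal of their root mean square.
    def __init__(self, hidden_size, eps=1e-5):
        super().__init__()
        self.weight = nn.Parameter(torch.ones(hidden_size))
        self.eps = eps

    def forward(self, x):
        variance = x.pow(2).mean(-1, keepdim=True)
        return self.weight * (x * torch.rsqrt(variance + self.eps))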
@@ -111,19 +123,20 @@ def initialize():
     }
     TOKENIZER.add_special_tokens(special_tokens)
 
-    # Load model
+    # Load model weights
     print("Loading model...")
-    )
+    weights_path = hf_hub_download(repo_id=model_id, filename="pytorch_model.bin")
+
+    # Initialize model
+    MODEL = SmolLM2ForCausalLM(config)
+
+    # Load state dict
+    state_dict = torch.load(weights_path, map_location="cpu")
+    MODEL.load_state_dict(state_dict)
 
     # Move model to device
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    MODEL.to(device)
+    MODEL = MODEL.to(device)
 
     print(f"Model loaded successfully on {device}")
 
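Because the new forward() returns a raw logits tensor (or a (loss, logits) tuple) rather than a transformers ModelOutput, the stock generate() loop may not work unmodified. A hypothetical greedy-decoding helper, not part of this commit, that drives the registered model directly once initialize() has populated the module-level MODEL and TOKENIZER:

import torch

def greedy_generate(prompt, max_new_tokens=50):
    # Tokenize the prompt and place it on whatever device the model was moved to.
    device = next(MODEL.parameters()).device
    input_ids = TOKENIZER(prompt, return_tensors="pt").input_ids.to(device)
    for _ in range(max_new_tokens):
        with torch.no_grad():
            logits = MODEL(input_ids)  # forward() returns raw logits when labels is None
        next_token = logits[:, -1, :].argmax(dim=-1, keepdim=True)
        input_ids = torch.cat([input_ids, next_token], dim=-1)
        if TOKENIZER.eos_token_id is not None and next_token.item() == TOKENIZER.eos_token_id:
            break
    return TOKENIZER.decode(input_ids[0], skip_special_tokens=True)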