jatingocodeo committed · Commit 1a2e215 · verified · 1 Parent(s): c88c76b

Update app.py

Files changed (1): app.py (+66, -0)
app.py CHANGED
@@ -53,6 +53,72 @@ class SmolLM2Config(PretrainedConfig):
 from transformers import AutoConfig
 AutoConfig.register("smollm2", SmolLM2Config)
 
+class RMSNorm(nn.Module):
+    def __init__(self, hidden_size, eps=1e-5):
+        super().__init__()
+        self.weight = nn.Parameter(torch.ones(hidden_size))
+        self.eps = eps
+
+    def forward(self, x):
+        # RMS normalization: scale by 1/sqrt(mean(x^2) + eps), no mean-centering
+        variance = x.pow(2).mean(-1, keepdim=True)
+        x = x * torch.rsqrt(variance + self.eps)
+        return self.weight * x
+
+class LlamaDecoderLayer(nn.Module):
+    def __init__(self, config):
+        super().__init__()
+        self.hidden_size = config.hidden_size
+        self.num_heads = config.num_attention_heads
+        self.head_dim = config.hidden_size // config.num_attention_heads
+
+        self.q_proj = nn.Linear(config.hidden_size, config.num_attention_heads * self.head_dim, bias=False)
+        self.k_proj = nn.Linear(config.hidden_size, config.num_attention_heads * self.head_dim, bias=False)
+        self.v_proj = nn.Linear(config.hidden_size, config.num_attention_heads * self.head_dim, bias=False)
+        self.o_proj = nn.Linear(config.num_attention_heads * self.head_dim, config.hidden_size, bias=False)
+
+        self.mlp = nn.Sequential(
+            nn.Linear(config.hidden_size, config.intermediate_size, bias=False),
+            nn.SiLU(),
+            nn.Linear(config.intermediate_size, config.hidden_size, bias=False)
+        )
+
+        self.input_layernorm = RMSNorm(config.hidden_size, config.rms_norm_eps)
+        self.post_attention_layernorm = RMSNorm(config.hidden_size, config.rms_norm_eps)
+
+    def forward(self, hidden_states, attention_mask=None):
+        # Self-attention block (pre-norm residual)
+        residual = hidden_states
+        hidden_states = self.input_layernorm(hidden_states)
+
+        # Project and split into (batch, heads, seq, head_dim)
+        batch_size, seq_length, _ = hidden_states.size()
+        q = self.q_proj(hidden_states).view(batch_size, seq_length, self.num_heads, self.head_dim).transpose(1, 2)
+        k = self.k_proj(hidden_states).view(batch_size, seq_length, self.num_heads, self.head_dim).transpose(1, 2)
+        v = self.v_proj(hidden_states).view(batch_size, seq_length, self.num_heads, self.head_dim).transpose(1, 2)
+
+        # Scaled dot-product attention scores
+        scale = 1.0 / math.sqrt(self.head_dim)
+        scores = torch.matmul(q, k.transpose(-2, -1)) * scale
+
+        if attention_mask is not None:
+            scores = scores + attention_mask  # additive mask: large negative where attention is disallowed
+
+        attn_weights = F.softmax(scores, dim=-1)
+        hidden_states = torch.matmul(attn_weights, v)
+
+        # Merge heads back to (batch, seq, hidden_size) and project out
+        hidden_states = hidden_states.transpose(1, 2).contiguous().view(batch_size, seq_length, -1)
+        hidden_states = self.o_proj(hidden_states)
+        hidden_states = residual + hidden_states
+
+        # MLP block (pre-norm residual)
+        residual = hidden_states
+        hidden_states = self.post_attention_layernorm(hidden_states)
+        hidden_states = self.mlp(hidden_states)
+        hidden_states = residual + hidden_states
+
+        return hidden_states
+
 class SmolLM2ForCausalLM(PreTrainedModel):
     config_class = SmolLM2Config
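
For reference, a minimal smoke test for the two new classes (not part of this commit; a hypothetical sketch). It assumes app.py already imports math, torch, torch.nn as nn, and torch.nn.functional as F, since the added code uses all four but the hunk imports none of them. The SimpleNamespace below is only a stand-in for a real SmolLM2Config, carrying just the attributes the layer reads; the field values are made up.

import torch
from types import SimpleNamespace

# Hypothetical stand-in for SmolLM2Config (attribute names taken from the diff).
config = SimpleNamespace(
    hidden_size=64,
    num_attention_heads=4,
    intermediate_size=256,
    rms_norm_eps=1e-5,
)
layer = LlamaDecoderLayer(config)

batch_size, seq_length = 2, 8
x = torch.randn(batch_size, seq_length, config.hidden_size)

# The forward adds the mask directly to the raw scores, so it must be an
# *additive* mask broadcastable to (batch, heads, seq, seq): 0 where
# attention is allowed, -inf (or a large negative value) where it is not.
causal_mask = torch.triu(
    torch.full((seq_length, seq_length), float("-inf")), diagonal=1
)

out = layer(x, attention_mask=causal_mask)
assert out.shape == (batch_size, seq_length, config.hidden_size)

Two things worth noting about the added layer: the MLP is a plain linear-SiLU-linear stack rather than LLaMA's gated (SwiGLU) MLP, and no positional encoding is applied to q and k, so any rotary embedding used elsewhere in app.py would have to be wired in here.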