jatingocodeo committed
Commit 7276d4c · verified · 1 Parent(s): fee88b4

Update app.py

Files changed (1)
  1. app.py +79 -1
app.py CHANGED
@@ -1,6 +1,84 @@
  import gradio as gr
- from transformers import AutoModelForCausalLM, AutoTokenizer
+ from transformers import AutoModelForCausalLM, AutoTokenizer, PreTrainedModel, PretrainedConfig
  import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+ import math
+
+ # Model architecture definition
+ class SmolLM2Config(PretrainedConfig):
+     model_type = "smollm2"
+
+     def __init__(
+         self,
+         vocab_size=49152,
+         hidden_size=576,
+         intermediate_size=1536,
+         num_hidden_layers=30,
+         num_attention_heads=9,
+         num_key_value_heads=3,
+         hidden_act="silu",
+         max_position_embeddings=2048,
+         initializer_range=0.041666666666666664,
+         rms_norm_eps=1e-5,
+         use_cache=True,
+         pad_token_id=None,
+         bos_token_id=0,
+         eos_token_id=0,
+         tie_word_embeddings=True,
+         rope_theta=10000.0,
+         **kwargs
+     ):
+         self.vocab_size = vocab_size
+         self.hidden_size = hidden_size
+         self.intermediate_size = intermediate_size
+         self.num_hidden_layers = num_hidden_layers
+         self.num_attention_heads = num_attention_heads
+         self.num_key_value_heads = num_key_value_heads
+         self.hidden_act = hidden_act
+         self.max_position_embeddings = max_position_embeddings
+         self.initializer_range = initializer_range
+         self.rms_norm_eps = rms_norm_eps
+         self.use_cache = use_cache
+         self.rope_theta = rope_theta
+         super().__init__(
+             pad_token_id=pad_token_id,
+             bos_token_id=bos_token_id,
+             eos_token_id=eos_token_id,
+             tie_word_embeddings=tie_word_embeddings,
+             **kwargs
+         )
+
+ class SmolLM2ForCausalLM(PreTrainedModel):
+     config_class = SmolLM2Config
+
+     def __init__(self, config):
+         super().__init__(config)
+         self.config = config
+
+         self.embed_tokens = nn.Embedding(config.vocab_size, config.hidden_size)
+         self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False)
+
+         if config.tie_word_embeddings:
+             self.lm_head.weight = self.embed_tokens.weight
+
+     def forward(self, input_ids, attention_mask=None, labels=None):
+         hidden_states = self.embed_tokens(input_ids)
+         logits = self.lm_head(hidden_states)
+
+         loss = None
+         if labels is not None:
+             loss = F.cross_entropy(logits.view(-1, logits.size(-1)), labels.view(-1))
+
+         return logits if loss is None else (loss, logits)
+
+     def prepare_inputs_for_generation(self, input_ids, **kwargs):
+         return {"input_ids": input_ids}
+
+ # Register the model architecture
+ from transformers import AutoConfig, AutoModelForCausalLM
+ AutoConfig.register("smollm2", SmolLM2Config)
+ AutoModelForCausalLM.register(SmolLM2Config, SmolLM2ForCausalLM)

  # Load model and tokenizer
  model_id = "jatingocodeo/SmolLM2"
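
With AutoConfig.register and AutoModelForCausalLM.register executed, the custom architecture can be resolved through the standard Auto classes. The sketch below is illustrative only and not part of the commit: it assumes the checkpoint's config.json declares model_type "smollm2" and that its weights match the simplified classes above. Because this forward returns raw logits rather than a ModelOutput, it uses a manual greedy loop instead of model.generate(); the prompt and decode length are placeholders.

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

model_id = "jatingocodeo/SmolLM2"  # checkpoint referenced in the diff

# Registration maps model_type "smollm2" to the local classes,
# so from_pretrained can instantiate the custom architecture.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)
model.eval()

# Minimal greedy decoding: forward returns a (batch, seq, vocab) logits tensor,
# so we pick the argmax token ourselves and append it to the context.
input_ids = tokenizer("Hello", return_tensors="pt").input_ids
with torch.no_grad():
    for _ in range(20):  # illustrative generation length
        logits = model(input_ids)
        next_id = logits[:, -1, :].argmax(dim=-1, keepdim=True)
        input_ids = torch.cat([input_ids, next_id], dim=-1)

print(tokenizer.decode(input_ids[0], skip_special_tokens=True))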