Upload NanoGPT
Browse files
model.py
CHANGED
@@ -9,13 +9,11 @@ https://github.com/huggingface/transformers/blob/main/src/transformers/models/gp
|
|
9 |
|
10 |
import math
|
11 |
import inspect
|
12 |
-
# from dataclasses import dataclass
|
13 |
|
14 |
import torch
|
15 |
import torch.nn as nn
|
16 |
from torch.nn import functional as F
|
17 |
|
18 |
-
# from huggingface_hub import PyTorchModelHubMixin
|
19 |
from transformers import AutoConfig, AutoModel, PretrainedConfig, PreTrainedModel
|
20 |
|
21 |
|
@@ -109,17 +107,6 @@ class Block(nn.Module):
|
|
109 |
x = x + self.mlp(self.ln_2(x))
|
110 |
return x
|
111 |
|
112 |
-
# @dataclass
|
113 |
-
# class GPTConfig:
|
114 |
-
# block_size: int = 1024
|
115 |
-
# vocab_size: int = 50304 # GPT-2 vocab_size of 50257, padded up to nearest multiple of 64 for efficiency
|
116 |
-
# n_layer: int = 12
|
117 |
-
# n_head: int = 12
|
118 |
-
# n_embd: int = 768
|
119 |
-
# dropout: float = 0.0
|
120 |
-
# bias: bool = True # True: bias in Linears and LayerNorms, like GPT-2. False: a bit better and faster
|
121 |
-
# outbedding_weight_tying: bool = True
|
122 |
-
|
123 |
class NanoGPTConfig(PretrainedConfig):
|
124 |
model_type = "nanoGPT"
|
125 |
|
@@ -148,16 +135,6 @@ class NanoGPTConfig(PretrainedConfig):
|
|
148 |
AutoConfig.register("nanoGPT", NanoGPTConfig)
|
149 |
|
150 |
|
151 |
-
# class NanoGPT(PreTrainedModel):
|
152 |
-
# config_class = NanoGPTConfig
|
153 |
-
|
154 |
-
# def __init__(self, config):
|
155 |
-
# super().__init__(config)
|
156 |
-
# self.model = GPT(config)
|
157 |
-
|
158 |
-
# def forward(self, *args, **kwargs):
|
159 |
-
# return self.model.forward(*args, **kwargs)
|
160 |
-
|
161 |
class NanoGPT(PreTrainedModel):
|
162 |
config_class = NanoGPTConfig
|
163 |
|
|
|
9 |
|
10 |
import math
|
11 |
import inspect
|
|
|
12 |
|
13 |
import torch
|
14 |
import torch.nn as nn
|
15 |
from torch.nn import functional as F
|
16 |
|
|
|
17 |
from transformers import AutoConfig, AutoModel, PretrainedConfig, PreTrainedModel
|
18 |
|
19 |
|
|
|
107 |
x = x + self.mlp(self.ln_2(x))
|
108 |
return x
|
109 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
110 |
class NanoGPTConfig(PretrainedConfig):
|
111 |
model_type = "nanoGPT"
|
112 |
|
|
|
135 |
AutoConfig.register("nanoGPT", NanoGPTConfig)
|
136 |
|
137 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
138 |
class NanoGPT(PreTrainedModel):
|
139 |
config_class = NanoGPTConfig
|
140 |
|