naveensp committed (verified)
Commit 10b98ea · 1 Parent(s): 01270a5

Upload configuration_olmo.py with huggingface_hub

Files changed (1)
  1. configuration_olmo.py +52 -0
configuration_olmo.py ADDED
@@ -0,0 +1,52 @@
+ """
+ OLMo configuration
+ """
+
+ from transformers import AutoConfig, PretrainedConfig
+ from transformers.utils import logging
+
+ from .config import ModelConfig
+ from .aliases import PathOrStr
+ from .beam_search import Sampler
+ from .exceptions import OLMoError
+ from .initialization import ModuleType
+ from .optim import Optimizer
+ from .util import StrEnum
+ from .safetensors_util import STKey
+ from .torch_util import seed_all
+
+ logger = logging.get_logger(__name__)
+
+
+ class OLMoConfig(PretrainedConfig):
+     model_type = "olmo"
+     keys_to_ignore_at_inference = ["past_key_values"]  # TODO: confirm
+
+     def __init__(self, use_cache: bool = False, **kwargs):
+         model_config = ModelConfig()
+         all_kwargs = model_config.asdict()
+         all_kwargs.update(kwargs)
+         all_kwargs.update({"use_cache": use_cache})
+         all_kwargs.update(
+             {
+                 "architectures": all_kwargs.get("architectures", ["OLMoModelForCausalLM"])
+                 or ["OLMoModelForCausalLM"]
+             }
+         )
+         super().__init__(**all_kwargs)
+
+     @property
+     def num_attention_heads(self):
+         return self.n_heads
+
+     @property
+     def num_hidden_layers(self):
+         return self.n_layers
+
+     @property
+     def hidden_size(self):
+         return self.d_model
+
+
+ # Register the config class so that it is available for transformer pipelines, auto-loading etc.
+ # AutoConfig.register("olmo", OLMoConfig)
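For reference, a minimal sketch of how this configuration class could be exercised once the file ships inside a model repository. The import path below is hypothetical (the file itself uses package-relative imports, so in practice it is loaded as part of its package or via trust_remote_code), and the registration call simply mirrors the commented-out line at the bottom of the file; note that newer transformers releases ship their own "olmo" model type, so registering under that name may conflict there.

# Sketch only: "olmo_package" is a placeholder for wherever this module's package lives;
# the file uses relative imports, so it cannot be imported standalone.
from transformers import AutoConfig
from olmo_package.configuration_olmo import OLMoConfig  # hypothetical import path

# Mirror the commented-out registration so AutoConfig can resolve the "olmo" model type.
AutoConfig.register("olmo", OLMoConfig)

# Defaults come from ModelConfig(); keyword arguments override them before
# PretrainedConfig.__init__ is called.
config = OLMoConfig(use_cache=True)

# The properties defined above alias OLMo's field names to the standard HF attribute names.
assert config.hidden_size == config.d_model
assert config.num_attention_heads == config.n_heads
assert config.num_hidden_layers == config.n_layers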