IntelLabs
/

LlavaOLMoBitnet1B

naveensp committed on Aug 21, 2024

Commit

10b98ea

verified ·

1 Parent(s): 01270a5

Upload configuration_olmo.py with huggingface_hub

Files changed (1) hide show

configuration_olmo.py ADDED Viewed

+"""
+OLMo configuration
+"""
+from transformers import AutoConfig, PretrainedConfig
+from transformers.utils import logging
+from .config import ModelConfig
+from .aliases import PathOrStr
+from .beam_search import Sampler
+from .exceptions import OLMoError
+from .initialization import ModuleType
+from .optim import Optimizer
+from .util import StrEnum
+from .safetensors_util import STKey
+from .torch_util import seed_all
+# Module-level logger, created via the logging helper imported from transformers.utils.
+logger = logging.get_logger(__name__)
+class OLMoConfig(PretrainedConfig):
+    model_type = "olmo"
+    keys_to_ignore_at_inference = ["past_key_values"]  # TODO: confirm
+    def __init__(self, use_cache: bool = False, **kwargs):
+        model_config = ModelConfig()
+        all_kwargs = model_config.asdict()
+        all_kwargs.update(kwargs)
+        all_kwargs.update({"use_cache": use_cache})
+        all_kwargs.update(
+            {
+                "architectures": all_kwargs.get("architectures", ["OLMoModelForCausalLM"])
+                or ["OLMoModelForCausalLM"]
+            }
+        )
+        super().__init__(**all_kwargs)
+    @property
+    def num_attention_heads(self):
+        return self.n_heads
+    @property
+    def num_hidden_layers(self):
+        return self.n_layers
+    @property
+    def hidden_size(self):
+        return self.d_model
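+# Registering the config class would make it available for transformers pipelines, auto-loading, etc.
+# NOTE: the registration call below is currently commented out, so it does not run on import.
+# AutoConfig.register("olmo", OLMoConfig)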