mgh6 committed
Commit e67678f · verified · 1 Parent(s): ab4dc8e

Training in progress, step 2000

configuration_glm2.py ADDED
@@ -0,0 +1,37 @@
+ """gLM2 model configuration"""
+
+ from typing import Optional
+ from transformers import PretrainedConfig
+ from transformers.utils import logging
+
+ logger = logging.get_logger(__name__)
+
+
+ class gLM2Config(PretrainedConfig):
+     model_type = "gLM2"
+
+     def __init__(
+         self,
+         dim: int = 640,
+         depth: int = 30,
+         heads: int = 10,
+         vocab_size: int = 4160,
+         swiglu_multiple_of: int = 256,
+         ffn_dim_multiplier: Optional[float] = None,
+         norm_eps: float = 1e-5,
+         **kwargs
+     ):
+         super().__init__(**kwargs)
+         self.dim = dim
+         self.depth = depth
+         self.heads = heads
+         self.vocab_size = vocab_size
+         self.swiglu_multiple_of = swiglu_multiple_of
+         self.ffn_dim_multiplier = ffn_dim_multiplier
+         self.norm_eps = norm_eps
+
+         self.auto_map = {
+             "AutoConfig": "configuration_glm2.gLM2Config",
+             "AutoModel": "modeling_glm2.gLM2Model",
+             "AutoModelForMaskedLM": "modeling_glm2.gLM2ForMaskedLM"
+         }
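
For context, a minimal usage sketch (not part of this commit): the auto_map entries above let the Transformers Auto classes resolve the custom gLM2 classes from this repository, which requires passing trust_remote_code=True. The repo id below is a placeholder assumption, not confirmed by the commit.

    # Minimal usage sketch; "user/gLM2-repo" is a placeholder assumption.
    from transformers import AutoConfig, AutoModelForMaskedLM

    # auto_map routes AutoConfig -> configuration_glm2.gLM2Config and
    # AutoModelForMaskedLM -> modeling_glm2.gLM2ForMaskedLM, so the custom
    # code must be trusted explicitly.
    config = AutoConfig.from_pretrained("user/gLM2-repo", trust_remote_code=True)
    model = AutoModelForMaskedLM.from_pretrained("user/gLM2-repo", trust_remote_code=True)
    print(config.dim, config.depth, config.heads)  # 640 30 10 with the defaults above
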
model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:e047e722c8d2a4adc9591b035031716df14784019153e5c2953617fb6ea1cafb
+ oid sha256:961e9c84f5993fddb7d9787c91bd26ad5b2cfdc5ab6a3da55a42712133ed4001
  size 2682482800
runs/Oct22_14-42-35_torch-flash-large-0-5/events.out.tfevents.1729608158.torch-flash-large-0-5.15432.0 CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:0ba87df1a5691e2bcdf01eb7945e888258aecf93451209bc8b4d93e26cc24c36
- size 11044
+ oid sha256:76d16de0b494f8ee644099fa039dc15371de6e7d438d95cdd9e2c28d163d0f61
+ size 13154