Curt-Park committed
Commit 966f777 · 1 Parent(s): 6ed5945

Add fastertransformer model

This view is limited to 50 files because it contains too many changes; see the raw diff for the full change set.
Files changed (50)
  1. Makefile +7 -0
  2. huggingface_gptj_ckpt_convert.py +189 -0
  3. model_repository/codegen-350M-mono-gptj/1/.tmp +0 -0
  4. model_repository/codegen-350M-mono-gptj/1/config.ini +11 -0
  5. model_repository/codegen-350M-mono-gptj/1/model.final_layernorm.bias.bin +3 -0
  6. model_repository/codegen-350M-mono-gptj/1/model.final_layernorm.weight.bin +3 -0
  7. model_repository/codegen-350M-mono-gptj/1/model.layers.0.attention.dense.weight.0.bin +3 -0
  8. model_repository/codegen-350M-mono-gptj/1/model.layers.0.attention.query_key_value.weight.0.bin +3 -0
  9. model_repository/codegen-350M-mono-gptj/1/model.layers.0.input_layernorm.bias.bin +3 -0
  10. model_repository/codegen-350M-mono-gptj/1/model.layers.0.input_layernorm.weight.bin +3 -0
  11. model_repository/codegen-350M-mono-gptj/1/model.layers.0.mlp.dense_4h_to_h.bias.bin +3 -0
  12. model_repository/codegen-350M-mono-gptj/1/model.layers.0.mlp.dense_4h_to_h.weight.0.bin +3 -0
  13. model_repository/codegen-350M-mono-gptj/1/model.layers.0.mlp.dense_h_to_4h.bias.0.bin +3 -0
  14. model_repository/codegen-350M-mono-gptj/1/model.layers.0.mlp.dense_h_to_4h.weight.0.bin +3 -0
  15. model_repository/codegen-350M-mono-gptj/1/model.layers.1.attention.dense.weight.0.bin +3 -0
  16. model_repository/codegen-350M-mono-gptj/1/model.layers.1.attention.query_key_value.weight.0.bin +3 -0
  17. model_repository/codegen-350M-mono-gptj/1/model.layers.1.input_layernorm.bias.bin +3 -0
  18. model_repository/codegen-350M-mono-gptj/1/model.layers.1.input_layernorm.weight.bin +3 -0
  19. model_repository/codegen-350M-mono-gptj/1/model.layers.1.mlp.dense_4h_to_h.bias.bin +3 -0
  20. model_repository/codegen-350M-mono-gptj/1/model.layers.1.mlp.dense_4h_to_h.weight.0.bin +3 -0
  21. model_repository/codegen-350M-mono-gptj/1/model.layers.1.mlp.dense_h_to_4h.bias.0.bin +3 -0
  22. model_repository/codegen-350M-mono-gptj/1/model.layers.1.mlp.dense_h_to_4h.weight.0.bin +3 -0
  23. model_repository/codegen-350M-mono-gptj/1/model.layers.10.attention.dense.weight.0.bin +3 -0
  24. model_repository/codegen-350M-mono-gptj/1/model.layers.10.attention.query_key_value.weight.0.bin +3 -0
  25. model_repository/codegen-350M-mono-gptj/1/model.layers.10.input_layernorm.bias.bin +3 -0
  26. model_repository/codegen-350M-mono-gptj/1/model.layers.10.input_layernorm.weight.bin +3 -0
  27. model_repository/codegen-350M-mono-gptj/1/model.layers.10.mlp.dense_4h_to_h.bias.bin +3 -0
  28. model_repository/codegen-350M-mono-gptj/1/model.layers.10.mlp.dense_4h_to_h.weight.0.bin +3 -0
  29. model_repository/codegen-350M-mono-gptj/1/model.layers.10.mlp.dense_h_to_4h.bias.0.bin +3 -0
  30. model_repository/codegen-350M-mono-gptj/1/model.layers.10.mlp.dense_h_to_4h.weight.0.bin +3 -0
  31. model_repository/codegen-350M-mono-gptj/1/model.layers.11.attention.dense.weight.0.bin +3 -0
  32. model_repository/codegen-350M-mono-gptj/1/model.layers.11.attention.query_key_value.weight.0.bin +3 -0
  33. model_repository/codegen-350M-mono-gptj/1/model.layers.11.input_layernorm.bias.bin +3 -0
  34. model_repository/codegen-350M-mono-gptj/1/model.layers.11.input_layernorm.weight.bin +3 -0
  35. model_repository/codegen-350M-mono-gptj/1/model.layers.11.mlp.dense_4h_to_h.bias.bin +3 -0
  36. model_repository/codegen-350M-mono-gptj/1/model.layers.11.mlp.dense_4h_to_h.weight.0.bin +3 -0
  37. model_repository/codegen-350M-mono-gptj/1/model.layers.11.mlp.dense_h_to_4h.bias.0.bin +3 -0
  38. model_repository/codegen-350M-mono-gptj/1/model.layers.11.mlp.dense_h_to_4h.weight.0.bin +3 -0
  39. model_repository/codegen-350M-mono-gptj/1/model.layers.12.attention.dense.weight.0.bin +3 -0
  40. model_repository/codegen-350M-mono-gptj/1/model.layers.12.attention.query_key_value.weight.0.bin +3 -0
  41. model_repository/codegen-350M-mono-gptj/1/model.layers.12.input_layernorm.bias.bin +3 -0
  42. model_repository/codegen-350M-mono-gptj/1/model.layers.12.input_layernorm.weight.bin +3 -0
  43. model_repository/codegen-350M-mono-gptj/1/model.layers.12.mlp.dense_4h_to_h.bias.bin +3 -0
  44. model_repository/codegen-350M-mono-gptj/1/model.layers.12.mlp.dense_4h_to_h.weight.0.bin +3 -0
  45. model_repository/codegen-350M-mono-gptj/1/model.layers.12.mlp.dense_h_to_4h.bias.0.bin +3 -0
  46. model_repository/codegen-350M-mono-gptj/1/model.layers.12.mlp.dense_h_to_4h.weight.0.bin +3 -0
  47. model_repository/codegen-350M-mono-gptj/1/model.layers.13.attention.dense.weight.0.bin +3 -0
  48. model_repository/codegen-350M-mono-gptj/1/model.layers.13.attention.query_key_value.weight.0.bin +3 -0
  49. model_repository/codegen-350M-mono-gptj/1/model.layers.13.input_layernorm.bias.bin +3 -0
  50. model_repository/codegen-350M-mono-gptj/1/model.layers.13.input_layernorm.weight.bin +3 -0
Makefile ADDED
@@ -0,0 +1,7 @@
+ TRITON_CONTAINER_NAME=registry.gitlab.com/curt-park/tritonserver-ft
+ TRITON_VERSION=22.12
+
+ triton:
+ 	docker run --gpus "device=0" --shm-size=4G --rm \
+ 		-p 8000:8000 -p 8001:8001 -p 8002:8002 -v $(PWD)/model_repository:/models \
+ 		$(TRITON_CONTAINER_NAME):$(TRITON_VERSION) tritonserver --model-repository=/models
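Once the image is available locally, `make triton` starts the server with the model repository mounted at /models. A minimal smoke test, assuming the standard Triton HTTP (KServe v2) endpoints on the port-8000 mapping above; the model name is taken from this commit's model_repository directory layout:

    # Returns HTTP 200 once every model under /models has loaded.
    curl -fsS http://localhost:8000/v2/health/ready

    # Fetch metadata for the converted model by its repository directory name.
    curl -fsS http://localhost:8000/v2/models/codegen-350M-mono-gptj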
huggingface_gptj_ckpt_convert.py ADDED
@@ -0,0 +1,189 @@
+ """Hugging Face model converter for FasterTransformer.
+
+ Reference:
+ https://github.com/NVIDIA/FasterTransformer/tree/main/examples/pytorch/gptj/utils
+ """
+ import configparser
+ from argparse import ArgumentParser
+ from os import makedirs
+ from pathlib import Path
+
+ import numpy as np
+ import torch
+ from transformers import PretrainedConfig
+
+ torch.set_printoptions(linewidth=130, sci_mode=False)
+ np.set_printoptions(linewidth=130, suppress=True)
+
+ # This converter is used to convert the huggingface moyix/codegen-350M-mono-gptj model.
+
+
+ def savebin(param, save_path):
+     if isinstance(param, torch.Tensor):
+         param = param.cpu().float().numpy()
+     np.squeeze(param).astype(np.float32).tofile(save_path + ".bin")
+
+
+ def param2file(pt_param, layer_id, save_dir, dest_key):
+     base_n = save_dir + "/model.layers." + str(layer_id) + "."
+     save_path = base_n + dest_key
+     savebin(pt_param, save_path)
+
+
+ def param2distributed(
+     pt_param,
+     layer_id,
+     save_dir,
+     dest_key,
+     n_inference_gpus,
+     split_axis,
+ ):
+     # Split the tensor into one shard per inference GPU and save each shard.
+     np_param = pt_param.cpu().float().numpy()
+     base_n = save_dir + "/model.layers." + str(layer_id) + "."
+     save_path = base_n + dest_key
+     split_param = np.split(np_param, n_inference_gpus, axis=split_axis)
+     for i, p in enumerate(split_param):
+         savebin(p, save_path + f".{i}")
+
+
+ def save(w, save_dir, n_inference_gpus, n_layers, layer_id):
+     makedirs(save_dir, exist_ok=True)
+
+     savebin(w["transformer.wte.weight"], save_dir + "/model.wte")
+     l = layer_id
+     print(f"Saving layer {l + 1} / {n_layers}")
+     base_k = "transformer.h." + str(l) + "."
+     param2file(w[base_k + "ln_1.bias"], l, save_dir, "input_layernorm.bias")
+     param2file(w[base_k + "ln_1.weight"], l, save_dir, "input_layernorm.weight")
+     param2distributed(
+         w[base_k + "mlp.fc_in.weight"].T,
+         l,
+         save_dir,
+         "mlp.dense_h_to_4h.weight",
+         n_inference_gpus,
+         split_axis=-1,  # split fast index
+     )
+     param2distributed(
+         w[base_k + "mlp.fc_in.bias"],
+         l,
+         save_dir,
+         "mlp.dense_h_to_4h.bias",
+         n_inference_gpus,
+         split_axis=-1,  # split fast index
+     )
+
+     param2distributed(
+         w[base_k + "mlp.fc_out.weight"].T,
+         l,
+         save_dir,
+         "mlp.dense_4h_to_h.weight",
+         n_inference_gpus,
+         split_axis=0,  # split slow index
+     )
+     param2file(w[base_k + "mlp.fc_out.bias"], l, save_dir, "mlp.dense_4h_to_h.bias")
+     param2distributed(
+         w[base_k + "attn.out_proj.weight"].T,
+         l,
+         save_dir,
+         "attention.dense.weight",
+         n_inference_gpus,
+         split_axis=0,  # split slow index
+     )
+     QKV_w = torch.stack(
+         [
+             w[base_k + "attn.q_proj.weight"],
+             w[base_k + "attn.k_proj.weight"],
+             w[base_k + "attn.v_proj.weight"],
+         ]
+     )  # [qkv, n_heads * dim_head, latent_space]
+     QKV_w = QKV_w.permute(2, 0, 1)
+     param2distributed(
+         QKV_w,
+         l,
+         save_dir,
+         "attention.query_key_value.weight",
+         n_inference_gpus,
+         split_axis=-1,  # split fast index
+     )
+     # Other unneeded per-layer params:
+     # attn.attention.masked_bias = torch.tensor(-1e9)
+     # attn.attention.bias = torch.tril(torch.ones(1, 1, 2048, 2048))
+
+
+ if __name__ == "__main__":
+     parser = ArgumentParser(
+         description="Convert GPT-J slim checkpoint to FasterTransformer",
+     )
+     parser.add_argument(
+         "--output-dir",
+         help="Folder where binary files are stored",
+         default="c-models/",
+     )
+     parser.add_argument(
+         "--ckpt-dir",
+         help="Directory of the GPT-J Hugging Face checkpoint",
+         default="./",
+     )
+     parser.add_argument(
+         "--n-inference-gpus",
+         help="Number of GPUs used for inference runtime",
+         default=1,
+         type=int,
+     )
+     parser.add_argument(
+         "--n-layers", help="Number of GPT-J decoder layers", default=20, type=int
+     )
+     args = parser.parse_args()
+
+     ckpt_file = args.ckpt_dir + "/pytorch_model.bin"
+     print(f"loading from {ckpt_file}")
+     checkpoint = torch.load(ckpt_file)
+
+     out_path = args.output_dir
+     output_dir = out_path + f"/{args.n_inference_gpus}-gpu/"
+     print(f"saving to {output_dir}")
+
+     config_file = args.ckpt_dir + "/config.json"
+     hf_config = PretrainedConfig.from_json_file(config_file).to_dict()
+
+     # NOTE: save parameters to config files (loaded by triton backends)
+     config = configparser.ConfigParser()
+     config["gptj"] = {}
+     try:
+         config["gptj"]["model_name"] = (
+             "gptj" if hf_config["_name_or_path"] == "" else hf_config["_name_or_path"]
+         )
+         config["gptj"]["head_num"] = str(hf_config["n_head"])
+         n_embd = hf_config["n_embd"]
+         config["gptj"]["size_per_head"] = str(n_embd // hf_config["n_head"])
+         config["gptj"]["inter_size"] = str(n_embd * 4)
+         config["gptj"]["num_layer"] = str(hf_config["n_layer"])
+         rotary_dim = (
+             n_embd // hf_config["n_head"]
+             if hf_config["rotary_dim"] is None
+             else hf_config["rotary_dim"]
+         )
+         config["gptj"]["rotary_embedding"] = str(rotary_dim)
+         config["gptj"]["vocab_size"] = str(hf_config["vocab_size"])
+         config["gptj"]["start_id"] = str(hf_config["bos_token_id"])
+         config["gptj"]["end_id"] = str(hf_config["eos_token_id"])
+         config["gptj"]["weight_data_type"] = "fp32"
+         Path(output_dir).mkdir(exist_ok=True, parents=True)
+         with open(output_dir + "/config.ini", "w") as configfile:
+             config.write(configfile)
+     except Exception as exc:
+         print(f"Failed to save the config in config.ini: {exc}")
+
+     for i in range(args.n_layers):
+         save(checkpoint, output_dir, args.n_inference_gpus, args.n_layers, i)
+     savebin(
+         checkpoint["transformer.ln_f.weight"],
+         output_dir + "/model.final_layernorm.weight",
+     )
+     savebin(
+         checkpoint["transformer.ln_f.bias"], output_dir + "/model.final_layernorm.bias"
+     )
+     savebin(checkpoint["lm_head.weight"], output_dir + "/model.lm_head.weight")
+     savebin(checkpoint["lm_head.bias"], output_dir + "/model.lm_head.bias")
+
+     print("done")
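For reference, a hedged invocation sketch for this script. The checkpoint directory name is hypothetical; it should contain the Hugging Face pytorch_model.bin and config.json (e.g. a local clone of moyix/codegen-350M-mono-gptj). Note that the script appends "{n}-gpu/" to --output-dir, so the binaries would land in a 1-gpu/ subdirectory and were presumably moved into the Triton version directory 1/ afterwards:

    # Convert the Hugging Face GPT-J-format checkpoint to FasterTransformer binaries.
    python huggingface_gptj_ckpt_convert.py \
        --ckpt-dir ./codegen-350M-mono-gptj \
        --output-dir model_repository/codegen-350M-mono-gptj \
        --n-inference-gpus 1 \
        --n-layers 20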
model_repository/codegen-350M-mono-gptj/1/.tmp ADDED
File without changes
model_repository/codegen-350M-mono-gptj/1/config.ini ADDED
@@ -0,0 +1,11 @@
+ [gptj]
+ model_name = codegen-350M-mono-gptj.modeldir
+ head_num = 16
+ size_per_head = 64
+ inter_size = 4096
+ num_layer = 20
+ rotary_embedding = 32
+ vocab_size = 51200
+ start_id = 1
+ end_id = 2
+ weight_data_type = fp16
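These values line up with what the converter above derives for this 350M checkpoint: head_num 16 × size_per_head 64 gives a hidden size of 1024, inter_size is 4 × 1024 = 4096, and rotary_embedding 32 is the checkpoint's rotary_dim. One discrepancy worth noting: weight_data_type reads fp16 here even though the script hard-codes "fp32", so the committed config.ini does not match the converter's default output.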
model_repository/codegen-350M-mono-gptj/1/model.final_layernorm.bias.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6baadc4e864dfa0c53c3341dc67099ed09289698d544e634bacb08486a180528
+ size 4096
model_repository/codegen-350M-mono-gptj/1/model.final_layernorm.weight.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dcb310b47f71b0c56f1297e99dbad24c9ef9f0a81c9bc18fa29d82f0bcc08e91
+ size 4096
model_repository/codegen-350M-mono-gptj/1/model.layers.0.attention.dense.weight.0.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4c1dcf7be0863950cc63acd4f2485f7679d117419d17fc38c823674bb33a220e
+ size 4194304
model_repository/codegen-350M-mono-gptj/1/model.layers.0.attention.query_key_value.weight.0.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6af343be9479b9ffd272765ea7cce456f3f13f772fadd9902d19ea45fc971d1b
+ size 12582912
model_repository/codegen-350M-mono-gptj/1/model.layers.0.input_layernorm.bias.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f315eb011965fa6db1d263f4544ef6e41c060acf2afe3a4e27a803d2200a7c99
+ size 4096
model_repository/codegen-350M-mono-gptj/1/model.layers.0.input_layernorm.weight.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2f2ff6f3e68f1134f1d335f9114b3e990088d818527de82979a5c39064bde295
+ size 4096
model_repository/codegen-350M-mono-gptj/1/model.layers.0.mlp.dense_4h_to_h.bias.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8f219b47cf228f28bb2f1ca91f51fc94f73c740c2177ec195f6124f04f9c62c2
+ size 4096
model_repository/codegen-350M-mono-gptj/1/model.layers.0.mlp.dense_4h_to_h.weight.0.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3db2a87c94b1199184ae3100aaf7aebd529f7580698f1c5f9d68e354b4ca1462
+ size 16777216
model_repository/codegen-350M-mono-gptj/1/model.layers.0.mlp.dense_h_to_4h.bias.0.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:22cf998991af5d8a7bc36d08bf7662b367cbe551873819159258fa274e3594f2
+ size 16384
model_repository/codegen-350M-mono-gptj/1/model.layers.0.mlp.dense_h_to_4h.weight.0.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a102b9c88fc373088053ff00c4941bcccab807596341d9f85f72806041d9a556
+ size 16777216
model_repository/codegen-350M-mono-gptj/1/model.layers.1.attention.dense.weight.0.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:243c25a80fca60c324fdeac51834ca7b778ff44a2ddca1e850c69d80b317af76
+ size 4194304
model_repository/codegen-350M-mono-gptj/1/model.layers.1.attention.query_key_value.weight.0.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:47f7cf47b6a572476edcc6301b33aaac926c3c1a28da028a319780208ebdaba6
+ size 12582912
model_repository/codegen-350M-mono-gptj/1/model.layers.1.input_layernorm.bias.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:13715f7522d1fbc56c6faa84022665c78b8e31a73fab1c405184ae28d7dc7120
+ size 4096
model_repository/codegen-350M-mono-gptj/1/model.layers.1.input_layernorm.weight.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8def3f2b5036fb8eff90deb32abe452d015c1006855e74696b854c254f7006a9
+ size 4096
model_repository/codegen-350M-mono-gptj/1/model.layers.1.mlp.dense_4h_to_h.bias.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:202128f16470b7491bee6a1b7f4cc73a38969c9b7092eb0f218acf2dea26a9ab
+ size 4096
model_repository/codegen-350M-mono-gptj/1/model.layers.1.mlp.dense_4h_to_h.weight.0.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:025555851b1722d58333dfdee090054f84aa58f8debc85796e6bd3352ce8ad3b
+ size 16777216
model_repository/codegen-350M-mono-gptj/1/model.layers.1.mlp.dense_h_to_4h.bias.0.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1d5e12312267825cf338344300d163a4cb203347f08dd6a0c3dc7158ab072bb9
+ size 16384
model_repository/codegen-350M-mono-gptj/1/model.layers.1.mlp.dense_h_to_4h.weight.0.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e3bde7b51d3bff88b9caa4488eaa1b0f95fcaf807bf23d6291f355a60f998435
+ size 16777216
model_repository/codegen-350M-mono-gptj/1/model.layers.10.attention.dense.weight.0.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e9727928fc6c85917f01692245748e9495d3f1f81b1ba7c644cfa69165e4697a
+ size 4194304
model_repository/codegen-350M-mono-gptj/1/model.layers.10.attention.query_key_value.weight.0.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:869e375c28a90cf5afc071d9a400aada12afdab863d431dd9de89e03a48fb76f
+ size 12582912
model_repository/codegen-350M-mono-gptj/1/model.layers.10.input_layernorm.bias.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ed4a6ff55b24cf68c72fd5e0166ff92a1e9b3386f8a2594c1bc92a601dcd9c25
+ size 4096
model_repository/codegen-350M-mono-gptj/1/model.layers.10.input_layernorm.weight.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9f901163e6531520d7fdf85516b2848da3af7795bb4d89b18242d11e9bf5fa92
+ size 4096
model_repository/codegen-350M-mono-gptj/1/model.layers.10.mlp.dense_4h_to_h.bias.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:90141c941963c3e6f673e23e5f916ced5427f04fbf35c4ddd1a2b65c562d26ba
+ size 4096
model_repository/codegen-350M-mono-gptj/1/model.layers.10.mlp.dense_4h_to_h.weight.0.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:eca2280290060ea1ebcf939255556277600edcd06ed0a193352fa86386ddfccb
+ size 16777216
model_repository/codegen-350M-mono-gptj/1/model.layers.10.mlp.dense_h_to_4h.bias.0.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:28dcce9330a8980d959f09d1225e3493c6277def4b0e821dcb3f0fb6c6d785f9
+ size 16384
model_repository/codegen-350M-mono-gptj/1/model.layers.10.mlp.dense_h_to_4h.weight.0.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9f6cc56de1d451c3026fa36389df2a15bfeea8b82e17d35556ac45ccdb1ff41a
+ size 16777216
model_repository/codegen-350M-mono-gptj/1/model.layers.11.attention.dense.weight.0.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:527ecabc9993548e5ab44d02a9d50ac5438ed8d7cc9877c3bd568282af05a150
+ size 4194304
model_repository/codegen-350M-mono-gptj/1/model.layers.11.attention.query_key_value.weight.0.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:452f635467e63d403ae8290171d36442999e54194cf99a917cbee6c68c0746da
+ size 12582912
model_repository/codegen-350M-mono-gptj/1/model.layers.11.input_layernorm.bias.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dc785b33a9ec3058514bfc2961ddb0eeff416aa59abf1a6aaae854c3e389a672
+ size 4096
model_repository/codegen-350M-mono-gptj/1/model.layers.11.input_layernorm.weight.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:00e30bb025be916ec8c6e7e4a9017f55a10a3dd165062952de80c7d6727fc569
+ size 4096
model_repository/codegen-350M-mono-gptj/1/model.layers.11.mlp.dense_4h_to_h.bias.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:743698edcab6405ec376a346d9dcea77158557896ebd31bc802b436d2c926b3f
+ size 4096
model_repository/codegen-350M-mono-gptj/1/model.layers.11.mlp.dense_4h_to_h.weight.0.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b15fd6bdc29d6e873ce030f9218153b260b540b8d8b1784da7c0da35b957d85b
+ size 16777216
model_repository/codegen-350M-mono-gptj/1/model.layers.11.mlp.dense_h_to_4h.bias.0.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6d88855ac27749c80b4e7bf0c56ea865994ba7dbca813ef0d7f2abf03dd1ff7b
+ size 16384
model_repository/codegen-350M-mono-gptj/1/model.layers.11.mlp.dense_h_to_4h.weight.0.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b7e1da468c678728282689498134678867f8170adc6a5146c01b448b75c0ca2d
+ size 16777216
model_repository/codegen-350M-mono-gptj/1/model.layers.12.attention.dense.weight.0.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b880954fcff18203fdb5d0e9bcc91ae8cb4353eb8ce653d40e714ed13c0f2416
+ size 4194304
model_repository/codegen-350M-mono-gptj/1/model.layers.12.attention.query_key_value.weight.0.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:eebf29afbd6dc7bca9a6b4c7c908c3c6899d0d0babe38da051593d5fbe4467ea
+ size 12582912
model_repository/codegen-350M-mono-gptj/1/model.layers.12.input_layernorm.bias.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:da5dabbde26c2a8e26563c77d784fece6aa31c9b0e5d3a7a1bfc9bacb14f3190
+ size 4096
model_repository/codegen-350M-mono-gptj/1/model.layers.12.input_layernorm.weight.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:94da027e1b7e1904de1be8edadaab75208d0691a3461ed8f85508aaf1f663b1a
+ size 4096
model_repository/codegen-350M-mono-gptj/1/model.layers.12.mlp.dense_4h_to_h.bias.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0c25103cd0354680d6cef87b2043e39a2d24766b798feec5f2133b9de9e26b4d
+ size 4096
model_repository/codegen-350M-mono-gptj/1/model.layers.12.mlp.dense_4h_to_h.weight.0.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3a08674f5549b01d5e001bff7da0543561af8ac994b7e863f5a8682b9c711137
+ size 16777216
model_repository/codegen-350M-mono-gptj/1/model.layers.12.mlp.dense_h_to_4h.bias.0.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:099f92ef914830dab645ac9f324c91190ec88b0ef357b1ecfab8385bb05124b0
+ size 16384
model_repository/codegen-350M-mono-gptj/1/model.layers.12.mlp.dense_h_to_4h.weight.0.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8aa5cf3d95ef48bc8674f006f6254a89de6def30256f3aaa4d639062b975cf34
+ size 16777216
model_repository/codegen-350M-mono-gptj/1/model.layers.13.attention.dense.weight.0.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dbd19b1dad736b78b633ee056c782f0613a20ed7e5aacc629506863a9055b9b5
+ size 4194304
model_repository/codegen-350M-mono-gptj/1/model.layers.13.attention.query_key_value.weight.0.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:17c89b990942abbe469ed7b0379631b2a698b18c66e867a5ad277648f8c75eb9
+ size 12582912
model_repository/codegen-350M-mono-gptj/1/model.layers.13.input_layernorm.bias.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:22dab386db6ff716ac91335582aaf4fe2844e5d657d47a8e3c39154f75188661
+ size 4096
model_repository/codegen-350M-mono-gptj/1/model.layers.13.input_layernorm.weight.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fe7a64a19a0a0288e56cddfb3eb9496641bc8c64b633ae0f454403f36f7bd989
+ size 4096
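All of the .bin files in this commit are Git LFS pointer stubs like the ones above, not the raw weights, so a plain clone fetches only these three-line files. A minimal sketch for materializing the actual binaries, assuming git-lfs is installed and the repository is already cloned:

    # Replace the LFS pointer stubs with the actual weight binaries.
    git lfs install
    git lfs pull --include="model_repository/codegen-350M-mono-gptj/1/*.bin"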