Curt-Park committed
Commit: 966f777
Parent(s): 6ed5945

Add fastertransformer model

This view is limited to 50 files because it contains too many changes. See raw diff.
- Makefile +7 -0
- huggingface_gptj_ckpt_convert.py +189 -0
- model_repository/codegen-350M-mono-gptj/1/.tmp +0 -0
- model_repository/codegen-350M-mono-gptj/1/config.ini +11 -0
- model_repository/codegen-350M-mono-gptj/1/model.final_layernorm.bias.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.final_layernorm.weight.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.0.attention.dense.weight.0.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.0.attention.query_key_value.weight.0.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.0.input_layernorm.bias.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.0.input_layernorm.weight.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.0.mlp.dense_4h_to_h.bias.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.0.mlp.dense_4h_to_h.weight.0.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.0.mlp.dense_h_to_4h.bias.0.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.0.mlp.dense_h_to_4h.weight.0.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.1.attention.dense.weight.0.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.1.attention.query_key_value.weight.0.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.1.input_layernorm.bias.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.1.input_layernorm.weight.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.1.mlp.dense_4h_to_h.bias.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.1.mlp.dense_4h_to_h.weight.0.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.1.mlp.dense_h_to_4h.bias.0.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.1.mlp.dense_h_to_4h.weight.0.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.10.attention.dense.weight.0.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.10.attention.query_key_value.weight.0.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.10.input_layernorm.bias.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.10.input_layernorm.weight.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.10.mlp.dense_4h_to_h.bias.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.10.mlp.dense_4h_to_h.weight.0.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.10.mlp.dense_h_to_4h.bias.0.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.10.mlp.dense_h_to_4h.weight.0.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.11.attention.dense.weight.0.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.11.attention.query_key_value.weight.0.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.11.input_layernorm.bias.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.11.input_layernorm.weight.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.11.mlp.dense_4h_to_h.bias.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.11.mlp.dense_4h_to_h.weight.0.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.11.mlp.dense_h_to_4h.bias.0.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.11.mlp.dense_h_to_4h.weight.0.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.12.attention.dense.weight.0.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.12.attention.query_key_value.weight.0.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.12.input_layernorm.bias.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.12.input_layernorm.weight.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.12.mlp.dense_4h_to_h.bias.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.12.mlp.dense_4h_to_h.weight.0.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.12.mlp.dense_h_to_4h.bias.0.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.12.mlp.dense_h_to_4h.weight.0.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.13.attention.dense.weight.0.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.13.attention.query_key_value.weight.0.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.13.input_layernorm.bias.bin +3 -0
- model_repository/codegen-350M-mono-gptj/1/model.layers.13.input_layernorm.weight.bin +3 -0
Makefile
ADDED
@@ -0,0 +1,7 @@
TRITON_CONTAINER_NAME=registry.gitlab.com/curt-park/tritonserver-ft
TRITON_VERSION=22.12

triton:
	docker run --gpus "device=0" --shm-size=4G --rm \
		-p 8000:8000 -p 8001:8001 -p 8002:8002 -v $(PWD)/model_repository:/models \
		$(TRITON_CONTAINER_NAME):$(TRITON_VERSION) tritonserver --model-repository=/models
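Once the server is up via `make triton`, it can be probed from Python. A minimal readiness-check sketch, assuming the tritonclient[http] package is installed locally (it is not part of this commit) and the default HTTP port mapping above:

# Readiness-check sketch for the Triton server launched by `make triton`.
# Assumes `pip install tritonclient[http]`; not part of this commit.
import tritonclient.http as httpclient

client = httpclient.InferenceServerClient(url="localhost:8000")
print("server live:", client.is_server_live())
print("model ready:", client.is_model_ready("codegen-350M-mono-gptj"))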
huggingface_gptj_ckpt_convert.py
ADDED
@@ -0,0 +1,189 @@
"""Huggingface model converter to FasterTransformer.

Reference:
    https://github.com/NVIDIA/FasterTransformer/tree/main/examples/pytorch/gptj/utils
"""
import configparser
from argparse import ArgumentParser
from os import makedirs
from pathlib import Path

import numpy as np
import torch
from transformers import PretrainedConfig

torch.set_printoptions(linewidth=130, sci_mode=False)
np.set_printoptions(linewidth=130, suppress=True)

# This converter is used to convert the huggingface moyix/codegen-350M-mono-gptj model.


def savebin(param, save_path):
    """Dump a parameter as a flat float32 binary file."""
    if isinstance(param, torch.Tensor):
        param = param.cpu().float().numpy()
    np.squeeze(param).astype(np.float32).tofile(save_path + ".bin")


def param2file(pt_param, layer_id, save_dir, dest_key):
    """Save a per-layer parameter that is replicated across GPUs."""
    base_n = save_dir + "/model.layers." + str(layer_id) + "."
    save_path = base_n + dest_key
    savebin(pt_param, save_path)


def param2distributed(
    pt_param,
    layer_id,
    save_dir,
    dest_key,
    n_inference_gpus,
    split_axis,
):
    """Split a per-layer parameter along `split_axis` and save one shard per GPU."""
    np_param = pt_param.cpu().float().numpy()
    base_n = save_dir + "/model.layers." + str(layer_id) + "."
    save_path = base_n + dest_key
    split_param = np.split(np_param, n_inference_gpus, axis=split_axis)
    for i, p in enumerate(split_param):
        savebin(p, save_path + f".{i}")


def save(w, save_dir, n_inference_gpus, n_layers, layer_id):
    makedirs(save_dir, exist_ok=True)

    savebin(w["transformer.wte.weight"], save_dir + "/model.wte")
    l = layer_id
    print(f"Saving layer {l + 1} / {n_layers}")
    base_k = "transformer.h." + str(l) + "."
    param2file(w[base_k + "ln_1.bias"], l, save_dir, "input_layernorm.bias")
    param2file(w[base_k + "ln_1.weight"], l, save_dir, "input_layernorm.weight")
    param2distributed(
        w[base_k + "mlp.fc_in.weight"].T,
        l,
        save_dir,
        "mlp.dense_h_to_4h.weight",
        n_inference_gpus,
        split_axis=-1,  # split fast index
    )
    param2distributed(
        w[base_k + "mlp.fc_in.bias"],
        l,
        save_dir,
        "mlp.dense_h_to_4h.bias",
        n_inference_gpus,
        split_axis=-1,  # split fast index
    )

    param2distributed(
        w[base_k + "mlp.fc_out.weight"].T,
        l,
        save_dir,
        "mlp.dense_4h_to_h.weight",
        n_inference_gpus,
        split_axis=0,  # split slow index
    )
    param2file(w[base_k + "mlp.fc_out.bias"], l, save_dir, "mlp.dense_4h_to_h.bias")
    param2distributed(
        w[base_k + "attn.out_proj.weight"].T,
        l,
        save_dir,
        "attention.dense.weight",
        n_inference_gpus,
        split_axis=0,  # split slow index
    )
    QKV_w = torch.stack(
        [
            w[base_k + "attn.q_proj.weight"],
            w[base_k + "attn.k_proj.weight"],
            w[base_k + "attn.v_proj.weight"],
        ]
    )  # [qkv, n_heads * dim_head, latent_space]
    QKV_w = QKV_w.permute(2, 0, 1)  # -> [latent_space, qkv, n_heads * dim_head]
    param2distributed(
        QKV_w,
        l,
        save_dir,
        "attention.query_key_value.weight",
        n_inference_gpus,
        split_axis=-1,  # split fast index
    )
    # Other unneeded per-layer params:
    # attn.attention.masked_bias = torch.tensor(-1e9)
    # attn.attention.bias = torch.tril(torch.ones(1, 1, 2048, 2048))


if __name__ == "__main__":
    parser = ArgumentParser(
        description="Convert GPT-J slim checkpoint to FasterTransformer",
    )
    parser.add_argument(
        "--output-dir",
        help="Folder where binary files are stored",
        default="c-models/",
    )
    parser.add_argument(
        "--ckpt-dir",
        help="Directory of the GPT-J huggingface checkpoint",
        default="./",
    )
    parser.add_argument(
        "--n-inference-gpus",
        help="Number of GPUs used for inference runtime",
        default=1,
        type=int,
    )
    parser.add_argument(
        "--n-layers", help="Number of GPT-J decoder layers", default=20, type=int
    )
    args = parser.parse_args()

    ckpt_file = args.ckpt_dir + "/pytorch_model.bin"
    print(f"loading from {ckpt_file}")
    checkpoint = torch.load(ckpt_file)

    out_path = args.output_dir
    output_dir = out_path + f"/{args.n_inference_gpus}-gpu/"
    print(f"saving to {output_dir}")

    config_file = args.ckpt_dir + "/config.json"
    hf_config = PretrainedConfig.from_json_file(config_file).to_dict()

    # NOTE: save parameters to a config file (loaded by the triton backend)
    config = configparser.ConfigParser()
    config["gptj"] = {}
    try:
        config["gptj"]["model_name"] = (
            "gptj" if hf_config["_name_or_path"] == "" else hf_config["_name_or_path"]
        )
        config["gptj"]["head_num"] = str(hf_config["n_head"])
        n_embd = hf_config["n_embd"]
        config["gptj"]["size_per_head"] = str(n_embd // hf_config["n_head"])
        config["gptj"]["inter_size"] = str(n_embd * 4)
        config["gptj"]["num_layer"] = str(hf_config["n_layer"])
        rotary_dim = (
            n_embd // hf_config["n_head"]
            if hf_config["rotary_dim"] is None
            else hf_config["rotary_dim"]
        )
        # Use the computed fallback (the original wrote hf_config["rotary_dim"],
        # which left the fallback above dead and breaks when it is None).
        config["gptj"]["rotary_embedding"] = str(rotary_dim)
        config["gptj"]["vocab_size"] = str(hf_config["vocab_size"])
        config["gptj"]["start_id"] = str(hf_config["bos_token_id"])
        config["gptj"]["end_id"] = str(hf_config["eos_token_id"])
        config["gptj"]["weight_data_type"] = "fp32"
        Path(output_dir).mkdir(exist_ok=True, parents=True)
        with open(output_dir + "/config.ini", "w") as configfile:
            config.write(configfile)
    except Exception:
        print("Failed to save the config in config.ini.")

    for i in range(args.n_layers):
        save(checkpoint, output_dir, args.n_inference_gpus, args.n_layers, i)
    savebin(
        checkpoint["transformer.ln_f.weight"],
        output_dir + "/model.final_layernorm.weight",
    )
    savebin(
        checkpoint["transformer.ln_f.bias"], output_dir + "/model.final_layernorm.bias"
    )
    savebin(checkpoint["lm_head.weight"], output_dir + "/model.lm_head.weight")
    savebin(checkpoint["lm_head.bias"], output_dir + "/model.lm_head.bias")

    print("done")
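As a sanity check, the flat float32 dumps written by savebin can be read back with numpy and compared against the expected shapes. A minimal sketch, assuming the single-GPU layout produced above and the codegen-350M-mono-gptj dimensions from config.ini (head_num = 16, size_per_head = 64, so n_embd = 1024):

# Sanity-check sketch for the .bin dumps written by savebin above.
# Assumes n_embd = 1024 (16 heads * 64 per head) and a single-GPU split.
import numpy as np

n_embd = 16 * 64
path = (
    "model_repository/codegen-350M-mono-gptj/1/"
    "model.layers.0.attention.query_key_value.weight.0.bin"
)
w = np.fromfile(path, dtype=np.float32)
assert w.size == n_embd * 3 * n_embd  # 12582912 bytes / 4, matching the LFS size below
qkv = w.reshape(n_embd, 3, n_embd)  # [latent_space, qkv, n_heads * dim_head]
print(qkv.shape)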
model_repository/codegen-350M-mono-gptj/1/.tmp
ADDED
File without changes
model_repository/codegen-350M-mono-gptj/1/config.ini
ADDED
@@ -0,0 +1,11 @@
[gptj]
model_name = codegen-350M-mono-gptj.modeldir
head_num = 16
size_per_head = 64
inter_size = 4096
num_layer = 20
rotary_embedding = 32
vocab_size = 51200
start_id = 1
end_id = 2
weight_data_type = fp16
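The values above can be read back with the same configparser format the converter writes; a small sketch (illustrative only, not part of this commit):

# Read back the converter-style config.ini and derive the hidden size.
import configparser

cfg = configparser.ConfigParser()
cfg.read("model_repository/codegen-350M-mono-gptj/1/config.ini")
gptj = cfg["gptj"]
n_embd = int(gptj["head_num"]) * int(gptj["size_per_head"])  # 16 * 64 = 1024
print(n_embd, gptj["num_layer"], gptj["vocab_size"])  # 1024 20 51200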
model_repository/codegen-350M-mono-gptj/1/model.final_layernorm.bias.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:6baadc4e864dfa0c53c3341dc67099ed09289698d544e634bacb08486a180528
size 4096

model_repository/codegen-350M-mono-gptj/1/model.final_layernorm.weight.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:dcb310b47f71b0c56f1297e99dbad24c9ef9f0a81c9bc18fa29d82f0bcc08e91
size 4096
model_repository/codegen-350M-mono-gptj/1/model.layers.0.attention.dense.weight.0.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:4c1dcf7be0863950cc63acd4f2485f7679d117419d17fc38c823674bb33a220e
size 4194304

model_repository/codegen-350M-mono-gptj/1/model.layers.0.attention.query_key_value.weight.0.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:6af343be9479b9ffd272765ea7cce456f3f13f772fadd9902d19ea45fc971d1b
size 12582912

model_repository/codegen-350M-mono-gptj/1/model.layers.0.input_layernorm.bias.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f315eb011965fa6db1d263f4544ef6e41c060acf2afe3a4e27a803d2200a7c99
size 4096

model_repository/codegen-350M-mono-gptj/1/model.layers.0.input_layernorm.weight.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:2f2ff6f3e68f1134f1d335f9114b3e990088d818527de82979a5c39064bde295
size 4096

model_repository/codegen-350M-mono-gptj/1/model.layers.0.mlp.dense_4h_to_h.bias.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:8f219b47cf228f28bb2f1ca91f51fc94f73c740c2177ec195f6124f04f9c62c2
size 4096

model_repository/codegen-350M-mono-gptj/1/model.layers.0.mlp.dense_4h_to_h.weight.0.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3db2a87c94b1199184ae3100aaf7aebd529f7580698f1c5f9d68e354b4ca1462
size 16777216

model_repository/codegen-350M-mono-gptj/1/model.layers.0.mlp.dense_h_to_4h.bias.0.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:22cf998991af5d8a7bc36d08bf7662b367cbe551873819159258fa274e3594f2
size 16384

model_repository/codegen-350M-mono-gptj/1/model.layers.0.mlp.dense_h_to_4h.weight.0.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:a102b9c88fc373088053ff00c4941bcccab807596341d9f85f72806041d9a556
size 16777216
model_repository/codegen-350M-mono-gptj/1/model.layers.1.attention.dense.weight.0.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:243c25a80fca60c324fdeac51834ca7b778ff44a2ddca1e850c69d80b317af76
size 4194304

model_repository/codegen-350M-mono-gptj/1/model.layers.1.attention.query_key_value.weight.0.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:47f7cf47b6a572476edcc6301b33aaac926c3c1a28da028a319780208ebdaba6
size 12582912

model_repository/codegen-350M-mono-gptj/1/model.layers.1.input_layernorm.bias.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:13715f7522d1fbc56c6faa84022665c78b8e31a73fab1c405184ae28d7dc7120
size 4096

model_repository/codegen-350M-mono-gptj/1/model.layers.1.input_layernorm.weight.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:8def3f2b5036fb8eff90deb32abe452d015c1006855e74696b854c254f7006a9
size 4096

model_repository/codegen-350M-mono-gptj/1/model.layers.1.mlp.dense_4h_to_h.bias.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:202128f16470b7491bee6a1b7f4cc73a38969c9b7092eb0f218acf2dea26a9ab
size 4096

model_repository/codegen-350M-mono-gptj/1/model.layers.1.mlp.dense_4h_to_h.weight.0.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:025555851b1722d58333dfdee090054f84aa58f8debc85796e6bd3352ce8ad3b
size 16777216

model_repository/codegen-350M-mono-gptj/1/model.layers.1.mlp.dense_h_to_4h.bias.0.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1d5e12312267825cf338344300d163a4cb203347f08dd6a0c3dc7158ab072bb9
size 16384

model_repository/codegen-350M-mono-gptj/1/model.layers.1.mlp.dense_h_to_4h.weight.0.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e3bde7b51d3bff88b9caa4488eaa1b0f95fcaf807bf23d6291f355a60f998435
size 16777216
model_repository/codegen-350M-mono-gptj/1/model.layers.10.attention.dense.weight.0.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e9727928fc6c85917f01692245748e9495d3f1f81b1ba7c644cfa69165e4697a
size 4194304

model_repository/codegen-350M-mono-gptj/1/model.layers.10.attention.query_key_value.weight.0.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:869e375c28a90cf5afc071d9a400aada12afdab863d431dd9de89e03a48fb76f
size 12582912

model_repository/codegen-350M-mono-gptj/1/model.layers.10.input_layernorm.bias.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ed4a6ff55b24cf68c72fd5e0166ff92a1e9b3386f8a2594c1bc92a601dcd9c25
size 4096

model_repository/codegen-350M-mono-gptj/1/model.layers.10.input_layernorm.weight.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9f901163e6531520d7fdf85516b2848da3af7795bb4d89b18242d11e9bf5fa92
size 4096

model_repository/codegen-350M-mono-gptj/1/model.layers.10.mlp.dense_4h_to_h.bias.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:90141c941963c3e6f673e23e5f916ced5427f04fbf35c4ddd1a2b65c562d26ba
size 4096

model_repository/codegen-350M-mono-gptj/1/model.layers.10.mlp.dense_4h_to_h.weight.0.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:eca2280290060ea1ebcf939255556277600edcd06ed0a193352fa86386ddfccb
size 16777216

model_repository/codegen-350M-mono-gptj/1/model.layers.10.mlp.dense_h_to_4h.bias.0.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:28dcce9330a8980d959f09d1225e3493c6277def4b0e821dcb3f0fb6c6d785f9
size 16384

model_repository/codegen-350M-mono-gptj/1/model.layers.10.mlp.dense_h_to_4h.weight.0.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9f6cc56de1d451c3026fa36389df2a15bfeea8b82e17d35556ac45ccdb1ff41a
size 16777216
model_repository/codegen-350M-mono-gptj/1/model.layers.11.attention.dense.weight.0.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:527ecabc9993548e5ab44d02a9d50ac5438ed8d7cc9877c3bd568282af05a150
size 4194304

model_repository/codegen-350M-mono-gptj/1/model.layers.11.attention.query_key_value.weight.0.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:452f635467e63d403ae8290171d36442999e54194cf99a917cbee6c68c0746da
size 12582912

model_repository/codegen-350M-mono-gptj/1/model.layers.11.input_layernorm.bias.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:dc785b33a9ec3058514bfc2961ddb0eeff416aa59abf1a6aaae854c3e389a672
size 4096

model_repository/codegen-350M-mono-gptj/1/model.layers.11.input_layernorm.weight.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:00e30bb025be916ec8c6e7e4a9017f55a10a3dd165062952de80c7d6727fc569
size 4096

model_repository/codegen-350M-mono-gptj/1/model.layers.11.mlp.dense_4h_to_h.bias.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:743698edcab6405ec376a346d9dcea77158557896ebd31bc802b436d2c926b3f
size 4096

model_repository/codegen-350M-mono-gptj/1/model.layers.11.mlp.dense_4h_to_h.weight.0.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b15fd6bdc29d6e873ce030f9218153b260b540b8d8b1784da7c0da35b957d85b
size 16777216

model_repository/codegen-350M-mono-gptj/1/model.layers.11.mlp.dense_h_to_4h.bias.0.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:6d88855ac27749c80b4e7bf0c56ea865994ba7dbca813ef0d7f2abf03dd1ff7b
size 16384

model_repository/codegen-350M-mono-gptj/1/model.layers.11.mlp.dense_h_to_4h.weight.0.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b7e1da468c678728282689498134678867f8170adc6a5146c01b448b75c0ca2d
size 16777216
model_repository/codegen-350M-mono-gptj/1/model.layers.12.attention.dense.weight.0.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b880954fcff18203fdb5d0e9bcc91ae8cb4353eb8ce653d40e714ed13c0f2416
size 4194304

model_repository/codegen-350M-mono-gptj/1/model.layers.12.attention.query_key_value.weight.0.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:eebf29afbd6dc7bca9a6b4c7c908c3c6899d0d0babe38da051593d5fbe4467ea
size 12582912

model_repository/codegen-350M-mono-gptj/1/model.layers.12.input_layernorm.bias.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:da5dabbde26c2a8e26563c77d784fece6aa31c9b0e5d3a7a1bfc9bacb14f3190
size 4096

model_repository/codegen-350M-mono-gptj/1/model.layers.12.input_layernorm.weight.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:94da027e1b7e1904de1be8edadaab75208d0691a3461ed8f85508aaf1f663b1a
size 4096

model_repository/codegen-350M-mono-gptj/1/model.layers.12.mlp.dense_4h_to_h.bias.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:0c25103cd0354680d6cef87b2043e39a2d24766b798feec5f2133b9de9e26b4d
size 4096

model_repository/codegen-350M-mono-gptj/1/model.layers.12.mlp.dense_4h_to_h.weight.0.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3a08674f5549b01d5e001bff7da0543561af8ac994b7e863f5a8682b9c711137
size 16777216

model_repository/codegen-350M-mono-gptj/1/model.layers.12.mlp.dense_h_to_4h.bias.0.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:099f92ef914830dab645ac9f324c91190ec88b0ef357b1ecfab8385bb05124b0
size 16384

model_repository/codegen-350M-mono-gptj/1/model.layers.12.mlp.dense_h_to_4h.weight.0.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:8aa5cf3d95ef48bc8674f006f6254a89de6def30256f3aaa4d639062b975cf34
size 16777216
model_repository/codegen-350M-mono-gptj/1/model.layers.13.attention.dense.weight.0.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:dbd19b1dad736b78b633ee056c782f0613a20ed7e5aacc629506863a9055b9b5
size 4194304

model_repository/codegen-350M-mono-gptj/1/model.layers.13.attention.query_key_value.weight.0.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:17c89b990942abbe469ed7b0379631b2a698b18c66e867a5ad277648f8c75eb9
size 12582912

model_repository/codegen-350M-mono-gptj/1/model.layers.13.input_layernorm.bias.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:22dab386db6ff716ac91335582aaf4fe2844e5d657d47a8e3c39154f75188661
size 4096

model_repository/codegen-350M-mono-gptj/1/model.layers.13.input_layernorm.weight.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:fe7a64a19a0a0288e56cddfb3eb9496641bc8c64b633ae0f454403f36f7bd989
size 4096