Update README.md
README.md
CHANGED
@@ -1,5 +1,108 @@
---
license: other
license_name: exaone
license_link: https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct/blob/main/LICENSE
---

[LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct](https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct)

This conversion was made with reference to maywell/EXAONE-3.0-7.8B-Instruct-Llamafied; many thanks for that work. If you don't have GPU resources, you can use this model as-is. The conversion script below runs entirely on CPU:

```python
import gc

import torch
from tqdm import tqdm
from transformers import AutoModelForCausalLM, AutoTokenizer, LlamaConfig, LlamaForCausalLM


def unload_model(model):
    # Drop the reference and reclaim memory.
    del model
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()


def create_llama_config(exaone_config):
    # Map the EXAONE config fields onto the equivalent LlamaConfig fields.
    return LlamaConfig(
        vocab_size=exaone_config.vocab_size,
        hidden_size=exaone_config.hidden_size,
        intermediate_size=exaone_config.intermediate_size,
        num_hidden_layers=exaone_config.num_layers,
        num_attention_heads=exaone_config.num_attention_heads,
        max_position_embeddings=exaone_config.max_position_embeddings,
        rms_norm_eps=exaone_config.layer_norm_epsilon,
        num_key_value_heads=exaone_config.num_key_value_heads,
        rope_theta=exaone_config.rope_theta,
        bos_token_id=exaone_config.bos_token_id,
        eos_token_id=exaone_config.eos_token_id,
        pad_token_id=exaone_config.pad_token_id,
        attention_bias=False,
    )


def copy_embedding_weights(llama_model, exaone_model):
    llama_model.model.embed_tokens.weight.data = exaone_model.transformer.wte.weight.data.to(torch.float16)


def copy_layer_weights(llama_layer, exaone_layer):
    # Self-attention
    llama_layer.self_attn.q_proj.weight.data = exaone_layer.attn.attention.q_proj.weight.data.to(torch.float16)
    llama_layer.self_attn.k_proj.weight.data = exaone_layer.attn.attention.k_proj.weight.data.to(torch.float16)
    llama_layer.self_attn.v_proj.weight.data = exaone_layer.attn.attention.v_proj.weight.data.to(torch.float16)
    llama_layer.self_attn.o_proj.weight.data = exaone_layer.attn.attention.out_proj.weight.data.to(torch.float16)
    # MLP: EXAONE's c_fc_0 / c_fc_1 / c_proj correspond to Llama's gate / up / down projections
    llama_layer.mlp.gate_proj.weight.data = exaone_layer.mlp.c_fc_0.weight.data.to(torch.float16)
    llama_layer.mlp.up_proj.weight.data = exaone_layer.mlp.c_fc_1.weight.data.to(torch.float16)
    llama_layer.mlp.down_proj.weight.data = exaone_layer.mlp.c_proj.weight.data.to(torch.float16)
    # Layer norms
    llama_layer.input_layernorm.weight.data = exaone_layer.ln_1.weight.data.to(torch.float16)
    llama_layer.post_attention_layernorm.weight.data = exaone_layer.ln_2.weight.data.to(torch.float16)


def copy_final_weights(llama_model, exaone_model):
    llama_model.model.norm.weight.data = exaone_model.transformer.ln_f.weight.data.to(torch.float16)
    llama_model.lm_head.weight.data = exaone_model.lm_head.weight.data.to(torch.float16)


def port_exaone_to_llama(exaone_model_path, llama_model_path):
    print("Loading EXAONE model and tokenizer...")
    exaone_model = AutoModelForCausalLM.from_pretrained(exaone_model_path, torch_dtype=torch.float16, device_map="cpu", trust_remote_code=True)
    exaone_tokenizer = AutoTokenizer.from_pretrained(exaone_model_path, trust_remote_code=True)
    exaone_config = exaone_model.config

    print("Creating Llama configuration...")
    llama_config = create_llama_config(exaone_config)

    print("Initializing Llama model...")
    llama_model = LlamaForCausalLM(llama_config)
    llama_model.to(torch.float16)
    llama_model.to('cpu')

    print("Copying weights...")
    with torch.no_grad():
        copy_embedding_weights(llama_model, exaone_model)

        for i in tqdm(range(exaone_config.num_layers), desc="Copying layers"):
            copy_layer_weights(llama_model.model.layers[i], exaone_model.transformer.h[i])
            if i % 10 == 0:  # garbage-collect every 10 layers
                gc.collect()
                if torch.cuda.is_available():
                    torch.cuda.empty_cache()

        copy_final_weights(llama_model, exaone_model)

    print("Unloading EXAONE model to free memory...")
    unload_model(exaone_model)

    print(f"Saving ported Llama model and tokenizer to {llama_model_path}")
    llama_model.save_pretrained(llama_model_path, safe_serialization=True, max_shard_size="1GB")
    exaone_tokenizer.save_pretrained(llama_model_path)

    print("Unloading Llama model...")
    unload_model(llama_model)

    print(f"EXAONE model successfully ported to Llama format and saved at {llama_model_path}")


if __name__ == "__main__":
    exaone_model_path = "LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct"
    llama_model_path = "./exa_llamafied"
    port_exaone_to_llama(exaone_model_path, llama_model_path)
```
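
Once converted, the checkpoint loads with the stock Llama classes, so `trust_remote_code` is no longer needed. A minimal loading sketch, assuming the script above has been run with its default output path `./exa_llamafied` and that the EXAONE chat template was carried over with the saved tokenizer:

```python
# Minimal sketch: load the converted checkpoint with the stock Llama classes.
# Assumes the conversion script above was run with output path ./exa_llamafied.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_path = "./exa_llamafied"
tokenizer = AutoTokenizer.from_pretrained(model_path)
# float32 on CPU; fp16 inference on CPU is slow and not always supported
model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.float32, device_map="cpu")

# The chat template is saved alongside the tokenizer, so apply_chat_template
# should format the prompt the way the instruct model expects.
messages = [{"role": "user", "content": "Hello, who are you?"}]
input_ids = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt")

with torch.no_grad():
    output = model.generate(input_ids, max_new_tokens=64)
print(tokenizer.decode(output[0][input_ids.shape[-1]:], skip_special_tokens=True))
```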
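
To check that the port preserved the weights, one option is to compare logits from the original and the converted model on the same input. A rough sketch, with the caveat that it needs enough CPU RAM to hold both fp16 copies at once (roughly 30 GB for the 7.8B model):

```python
# Rough sanity check (assumption: enough CPU RAM for both fp16 copies).
# Logits should match up to small numerical differences between the two
# attention implementations, not necessarily exactly.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

exaone = AutoModelForCausalLM.from_pretrained(
    "LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct",
    torch_dtype=torch.float16, device_map="cpu", trust_remote_code=True,
)
llama = AutoModelForCausalLM.from_pretrained(
    "./exa_llamafied", torch_dtype=torch.float16, device_map="cpu",
)
tokenizer = AutoTokenizer.from_pretrained("./exa_llamafied")

ids = tokenizer("The quick brown fox", return_tensors="pt").input_ids
with torch.no_grad():
    diff = (exaone(ids).logits - llama(ids).logits).abs().max().item()
print(f"max abs logit diff: {diff}")  # expect a small value
```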

Many thanks to LG AI Research for releasing the model.