CarrotAI committed on
Commit
f2295d2
·
verified ·
1 Parent(s): 1ccce6f

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +108 -5
README.md CHANGED
@@ -1,5 +1,108 @@
1
- ---
2
- license: other
3
- license_name: exaone
4
- license_link: https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct/blob/main/LICENSE
5
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: other
3
+ license_name: exaone
4
+ license_link: https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct/blob/main/LICENSE
5
+ ---
6
+ [LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct](https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct)
7
+
8
+ 작업해주신 maywell/EXAONE-3.0-7.8B-Instruct-Llamafied을 참고해서 변경했습니다.
9
+ GPU 자원이 없으시면 사용하시면 됩니다.
10
+
11
+ ```python
12
+ import torch
13
+ import gc
14
+
15
+ from transformers import LlamaConfig, LlamaForCausalLM, AutoModelForCausalLM, AutoTokenizer
16
+ from tqdm import tqdm
17
+
18
def unload_model(model):
    """Release the local reference to *model* and reclaim memory.

    Runs the garbage collector; when CUDA is available, also empties
    the CUDA allocator cache so freed tensors return to the device pool.
    """
    del model
    gc.collect()
    if not torch.cuda.is_available():
        return
    torch.cuda.empty_cache()
23
+
24
def create_llama_config(exaone_config):
    """Translate an EXAONE model config into an equivalent ``LlamaConfig``.

    Most fields carry over one-to-one; the differing names are
    ``num_layers`` -> ``num_hidden_layers`` and ``layer_norm_epsilon``
    -> ``rms_norm_eps``. ``attention_bias`` is fixed to ``False``.
    """
    cfg = exaone_config
    fields = dict(
        vocab_size=cfg.vocab_size,
        hidden_size=cfg.hidden_size,
        intermediate_size=cfg.intermediate_size,
        num_hidden_layers=cfg.num_layers,
        num_attention_heads=cfg.num_attention_heads,
        max_position_embeddings=cfg.max_position_embeddings,
        rms_norm_eps=cfg.layer_norm_epsilon,
        num_key_value_heads=cfg.num_key_value_heads,
        rope_theta=cfg.rope_theta,
        bos_token_id=cfg.bos_token_id,
        eos_token_id=cfg.eos_token_id,
        pad_token_id=cfg.pad_token_id,
        attention_bias=False,
    )
    return LlamaConfig(**fields)
40
+
41
def copy_embedding_weights(llama_model, exaone_model):
    """Port the token-embedding matrix (EXAONE ``wte`` -> Llama ``embed_tokens``), cast to fp16."""
    source = exaone_model.transformer.wte.weight.data
    llama_model.model.embed_tokens.weight.data = source.to(torch.float16)
43
+
44
def copy_layer_weights(llama_layer, exaone_layer):
    """Port one transformer layer's weights from EXAONE to Llama, cast to fp16.

    Mapping: attention q/k/v and output projections, the gated MLP
    (``c_fc_0`` -> gate, ``c_fc_1`` -> up, ``c_proj`` -> down) and the
    two layer norms (``ln_1``/``ln_2``).
    """
    attn = exaone_layer.attn.attention
    pairs = (
        (llama_layer.self_attn.q_proj, attn.q_proj),
        (llama_layer.self_attn.k_proj, attn.k_proj),
        (llama_layer.self_attn.v_proj, attn.v_proj),
        (llama_layer.self_attn.o_proj, attn.out_proj),
        (llama_layer.mlp.gate_proj, exaone_layer.mlp.c_fc_0),
        (llama_layer.mlp.up_proj, exaone_layer.mlp.c_fc_1),
        (llama_layer.mlp.down_proj, exaone_layer.mlp.c_proj),
        (llama_layer.input_layernorm, exaone_layer.ln_1),
        (llama_layer.post_attention_layernorm, exaone_layer.ln_2),
    )
    for dst, src in pairs:
        dst.weight.data = src.weight.data.to(torch.float16)
57
+
58
def copy_final_weights(llama_model, exaone_model):
    """Port the final norm (``ln_f``) and the LM head, cast to fp16."""
    final_norm = exaone_model.transformer.ln_f.weight.data
    head = exaone_model.lm_head.weight.data
    llama_model.model.norm.weight.data = final_norm.to(torch.float16)
    llama_model.lm_head.weight.data = head.to(torch.float16)
61
+
62
def port_exaone_to_llama(exaone_model_path, llama_model_path):
    """Convert an EXAONE checkpoint into Llama format and save it to disk.

    Loads the EXAONE model on CPU in fp16, builds a matching Llama
    skeleton from its config, copies all weights layer by layer, then
    saves the result as sharded safetensors together with the original
    tokenizer.

    Args:
        exaone_model_path: HF hub id or local path of the EXAONE model.
        llama_model_path: Output directory for the converted model.
    """
    print("Loading EXAONE model and tokenizer...")
    src_model = AutoModelForCausalLM.from_pretrained(
        exaone_model_path, torch_dtype=torch.float16, device_map="cpu", trust_remote_code=True
    )
    src_tokenizer = AutoTokenizer.from_pretrained(exaone_model_path, trust_remote_code=True)
    src_config = src_model.config

    print("Creating Llama configuration...")
    dst_config = create_llama_config(src_config)

    print("Initializing Llama model...")
    dst_model = LlamaForCausalLM(dst_config)
    dst_model.to(torch.float16)
    dst_model.to('cpu')

    print("Copying weights...")
    with torch.no_grad():
        copy_embedding_weights(dst_model, src_model)

        for layer_idx in tqdm(range(src_config.num_layers), desc="Copying layers"):
            copy_layer_weights(dst_model.model.layers[layer_idx], src_model.transformer.h[layer_idx])
            # Periodic GC keeps peak host memory down while both models coexist.
            if layer_idx % 10 == 0:
                gc.collect()
                if torch.cuda.is_available():
                    torch.cuda.empty_cache()

        copy_final_weights(dst_model, src_model)

    print("Unloading EXAONE model to free memory...")
    unload_model(src_model)

    print(f"Saving ported Llama model and tokenizer to {llama_model_path}")
    dst_model.save_pretrained(llama_model_path, safe_serialization=True, max_shard_size="1GB")
    src_tokenizer.save_pretrained(llama_model_path)

    print("Unloading Llama model...")
    unload_model(dst_model)

    print(f"EXAONE model successfully ported to Llama format and saved at {llama_model_path}")
100
+
101
+ if __name__ == "__main__":
102
+ exaone_model_path = "LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct"
103
+ llama_model_path = "./exa_llamafied"
104
+ port_exaone_to_llama(exaone_model_path, llama_model_path)
105
+ ```
106
+
107
+
108
+ 모델을 공개해주신 LG AI Research분들께 감사의 말씀 드립니다.