Update README.md
README.md
CHANGED
@@ -1,5 +1,108 @@
---
license: other
license_name: exaone
license_link: https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct/blob/main/LICENSE
---

[LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct](https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct)

This conversion was made with reference to maywell/EXAONE-3.0-7.8B-Instruct-Llamafied; many thanks for that work. If you don't have GPU resources, you can use this model as-is. The conversion script below runs entirely on CPU:

```python
import gc

import torch
from tqdm import tqdm
from transformers import AutoModelForCausalLM, AutoTokenizer, LlamaConfig, LlamaForCausalLM


def unload_model(model):
    # Drop the reference and reclaim memory.
    del model
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()


def create_llama_config(exaone_config):
    # Map the EXAONE config fields onto the equivalent LlamaConfig fields.
    return LlamaConfig(
        vocab_size=exaone_config.vocab_size,
        hidden_size=exaone_config.hidden_size,
        intermediate_size=exaone_config.intermediate_size,
        num_hidden_layers=exaone_config.num_layers,
        num_attention_heads=exaone_config.num_attention_heads,
        max_position_embeddings=exaone_config.max_position_embeddings,
        rms_norm_eps=exaone_config.layer_norm_epsilon,
        num_key_value_heads=exaone_config.num_key_value_heads,
        rope_theta=exaone_config.rope_theta,
        bos_token_id=exaone_config.bos_token_id,
        eos_token_id=exaone_config.eos_token_id,
        pad_token_id=exaone_config.pad_token_id,
        attention_bias=False,
    )


def copy_embedding_weights(llama_model, exaone_model):
    llama_model.model.embed_tokens.weight.data = exaone_model.transformer.wte.weight.data.to(torch.float16)


def copy_layer_weights(llama_layer, exaone_layer):
    # Self-attention
    llama_layer.self_attn.q_proj.weight.data = exaone_layer.attn.attention.q_proj.weight.data.to(torch.float16)
    llama_layer.self_attn.k_proj.weight.data = exaone_layer.attn.attention.k_proj.weight.data.to(torch.float16)
    llama_layer.self_attn.v_proj.weight.data = exaone_layer.attn.attention.v_proj.weight.data.to(torch.float16)
    llama_layer.self_attn.o_proj.weight.data = exaone_layer.attn.attention.out_proj.weight.data.to(torch.float16)
    # MLP: EXAONE's c_fc_0 / c_fc_1 / c_proj correspond to Llama's gate / up / down projections
    llama_layer.mlp.gate_proj.weight.data = exaone_layer.mlp.c_fc_0.weight.data.to(torch.float16)
    llama_layer.mlp.up_proj.weight.data = exaone_layer.mlp.c_fc_1.weight.data.to(torch.float16)
    llama_layer.mlp.down_proj.weight.data = exaone_layer.mlp.c_proj.weight.data.to(torch.float16)
    # Layer norms
    llama_layer.input_layernorm.weight.data = exaone_layer.ln_1.weight.data.to(torch.float16)
    llama_layer.post_attention_layernorm.weight.data = exaone_layer.ln_2.weight.data.to(torch.float16)


def copy_final_weights(llama_model, exaone_model):
    llama_model.model.norm.weight.data = exaone_model.transformer.ln_f.weight.data.to(torch.float16)
    llama_model.lm_head.weight.data = exaone_model.lm_head.weight.data.to(torch.float16)


def port_exaone_to_llama(exaone_model_path, llama_model_path):
    print("Loading EXAONE model and tokenizer...")
    exaone_model = AutoModelForCausalLM.from_pretrained(exaone_model_path, torch_dtype=torch.float16, device_map="cpu", trust_remote_code=True)
    exaone_tokenizer = AutoTokenizer.from_pretrained(exaone_model_path, trust_remote_code=True)
    exaone_config = exaone_model.config

    print("Creating Llama configuration...")
    llama_config = create_llama_config(exaone_config)

    print("Initializing Llama model...")
    llama_model = LlamaForCausalLM(llama_config)
    llama_model.to(torch.float16)
    llama_model.to('cpu')

    print("Copying weights...")
    with torch.no_grad():
        copy_embedding_weights(llama_model, exaone_model)

        for i in tqdm(range(exaone_config.num_layers), desc="Copying layers"):
            copy_layer_weights(llama_model.model.layers[i], exaone_model.transformer.h[i])
            if i % 10 == 0:  # garbage-collect every 10 layers
                gc.collect()
                if torch.cuda.is_available():
                    torch.cuda.empty_cache()

        copy_final_weights(llama_model, exaone_model)

    print("Unloading EXAONE model to free memory...")
    unload_model(exaone_model)

    print(f"Saving ported Llama model and tokenizer to {llama_model_path}")
    llama_model.save_pretrained(llama_model_path, safe_serialization=True, max_shard_size="1GB")
    exaone_tokenizer.save_pretrained(llama_model_path)

    print("Unloading Llama model...")
    unload_model(llama_model)

    print(f"EXAONE model successfully ported to Llama format and saved at {llama_model_path}")


if __name__ == "__main__":
    exaone_model_path = "LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct"
    llama_model_path = "./exa_llamafied"
    port_exaone_to_llama(exaone_model_path, llama_model_path)
```
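
Once converted, the checkpoint loads with the stock Llama classes, so `trust_remote_code` is no longer needed. A minimal loading sketch, assuming the script above has been run with its default output path `./exa_llamafied` and that the EXAONE chat template was carried over with the saved tokenizer:

```python
# Minimal sketch: load the converted checkpoint with the stock Llama classes.
# Assumes the conversion script above was run with output path ./exa_llamafied.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_path = "./exa_llamafied"
tokenizer = AutoTokenizer.from_pretrained(model_path)
# float32 on CPU; fp16 inference on CPU is slow and not always supported
model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.float32, device_map="cpu")

# The chat template is saved alongside the tokenizer, so apply_chat_template
# should format the prompt the way the instruct model expects.
messages = [{"role": "user", "content": "Hello, who are you?"}]
input_ids = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt")

with torch.no_grad():
    output = model.generate(input_ids, max_new_tokens=64)
print(tokenizer.decode(output[0][input_ids.shape[-1]:], skip_special_tokens=True))
```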
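
To check that the port preserved the weights, one option is to compare logits from the original and the converted model on the same input. A rough sketch, with the caveat that it needs enough CPU RAM to hold both fp16 copies at once (roughly 30 GB for the 7.8B model):

```python
# Rough sanity check (assumption: enough CPU RAM for both fp16 copies).
# Logits should match up to small numerical differences between the two
# attention implementations, not necessarily exactly.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

exaone = AutoModelForCausalLM.from_pretrained(
    "LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct",
    torch_dtype=torch.float16, device_map="cpu", trust_remote_code=True,
)
llama = AutoModelForCausalLM.from_pretrained(
    "./exa_llamafied", torch_dtype=torch.float16, device_map="cpu",
)
tokenizer = AutoTokenizer.from_pretrained("./exa_llamafied")

ids = tokenizer("The quick brown fox", return_tensors="pt").input_ids
with torch.no_grad():
    diff = (exaone(ids).logits - llama(ids).logits).abs().max().item()
print(f"max abs logit diff: {diff}")  # expect a small value
```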

Many thanks to LG AI Research for releasing the model.