# Spaces: Runtime error  (page-status text captured with this listing; not part of the module)
# Sentinel index value.
# NOTE(review): presumably the label id that the loss function skips -- confirm against usage.
IGNORE_INDEX = -100

# File name of the trainer log; the extension indicates JSON Lines.
LOG_FILE_NAME = "trainer_log.jsonl"

# Names associated with layer-normalization modules.
# NOTE(review): presumably matched against parameter names -- verify at the call site.
LAYERNORM_NAMES = ["norm", "ln_f", "ln_attn", "ln_mlp"]

# Identifiers of the selectable methods.
# NOTE(review): presumably fine-tuning strategies -- confirm against the consumer.
METHODS = ["full", "freeze", "lora"]
# Maps a human-readable training-stage name to its short stage code.
# Insertion order is kept exactly as originally declared.
TRAINING_STAGES = {
    "Supervised Fine-Tuning": "sft",
    "Reward Modeling": "rm",
    "PPO": "ppo",
    "DPO": "dpo",
    "Pre-Training": "pt",
}
# Maps a model display name to an "<organization>/<model>" identifier.
# NOTE(review): the values look like Hugging Face Hub repository ids -- confirm with the loader.
# The text before the first "-" in each key is the model family name; the
# per-family default tables in this module use those same family names as keys.
SUPPORTED_MODELS = {
    # LLaMA
    "LLaMA-7B": "huggyllama/llama-7b",
    "LLaMA-13B": "huggyllama/llama-13b",
    "LLaMA-30B": "huggyllama/llama-30b",
    "LLaMA-65B": "huggyllama/llama-65b",
    # LLaMA2
    "LLaMA2-7B": "meta-llama/Llama-2-7b-hf",
    "LLaMA2-13B": "meta-llama/Llama-2-13b-hf",
    "LLaMA2-70B": "meta-llama/Llama-2-70b-hf",
    "LLaMA2-7B-Chat": "meta-llama/Llama-2-7b-chat-hf",
    "LLaMA2-13B-Chat": "meta-llama/Llama-2-13b-chat-hf",
    "LLaMA2-70B-Chat": "meta-llama/Llama-2-70b-chat-hf",
    # ChineseLLaMA2 (the "-Chat" entries point at the chinese-alpaca-2 models)
    "ChineseLLaMA2-7B": "ziqingyang/chinese-llama-2-7b",
    "ChineseLLaMA2-13B": "ziqingyang/chinese-llama-2-13b",
    "ChineseLLaMA2-7B-Chat": "ziqingyang/chinese-alpaca-2-7b",
    "ChineseLLaMA2-13B-Chat": "ziqingyang/chinese-alpaca-2-13b",
    # BLOOM / BLOOMZ
    "BLOOM-560M": "bigscience/bloom-560m",
    "BLOOM-3B": "bigscience/bloom-3b",
    "BLOOM-7B1": "bigscience/bloom-7b1",
    "BLOOMZ-560M": "bigscience/bloomz-560m",
    "BLOOMZ-3B": "bigscience/bloomz-3b",
    "BLOOMZ-7B1-mt": "bigscience/bloomz-7b1-mt",
    # Falcon (the "-Chat" entries point at the "-instruct" models)
    "Falcon-7B": "tiiuae/falcon-7b",
    "Falcon-7B-Chat": "tiiuae/falcon-7b-instruct",
    "Falcon-40B": "tiiuae/falcon-40b",
    "Falcon-40B-Chat": "tiiuae/falcon-40b-instruct",
    # Baichuan / Baichuan2
    "Baichuan-7B": "baichuan-inc/Baichuan-7B",
    "Baichuan-13B": "baichuan-inc/Baichuan-13B-Base",
    "Baichuan-13B-Chat": "baichuan-inc/Baichuan-13B-Chat",
    "Baichuan2-7B": "baichuan-inc/Baichuan2-7B-Base",
    "Baichuan2-13B": "baichuan-inc/Baichuan2-13B-Base",
    "Baichuan2-7B-Chat": "baichuan-inc/Baichuan2-7B-Chat",
    "Baichuan2-13B-Chat": "baichuan-inc/Baichuan2-13B-Chat",
    # InternLM
    "InternLM-7B": "internlm/internlm-7b",
    "InternLM-7B-Chat": "internlm/internlm-chat-7b",
    # Qwen
    "Qwen-7B": "Qwen/Qwen-7B",
    "Qwen-7B-Chat": "Qwen/Qwen-7B-Chat",
    # XVERSE
    "XVERSE-13B": "xverse/XVERSE-13B",
    "XVERSE-13B-Chat": "xverse/XVERSE-13B-Chat",
    # ChatGLM2
    "ChatGLM2-6B-Chat": "THUDM/chatglm2-6b",
}
# Maps a model family name to its default module name(s).
# A value is either a single module name or several names joined by commas
# with no surrounding spaces (e.g. "q_proj,v_proj").
# NOTE(review): presumably the default target modules for LoRA -- confirm against the consumer.
DEFAULT_MODULE = {
    "LLaMA": "q_proj,v_proj",
    "LLaMA2": "q_proj,v_proj",
    "ChineseLLaMA2": "q_proj,v_proj",
    "BLOOM": "query_key_value",
    "BLOOMZ": "query_key_value",
    "Falcon": "query_key_value",
    "Baichuan": "W_pack",
    "Baichuan2": "W_pack",
    "InternLM": "q_proj,v_proj",
    "Qwen": "c_attn",
    "XVERSE": "q_proj,v_proj",
    "ChatGLM2": "query_key_value",
}
# Maps a model family name to the name of its default template.
# Only the families listed here have an entry; "LLaMA", "BLOOM", "BLOOMZ" and
# "Falcon" are intentionally left as they were (absent), so lookups for them
# must supply their own fallback.
# NOTE(review): presumably these are prompt/chat template names -- confirm against the consumer.
DEFAULT_TEMPLATE = {
    "LLaMA2": "llama2",
    "ChineseLLaMA2": "llama2_zh",
    "Baichuan": "baichuan",
    "Baichuan2": "baichuan2",
    "InternLM": "intern",
    "Qwen": "chatml",
    "XVERSE": "xverse",
    "ChatGLM2": "chatglm2",
}