ready for QLoRA
- llama-factory/config/mgtv_template.yaml +42 -0
- llama-factory/config/mgtv_template_4bit.yaml +43 -0
- llm_toolkit/setup_lf.py +60 -0
- scripts/tune-lf_v2.sh +17 -0
- scripts/tune-mgtv-4bit.sh +25 -0
- scripts/tune-mgtv.sh +1 -1
llama-factory/config/mgtv_template.yaml
ADDED
@@ -0,0 +1,42 @@
+### model
+model_name_or_path: ORG_NAME/MODEL_NAME
+
+### method
+stage: sft
+do_train: true
+finetuning_type: lora
+lora_target: all
+
+### dataset
+dataset: alpaca_mgtv_p2
+template: CHAT_TEMPLATE
+cutoff_len: 8192
+max_samples: 25000
+overwrite_cache: true
+preprocessing_num_workers: 16
+
+### output
+output_dir: saves/MODEL_NAME
+logging_steps: 5
+save_steps: 35
+plot_loss: true
+# overwrite_output_dir: true
+
+### train
+per_device_train_batch_size: 16
+gradient_accumulation_steps: 8
+learning_rate: 1.0e-4
+num_train_epochs: 6.0
+lr_scheduler_type: cosine
+warmup_ratio: 0.1
+bf16: true
+ddp_timeout: 180000000
+
+### eval
+val_size: 0.01
+per_device_eval_batch_size: 1
+eval_strategy: steps
+eval_steps: 35
+
+report_to: wandb
+run_name: MODEL_NAME_lora_sft
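Note: ORG_NAME, MODEL_NAME, and CHAT_TEMPLATE are literal placeholder strings that llm_toolkit/setup_lf.py (below) substitutes from environment variables before training; with per_device_train_batch_size 16 and gradient_accumulation_steps 8, each optimizer step sees 16 x 8 = 128 samples per GPU. To preview a rendered config by hand, a sed pipeline is a rough stand-in for setup_lf.py (a sketch only; the Qwen values are examples borrowed from scripts/tune-mgtv-4bit.sh below):

    # Sketch only -- setup_lf.py performs the real substitution.
    sed -e 's|ORG_NAME|Qwen|g' \
        -e 's|MODEL_NAME|Qwen2-72B-Instruct|g' \
        -e 's|CHAT_TEMPLATE|qwen|g' \
        llama-factory/config/mgtv_template.yaml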
llama-factory/config/mgtv_template_4bit.yaml
ADDED
@@ -0,0 +1,43 @@
+### model
+model_name_or_path: ORG_NAME/MODEL_NAME
+quantization_bit: 4
+
+### method
+stage: sft
+do_train: true
+finetuning_type: lora
+lora_target: all
+
+### dataset
+dataset: alpaca_mgtv_p2
+template: CHAT_TEMPLATE
+cutoff_len: 8192
+max_samples: 25000
+overwrite_cache: true
+preprocessing_num_workers: 16
+
+### output
+output_dir: saves/MODEL_NAME
+logging_steps: 5
+save_steps: 35
+plot_loss: true
+# overwrite_output_dir: true
+
+### train
+per_device_train_batch_size: 16
+gradient_accumulation_steps: 8
+learning_rate: 1.0e-4
+num_train_epochs: 6.0
+lr_scheduler_type: cosine
+warmup_ratio: 0.1
+bf16: true
+ddp_timeout: 180000000
+
+### eval
+val_size: 0.01
+per_device_eval_batch_size: 1
+eval_strategy: steps
+eval_steps: 35
+
+report_to: wandb
+run_name: MODEL_NAME_lora_sft
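This 4-bit template is identical to mgtv_template.yaml except for quantization_bit: 4, which has LLaMA-Factory load the base model 4-bit quantized so the LoRA adapters train on top of it (QLoRA, per the commit message). Quick check:

    # Expect a single added line: "> quantization_bit: 4"
    diff llama-factory/config/mgtv_template.yaml \
         llama-factory/config/mgtv_template_4bit.yaml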
llm_toolkit/setup_lf.py
ADDED
@@ -0,0 +1,60 @@
+import os, json, sys, yaml
+from dotenv import find_dotenv, load_dotenv
+
+found_dotenv = find_dotenv(".env")
+
+if len(found_dotenv) == 0:
+    found_dotenv = find_dotenv(".env.example")
+print(f"loading env vars from: {found_dotenv}")
+load_dotenv(found_dotenv, override=False)
+
+path = os.path.dirname(found_dotenv)
+print(f"Adding {path} to sys.path")
+sys.path.append(path)
+
+from llm_toolkit.llm_utils import *
+from llm_toolkit.logical_reasoning_utils import *
+
+org_name = os.getenv("ORG_NAME")
+model_name = os.getenv("MODEL_NAME")
+chat_template = os.getenv("CHAT_TEMPLATE")
+filename = os.getenv("YAML")
+data_path = os.getenv("DATA_PATH")
+
+print(org_name, model_name, chat_template, filename, data_path)
+
+if not filename:
+    print("Error: Environment variable YAML not set")
+    sys.exit(1)
+
+if not os.path.exists(filename):
+    print(f"Error: File {filename} not found")
+    sys.exit(1)
+
+file = open(filename)
+yaml_content = file.read()
+file.close()
+
+keys = ["ORG_NAME", "MODEL_NAME", "CHAT_TEMPLATE"]
+for key in keys:
+    yaml_content = yaml_content.replace(key, os.getenv(key))
+
+# print(f"YAML content:\n{yaml_content}")
+parts = filename.split("/")
+parts[-1] = "models"
+parts.append(f"{os.getenv('MODEL_NAME')}.yaml")
+filename = "/".join(parts)
+print(f"Writing to {filename}")
+
+# Create the parent directory if it doesn't exist
+os.makedirs(os.path.dirname(filename), exist_ok=True)
+
+file = open(filename, "w")
+file.write(yaml_content)
+file.close()
+
+y = yaml.safe_load(open(filename))
+print(f"{filename}:\n", json.dumps(y, indent=2))
+
+dataset = load_alpaca_data(data_path, using_p1=False)
+print_row_details(dataset, [0, -1])
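setup_lf.py loads environment variables from .env (falling back to .env.example), substitutes the three placeholders into the template named by YAML, and writes the result to config/models/<MODEL_NAME>.yaml (the output path is derived by swapping the template's filename for models/<MODEL_NAME>.yaml); it then round-trips the generated YAML and previews the dataset. A typical run, using the same values tune-lf_v2.sh exports:

    cd llama-factory
    export ORG_NAME=Qwen
    export MODEL_NAME=Qwen2-72B-Instruct
    export CHAT_TEMPLATE=qwen
    export DATA_PATH=../datasets/mgtv
    export YAML=config/mgtv_template_4bit.yaml
    # writes config/models/Qwen2-72B-Instruct.yaml
    python ../llm_toolkit/setup_lf.py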
scripts/tune-lf_v2.sh
ADDED
@@ -0,0 +1,17 @@
+#!/bin/sh
+
+BASEDIR=$(dirname "$0")
+cd $BASEDIR/../llama-factory/
+echo Current Directory:
+pwd
+
+export ORG_NAME=$1
+export MODEL_NAME=$2
+export CHAT_TEMPLATE=$3
+export DATA_PATH=../datasets/mgtv
+export YAML=config/mgtv_template_4bit.yaml
+
+export PYTORCH_CUDA_ALLOC_CONF="expandable_segments:True"
+
+python ../llm_toolkit/setup_lf.py
+llamafactory-cli train config/models/$MODEL_NAME.yaml
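The wrapper takes ORG_NAME, MODEL_NAME, and CHAT_TEMPLATE as positional arguments; note that despite the generic name, YAML is pinned to the 4-bit template, so every run through this script is QLoRA. Example (mirroring tune-mgtv-4bit.sh below):

    scripts/tune-lf_v2.sh Qwen Qwen2-72B-Instruct qwen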
scripts/tune-mgtv-4bit.sh
ADDED
@@ -0,0 +1,25 @@
+#!/bin/sh
+
+BASEDIR=$(dirname "$0")
+cd $BASEDIR/..
+echo Current Directory:
+pwd
+
+BASEDIR=`pwd`
+
+nvidia-smi
+uname -a
+cat /etc/os-release
+lscpu
+grep MemTotal /proc/meminfo
+
+# pip install --upgrade transformers
+
+export LOGICAL_REASONING_DATA_PATH=datasets/mgtv
+
+export LOGICAL_REASONING_RESULTS_PATH=results/mgtv-results_4bit.csv
+
+$BASEDIR/scripts/tune-lf_v2.sh Qwen Qwen2-72B-Instruct qwen
+
+$BASEDIR/scripts/tune-lf_v2.sh shenzhi-wang Llama3.1-70B-Chinese-Chat llama3
+
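The driver prints hardware diagnostics, then queues both QLoRA runs back to back, each positional triple mapping to ORG_NAME, MODEL_NAME, and CHAT_TEMPLATE in tune-lf_v2.sh. A sketch for keeping a log of the full run (the logs/ path is an assumption, not part of this repo):

    # Assumed logs/ directory; adjust to taste.
    mkdir -p logs
    nohup scripts/tune-mgtv-4bit.sh > logs/tune-mgtv-4bit.log 2>&1 &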
scripts/tune-mgtv.sh
CHANGED
@@ -1 +1 @@
-tune-mgtv-
+tune-mgtv-4bit.sh
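The one-line blob suggests scripts/tune-mgtv.sh is a symlink (git stores a symlink as a blob containing its target path), retargeted here to the 4-bit driver; the previous target is truncated in this view. Under that assumption, the equivalent manual change is:

    # Assumption: tune-mgtv.sh is a relative symlink inside scripts/.
    ln -sf tune-mgtv-4bit.sh scripts/tune-mgtv.sh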