Spaces:
Runtime error
Runtime error
[update]add code
Browse files- .gitignore +6 -0
- examples/exercises/chinese_modern_poetry/1.prepare_data.py +39 -0
- examples/exercises/chinese_modern_poetry/2.train_model.py +327 -0
- examples/exercises/chinese_modern_poetry/3.merge_lora.py +70 -0
- examples/exercises/chinese_modern_poetry/4.test_model.py +104 -0
- examples/exercises/chinese_modern_poetry/run.sh +196 -0
- examples/exercises/chinese_modern_poetry/stop.sh +1 -0
- main.py +211 -0
- project_settings.py +20 -0
- requirements.txt +16 -0
- script/install_bitsandbytes.sh +232 -0
- script/install_conda.sh +58 -0
- script/install_cuda.sh +103 -0
- script/install_nvidia_driver.sh +184 -0
- script/install_openssl.sh +58 -0
- script/install_python.sh +126 -0
- toolbox/__init__.py +6 -0
- toolbox/json/__init__.py +6 -0
- toolbox/json/misc.py +63 -0
- toolbox/os/__init__.py +6 -0
- toolbox/os/environment.py +114 -0
- toolbox/os/other.py +9 -0
- toolbox/transformers/__init__.py +6 -0
- toolbox/transformers/data/__init__.py +6 -0
- toolbox/transformers/data/data_collator.py +54 -0
- toolbox/transformers/data/dataset/__init__.py +6 -0
- toolbox/transformers/data/dataset/dataset.py +79 -0
- toolbox/transformers/modules/__init__.py +6 -0
- toolbox/transformers/modules/loss.py +51 -0
- toolbox/transformers/trainer.py +99 -0
.gitignore
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
.git/
|
3 |
+
.idea/
|
4 |
+
|
5 |
+
**/flagged/
|
6 |
+
**/__pycache__/
|
examples/exercises/chinese_modern_poetry/1.prepare_data.py
ADDED
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
import argparse
|
4 |
+
|
5 |
+
from datasets import load_dataset
|
6 |
+
|
7 |
+
from project_settings import project_path
|
8 |
+
|
9 |
+
|
10 |
+
def get_args():
    """
    Parse the command-line options that identify the dataset to load.

    :return: argparse.Namespace with dataset_path, dataset_name,
        dataset_split and dataset_cache_dir.
    """
    # The dataset cache defaults to <project_path>/hub_datasets.
    default_cache_dir = (project_path / "hub_datasets").as_posix()

    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("--dataset_path", type=str, default="Iess/chinese_modern_poetry")
    arg_parser.add_argument("--dataset_name", type=str, default=None)
    arg_parser.add_argument("--dataset_split", type=str, default=None)
    arg_parser.add_argument("--dataset_cache_dir", type=str, default=default_cache_dir)
    return arg_parser.parse_args()
|
22 |
+
|
23 |
+
|
24 |
+
def main():
    """
    Load the dataset selected on the command line via `load_dataset` and print it.
    """
    cli = get_args()

    load_kwargs = {
        "path": cli.dataset_path,
        "name": cli.dataset_name,
        "split": cli.dataset_split,
        "cache_dir": cli.dataset_cache_dir,
    }
    print(load_dataset(**load_kwargs))

    return
|
36 |
+
|
37 |
+
|
38 |
+
if __name__ == '__main__':
|
39 |
+
main()
|
examples/exercises/chinese_modern_poetry/2.train_model.py
ADDED
@@ -0,0 +1,327 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
import argparse
|
4 |
+
from collections import defaultdict
|
5 |
+
from dataclasses import dataclass, field
|
6 |
+
import os
|
7 |
+
import platform
|
8 |
+
import sys
|
9 |
+
from typing import Optional
|
10 |
+
|
11 |
+
pwd = os.path.abspath(os.path.dirname(__file__))
|
12 |
+
sys.path.append(os.path.join(pwd, '../../../'))
|
13 |
+
|
14 |
+
import bitsandbytes as bnb
|
15 |
+
from datasets import Dataset, DatasetDict, load_dataset
|
16 |
+
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
|
17 |
+
import torch
|
18 |
+
from transformers.data.data_collator import DataCollatorForLanguageModeling
|
19 |
+
from transformers.trainer import Trainer
|
20 |
+
from transformers.training_args import TrainingArguments
|
21 |
+
from transformers.models.auto import AutoModelForCausalLM, AutoTokenizer
|
22 |
+
from transformers.utils.quantization_config import BitsAndBytesConfig
|
23 |
+
|
24 |
+
from project_settings import project_path
|
25 |
+
from toolbox.transformers.data.dataset.dataset import SFTDataset, ChatGLM2SFTDataset
|
26 |
+
from toolbox.transformers.data.data_collator import SFTDataCollator
|
27 |
+
from toolbox.transformers.modules.loss import TargetLMLoss
|
28 |
+
from toolbox.transformers.trainer import LoRATrainer
|
29 |
+
|
30 |
+
|
31 |
+
def get_args():
    """
    Parse the command-line options of the LoRA fine-tuning script.

    Example:
        python3 2.train_model.py --pretrained_model_name_or_path /data/tianxing/PycharmProjects/Transformers/pretrained_models/huggingface/YeungNLP/firefly-chatglm2-6b

    :return: argparse.Namespace holding the dataset, model, training,
        preprocessing and LoRA options.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--dataset_path", default="Iess/chinese_modern_poetry", type=str)
    parser.add_argument("--dataset_name", default=None, type=str)
    parser.add_argument("--dataset_split", default=None, type=str)
    parser.add_argument(
        "--dataset_cache_dir",
        default=(project_path / "hub_datasets").as_posix(),
        type=str
    )

    parser.add_argument(
        "--pretrained_model_name_or_path",
        default="Qwen/Qwen-7B",
        type=str
    )
    parser.add_argument("--cache_dir", default="cache_dir", type=str)

    # train
    parser.add_argument("--output_dir", default="serialization_dir", type=str)
    parser.add_argument("--overwrite_output_dir", action="store_true")
    parser.add_argument("--evaluation_strategy", default="no", choices=["no", "steps", "epoch"], type=str)
    parser.add_argument("--per_device_train_batch_size", default=4, type=int)
    parser.add_argument("--gradient_accumulation_steps", default=4, type=int)
    parser.add_argument("--learning_rate", default=2e-4, type=float)
    parser.add_argument("--weight_decay", default=0, type=float)
    parser.add_argument("--max_grad_norm", default=0.3, type=float)
    parser.add_argument("--num_train_epochs", default=1.0, type=float)
    parser.add_argument("--max_steps", default=-1, type=int)
    parser.add_argument("--lr_scheduler_type", default="constant_with_warmup", type=str)
    parser.add_argument("--warmup_ratio", default=0.0, type=float)
    parser.add_argument("--warmup_steps", default=3000, type=int)
    parser.add_argument("--logging_steps", default=300, type=int)
    parser.add_argument("--save_strategy", default="steps", type=str)
    parser.add_argument("--save_steps", default=500, type=int)
    parser.add_argument("--save_total_limit", default=2, type=int)
    parser.add_argument("--no_cuda", action="store_true")
    # The seed is an integer; it was previously parsed as a string.
    parser.add_argument("--seed", default=3407, type=int, help="https://arxiv.org/abs/2109.08203")
    # NOTE: store_false means fp16 defaults to True and passing --fp16 turns it OFF.
    parser.add_argument("--fp16", action="store_false")
    parser.add_argument("--half_precision_backend", default="auto", type=str)
    parser.add_argument("--dataloader_num_workers", default=0, type=int)
    parser.add_argument("--disable_tqdm", action="store_true")
    parser.add_argument("--remove_unused_columns", action="store_true")
    parser.add_argument("--deepspeed", default=None, type=str)
    parser.add_argument("--optim", default="paged_adamw_32bit", type=str)
    parser.add_argument("--report_to", default="tensorboard", type=str)
    parser.add_argument("--resume_from_checkpoint", default="file_dir/serialization_dir/checkpoint-103000", type=str)
    # NOTE: store_false means gradient checkpointing defaults to True and
    # passing --gradient_checkpointing turns it OFF.
    parser.add_argument("--gradient_checkpointing", action="store_false")

    # dataset process
    parser.add_argument("--truncate_longer_samples", action="store_true")
    parser.add_argument("--max_seq_length", default=1024, type=int)

    # lora
    parser.add_argument("--lora_rank", default=64, type=int)
    parser.add_argument("--lora_alpha", default=16, type=int)
    # Dropout is a probability; with type=int any value given on the command
    # line (e.g. "0.1") was rejected by argparse.
    parser.add_argument("--lora_dropout", default=0.05, type=float)

    args = parser.parse_args()
    return args
|
100 |
+
|
101 |
+
|
102 |
+
def verify_model_dtype(model):
    """
    Print how the model's parameters are distributed over data types.

    Two sections are printed: one over all parameters and one over the
    parameters with `requires_grad` set. Each section lists, per dtype, the
    element count and its share of the section total. The trainable section
    additionally lists the names of the trainable parameters per dtype.

    :param model: object exposing `named_parameters()` that yields
        (name, parameter) pairs; each parameter provides `dtype`, `numel()`
        and `requires_grad`.
    :return: None. The report is written to stdout.
    """
    dtype2param_num = defaultdict(int)              # element count per dtype
    dtype2trainable_param_num = defaultdict(int)    # trainable element count per dtype
    dtype2trainable_param_name = defaultdict(list)  # trainable parameter names per dtype
    for name, p in model.named_parameters():
        dtype = p.dtype
        dtype2param_num[dtype] += p.numel()
        if p.requires_grad:
            dtype2trainable_param_num[dtype] += p.numel()
            dtype2trainable_param_name[dtype].append(name)

    # Distribution of dtypes over all parameters.
    print('verify all params of the model')
    total = sum(dtype2param_num.values())
    for k, v in dtype2param_num.items():
        # Guard the ratio: a total of zero elements would otherwise raise.
        print(k, v, v / total if total else 0.0)

    print()
    # Distribution of dtypes over the trainable parameters.
    print('verify trainable params the model')
    total_trainable = sum(dtype2trainable_param_num.values())
    for k, v in dtype2trainable_param_num.items():
        print(k, v, v / total_trainable if total_trainable else 0.0)
    # Names of the trainable parameters (the original printed the counts a
    # second time here and the names under the "all params" heading).
    for k, v in dtype2trainable_param_name.items():
        print(k, v)
|
138 |
+
|
139 |
+
|
140 |
+
def find_all_linear_names(model):
    """
    Collect the names of the 4-bit linear layers that should receive an adapter.

    Every sub-module that is a `bnb.nn.Linear4bit` contributes the last
    component of its dotted module name; 'lm_head' is excluded.

    :param model: module exposing `named_modules()`.
    :return: list of unique layer-name suffixes.
    """
    linear_cls = bnb.nn.Linear4bit
    found = {
        qualified_name.split('.')[-1]
        for qualified_name, layer in model.named_modules()
        if isinstance(layer, linear_cls)
    }

    # needed for 16-bit
    found.discard('lm_head')
    return list(found)
|
154 |
+
|
155 |
+
|
156 |
+
def main():
    """
    Fine-tune a 4-bit quantized causal language model with LoRA adapters.

    Steps: parse CLI options, load the dataset, build TrainingArguments,
    load the quantized base model and its tokenizer, wrap the model with
    LoRA adapters on every `Linear4bit` layer, tokenize prompt/response
    pairs into `input_ids` / `attention_mask` / `target_mask`, train with
    `LoRATrainer`, then save the final model, metrics and trainer state
    under `output_dir`.
    """
    args = get_args()

    os.makedirs(args.output_dir, exist_ok=True)
    os.makedirs(args.cache_dir, exist_ok=True)

    # dataset
    dataset_dict = load_dataset(
        path=args.dataset_path,
        name=args.dataset_name,
        split=args.dataset_split,
        cache_dir=args.dataset_cache_dir
    )
    # When --dataset_split is given, load_dataset returns a single split
    # instead of a DatasetDict, so indexing with "train" would fail.
    if isinstance(dataset_dict, DatasetDict):
        train_dataset = dataset_dict["train"]
    else:
        train_dataset = dataset_dict
    print(train_dataset)

    # training_args
    # NOTE(review): resume_from_checkpoint is only stored on TrainingArguments;
    # trainer.train() below is called without it - confirm whether resuming
    # from that checkpoint is actually intended.
    training_args = TrainingArguments(
        output_dir=args.output_dir,
        overwrite_output_dir=args.overwrite_output_dir,
        evaluation_strategy=args.evaluation_strategy,
        per_device_train_batch_size=args.per_device_train_batch_size,
        gradient_accumulation_steps=args.gradient_accumulation_steps,
        learning_rate=args.learning_rate,
        weight_decay=args.weight_decay,
        max_grad_norm=args.max_grad_norm,
        num_train_epochs=args.num_train_epochs,
        max_steps=args.max_steps,
        lr_scheduler_type=args.lr_scheduler_type,
        # --warmup_ratio and --seed were parsed but never forwarded.
        warmup_ratio=args.warmup_ratio,
        warmup_steps=args.warmup_steps,
        logging_steps=args.logging_steps,
        save_strategy=args.save_strategy,
        save_steps=args.save_steps,
        save_total_limit=args.save_total_limit,
        no_cuda=args.no_cuda,
        # int() keeps this working even if the option is parsed as a string.
        seed=int(args.seed),
        fp16=args.fp16,
        half_precision_backend=args.half_precision_backend,
        dataloader_num_workers=args.dataloader_num_workers,
        disable_tqdm=args.disable_tqdm,
        remove_unused_columns=args.remove_unused_columns,
        # deepspeed=args.deepspeed,
        optim=args.optim,
        report_to=args.report_to,
        resume_from_checkpoint=args.resume_from_checkpoint,
        gradient_checkpointing=args.gradient_checkpointing,
    )

    # pretrained model
    # 4-bit loading is requested through quantization_config only; passing
    # load_in_4bit as a separate keyword alongside quantization_config is
    # redundant and is rejected by recent transformers releases.
    model = AutoModelForCausalLM.from_pretrained(
        args.pretrained_model_name_or_path,
        device_map={"": 0},
        torch_dtype=torch.float16,
        trust_remote_code=True,
        quantization_config=BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.float16,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_type="nf4",
            llm_int8_threshold=6.0,
            llm_int8_has_fp16_weight=False,
        ),
    )
    # tokenizer
    tokenizer = AutoTokenizer.from_pretrained(
        args.pretrained_model_name_or_path,
        trust_remote_code=True,
        use_fast=False if model.config.model_type == "llama" else True
    )
    # QWenTokenizer is special: pad_token_id, bos_token_id and eos_token_id are
    # all None. eod_id is the id of the <|endoftext|> token.
    if tokenizer.__class__.__name__ == "QWenTokenizer":
        tokenizer.pad_token_id = tokenizer.eod_id
        tokenizer.bos_token_id = tokenizer.eod_id
        tokenizer.eos_token_id = tokenizer.eod_id

    # model
    # casts all the non int8 modules to full precision (fp32) for stability
    model = prepare_model_for_kbit_training(model, use_gradient_checkpointing=args.gradient_checkpointing)
    print(f"memory footprint of model: {model.get_memory_footprint() / (1024*1024*1024)} GB")

    # Find every linear layer that should receive an adapter.
    target_modules = find_all_linear_names(model)
    config = LoraConfig(
        r=args.lora_rank,
        lora_alpha=args.lora_alpha,
        target_modules=target_modules,
        lora_dropout=args.lora_dropout,
        bias="none",
        task_type="CAUSAL_LM",
    )
    model = get_peft_model(model, config)
    model.print_trainable_parameters()
    model.config.torch_dtype = torch.float32

    # Report the dtype distribution of the model parameters.
    verify_model_dtype(model)

    # Loss function.
    loss_func = TargetLMLoss(ignore_index=-100)

    data_collator = SFTDataCollator(tokenizer, args.max_seq_length)

    # dataset
    def encode_with_truncation(examples):
        """Tokenize one prompt/response pair and mark the response tokens as targets."""
        prompt_ = examples.pop('prompt')
        response_ = examples.pop('response')
        utterances = [
            prompt_,
            response_
        ]

        utterances_ids = tokenizer(utterances, add_special_tokens=False).input_ids

        # Layout: <bos> prompt <eos> response <eos>
        input_ids = [tokenizer.bos_token_id]
        target_mask = [0]
        for i, utterances_id in enumerate(utterances_ids):
            input_ids += (utterances_id + [tokenizer.eos_token_id])

            # Even index = prompt (mask 0), odd index = response (mask 1);
            # the +1 covers the appended eos token.
            if i % 2 == 0:
                target_mask += [0] * (len(utterances_id) + 1)
            else:
                target_mask += [1] * (len(utterances_id) + 1)

        assert len(input_ids) == len(target_mask)

        input_ids = input_ids[:args.max_seq_length]
        target_mask = target_mask[:args.max_seq_length]
        attention_mask = [1] * len(input_ids)

        assert len(input_ids) == len(target_mask) == len(attention_mask)

        inputs = {
            "input_ids": input_ids,
            "attention_mask": attention_mask,
            "target_mask": target_mask
        }
        return inputs

    train_dataset = train_dataset.map(
        encode_with_truncation,
        batched=False,
        keep_in_memory=False,
        num_proc=None if platform.system() == "Windows" else os.cpu_count(),
        cache_file_name=os.path.join(args.cache_dir, "train.cache")
    )
    train_dataset.set_format(type=None, columns=["input_ids", "attention_mask", "target_mask"])
    print("Train Dataset Examples Batch Number: {}".format(len(train_dataset)))

    # Trainer.
    trainer = LoRATrainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        # tokenizer=tokenizer,
        data_collator=data_collator,
        compute_loss=loss_func
    )
    train_result = trainer.train()

    # Save the model as it is at the end of training under <output_dir>/final.
    # The tokenizer is not handed to the trainer above.
    final_save_path = os.path.join(training_args.output_dir, "final")
    trainer.save_model(final_save_path)
    # Save the training metrics and the trainer state.
    metrics = train_result.metrics
    trainer.log_metrics("train", metrics)
    trainer.save_metrics("train", metrics)
    trainer.save_state()
    return
|
324 |
+
|
325 |
+
|
326 |
+
if __name__ == '__main__':
|
327 |
+
main()
|
examples/exercises/chinese_modern_poetry/3.merge_lora.py
ADDED
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
import argparse
|
4 |
+
|
5 |
+
from peft import PeftModel
|
6 |
+
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig
|
7 |
+
import torch
|
8 |
+
"""
|
9 |
+
使用该脚本,将lora的权重合并到base model中
|
10 |
+
"""
|
11 |
+
|
12 |
+
|
13 |
+
def get_args():
    """
    Parse the command-line options of the LoRA merge script.

    Example:
        python3 3.merge_lora.py
            --pretrained_model_name_or_path /data/tianxing/PycharmProjects/Transformers/pretrained_models/huggingface/Qwen/Qwen-7B
            --adapter_name_or_path /data/tianxing/PycharmProjects/Transformers/examples/exercises/chinese_modern_poetry/file_dir/serialization_dir/checkpoint-27000
            --save_directory /data/tianxing/PycharmProjects/Transformers/trained_models/qwen_7b_modern_poetry

    :return: argparse.Namespace with pretrained_model_name_or_path,
        adapter_name_or_path and save_directory.
    """
    # Every option is a plain string; list them once as (flag, default).
    string_options = (
        ("--pretrained_model_name_or_path", "YeungNLP/firefly-chatglm2-6b"),
        ("--adapter_name_or_path", "YeungNLP/firefly-baichuan-7b-qlora-sft"),
        ("--save_directory", "save_directory"),
    )

    cli = argparse.ArgumentParser()
    for flag, default_value in string_options:
        cli.add_argument(flag, default=default_value, type=str)
    return cli.parse_args()
|
37 |
+
|
38 |
+
|
39 |
+
def main():
    """
    Merge a LoRA adapter into its base model and save the result.

    The base model and the adapter are both loaded on the CPU, the adapter
    weights are merged with `merge_and_unload`, and the tokenizer and merged
    model are written to `--save_directory`.
    """
    args = get_args()
    base_name = args.pretrained_model_name_or_path

    config = AutoConfig.from_pretrained(base_name, trust_remote_code=True)

    # llama does not support the fast tokenizer
    fast_tokenizer = config.model_type != 'llama'
    tokenizer = AutoTokenizer.from_pretrained(
        base_name,
        trust_remote_code=True,
        use_fast=fast_tokenizer,
    )

    base_model = AutoModelForCausalLM.from_pretrained(
        base_name,
        trust_remote_code=True,
        low_cpu_mem_usage=True,
        torch_dtype=torch.float16,
        device_map={"": "cpu"},
    )
    adapted_model = PeftModel.from_pretrained(
        base_model,
        args.adapter_name_or_path,
        device_map={"": "cpu"},
    )
    merged_model = adapted_model.merge_and_unload()

    tokenizer.save_pretrained(args.save_directory)
    merged_model.save_pretrained(args.save_directory)
    return
|
67 |
+
|
68 |
+
|
69 |
+
if __name__ == '__main__':
|
70 |
+
main()
|
examples/exercises/chinese_modern_poetry/4.test_model.py
ADDED
@@ -0,0 +1,104 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
import argparse
|
4 |
+
import os
|
5 |
+
import sys
|
6 |
+
|
7 |
+
pwd = os.path.abspath(os.path.dirname(__file__))
|
8 |
+
sys.path.append(os.path.join(pwd, '../../../'))
|
9 |
+
|
10 |
+
from transformers import AutoModelForCausalLM, AutoTokenizer
|
11 |
+
import torch
|
12 |
+
|
13 |
+
from project_settings import project_path
|
14 |
+
"""
|
15 |
+
单轮对话,不具有对话历史的记忆功能
|
16 |
+
"""
|
17 |
+
|
18 |
+
|
19 |
+
def get_args():
    """
    Parse the command-line options of the interactive test script.

    Examples:
        python3 4.test_model.py --pretrained_model_name_or_path /data/tianxing/PycharmProjects/Transformers/trained_models/qwen_7b_chinese_modern_poetry
        python3 4.test_model.py --pretrained_model_name_or_path /data/tianxing/PycharmProjects/Transformers/trained_models/qwen_7b_modern_poetry
        python3 4.test_model.py --pretrained_model_name_or_path /data/tianxing/PycharmProjects/Transformers/pretrained_models/huggingface/Qwen/Qwen-7B

    :return: argparse.Namespace with the model path, the sampling settings
        and the target device.
    """
    default_model = (project_path / "trained_models/firefly_chatglm2_6b_intent").as_posix()
    # Use the GPU when torch reports one, otherwise the CPU.
    default_device = "cuda" if torch.cuda.is_available() else "cpu"

    cli = argparse.ArgumentParser()
    cli.add_argument("--pretrained_model_name_or_path", default=default_model, type=str)
    cli.add_argument("--max_new_tokens", default=512, type=int)
    cli.add_argument("--top_p", default=0.9, type=float)
    cli.add_argument("--temperature", default=0.35, type=float)
    cli.add_argument("--repetition_penalty", default=1.0, type=float)
    cli.add_argument('--device', default=default_device, type=str)
    return cli.parse_args()
|
43 |
+
|
44 |
+
|
45 |
+
def main():
    """
    Run an interactive single-turn chat loop against a causal language model.

    The model and tokenizer are loaded from `--pretrained_model_name_or_path`.
    Each line read from stdin is turned into a prompt, a sampled completion
    is generated and printed. No conversation history is kept between turns.
    The loop ends cleanly on end-of-input (Ctrl-D) or Ctrl-C.
    """
    args = get_args()

    model = AutoModelForCausalLM.from_pretrained(
        args.pretrained_model_name_or_path,
        trust_remote_code=True,
        # low_cpu_mem_usage=True,
        torch_dtype=torch.float16,
        # device_map="auto",
        device_map={"": 0},
        # offload_folder="./offload",
    ).to(args.device).eval()

    tokenizer = AutoTokenizer.from_pretrained(
        args.pretrained_model_name_or_path,
        trust_remote_code=True,
        # llama does not support the fast tokenizer
        use_fast=False if model.config.model_type == "llama" else True,
        padding_side="left"
    )

    # QWenTokenizer is special: pad_token_id, bos_token_id and eos_token_id are
    # all None. eod_id is the id of the <|endoftext|> token.
    if tokenizer.__class__.__name__ == "QWenTokenizer":
        tokenizer.pad_token_id = tokenizer.eod_id
        tokenizer.bos_token_id = tokenizer.eod_id
        tokenizer.eos_token_id = tokenizer.eod_id

    while True:
        # Leave the loop instead of dying with a traceback when the user
        # closes stdin or presses Ctrl-C.
        try:
            text = input("User: ")
        except (EOFError, KeyboardInterrupt):
            print()
            break

        text = text.strip()
        if not text:
            # Nothing to generate from; ask again.
            continue

        # chatglm uses its official prompt format
        if model.config.model_type == "chatglm":
            text = "[Round 1]\n\n问:{}\n\n答:".format(text)
            input_ids = tokenizer(text, return_tensors="pt", add_special_tokens=False).input_ids.to(args.device)
        # For qwen-7b compatibility: tokenizing the eos token text does not
        # yield eos_token_id, so bos/eos ids are concatenated explicitly.
        else:
            input_ids = tokenizer(
                text,
                return_tensors="pt",
                add_special_tokens=False,
            ).input_ids.to(args.device)
            bos_token_id = torch.tensor([[tokenizer.bos_token_id]], dtype=torch.long).to(args.device)
            eos_token_id = torch.tensor([[tokenizer.eos_token_id]], dtype=torch.long).to(args.device)
            input_ids = torch.concat([bos_token_id, input_ids, eos_token_id], dim=1)

        with torch.no_grad():
            outputs = model.generate(
                input_ids=input_ids, max_new_tokens=args.max_new_tokens, do_sample=True,
                top_p=args.top_p, temperature=args.temperature, repetition_penalty=args.repetition_penalty,
                eos_token_id=tokenizer.eos_token_id
            )
        # Keep only the newly generated tokens (drop the echoed prompt).
        outputs = outputs.tolist()[0][len(input_ids[0]):]
        response = tokenizer.decode(outputs).strip()
        # str.replace needs a string; skip the cleanup when no eos token text is set.
        eos_token = tokenizer.eos_token
        if eos_token:
            response = response.replace(eos_token, "").strip()
        print("LLM: {}".format(response))

    return
|
101 |
+
|
102 |
+
|
103 |
+
if __name__ == '__main__':
|
104 |
+
main()
|
examples/exercises/chinese_modern_poetry/run.sh
ADDED
@@ -0,0 +1,196 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env bash

# sh run.sh --stage 0 --stop_stage 0 --system_version centos
# sh run.sh --stage 1 --stop_stage 1 --system_version centos
# sh run.sh --stage 2 --stop_stage 2 --system_version centos
# sh run.sh --stage 4 --stop_stage 4 --system_version centos --final_model_name qwen_7b_modern_poetry

# bitsandbytes
export LD_LIBRARY_PATH="/usr/local/cuda/lib64${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"

# params (each one can be overridden with --<name> <value>)
system_version="windows";
verbose=true;
stage=0 # start from 0 if you need to start from data preparation
stop_stage=5

pretrained_model_supplier=Qwen
pretrained_model_name=Qwen-7B

final_checkpoint_dir=final
final_model_name=qwen_7b_modern_poetry

patience=0


# parse options
while true; do
  [ -z "${1:-}" ] && break;  # break if there are no arguments
  case "$1" in
    --*) name=$(echo "$1" | sed s/^--// | sed s/-/_/g);
      # Reject options that do not correspond to a variable defined above.
      eval '[ -z "${'"$name"'+xxx}" ]' && echo "$0: invalid option $1" 1>&2 && exit 1;
      # Current value of the variable being overridden. The original line was
      # missing the "$" of the command substitution, so old_value was a
      # literal string and the Boolean check below never triggered.
      old_value="$(eval echo \$$name)";
      if [ "${old_value}" == "true" ] || [ "${old_value}" == "false" ]; then
        was_bool=true;
      else
        was_bool=false;
      fi

      # Set the variable to the right value-- the escaped quotes make it work if
      # the option had spaces, like --cmd "queue.pl -sync y"
      eval "${name}=\"$2\"";

      # Check that Boolean-valued arguments are really Boolean.
      if $was_bool && [[ "$2" != "true" && "$2" != "false" ]]; then
        echo "$0: expected \"true\" or \"false\": $1 $2" 1>&2
        exit 1;
      fi
      shift 2;
      ;;

    *) break;
  esac
done
|
54 |
+
|
55 |
+
|
56 |
+
$verbose && echo "system_version: ${system_version}"

# Working directories, all relative to the directory run.sh is started from.
work_dir="$(pwd)"
file_dir="${work_dir}/file_dir"
cache_dir="${file_dir}/cache_dir"
serialization_dir="${file_dir}/serialization_dir"

pretrained_models_dir="${work_dir}/../../../pretrained_models/huggingface/${pretrained_model_supplier}"
final_model_dir="${work_dir}/../../../trained_models/${final_model_name}";

mkdir -p "${file_dir}"
mkdir -p "${cache_dir}"
mkdir -p "${serialization_dir}"
mkdir -p "${pretrained_models_dir}"
mkdir -p "${final_model_dir}"

# Make the project root importable (project_settings, toolbox).
export PYTHONPATH="${work_dir}/../../.."


# Pick the interpreter for the selected system; any other value keeps
# whatever "python3" is found on PATH.
python_bin="python3"
if [ "${system_version}" == "windows" ]; then
  python_bin='C:/Users/tianx/PycharmProjects/virtualenv/Transformers/Scripts/python.exe'
elif [ "${system_version}" == "centos" ]; then
  # conda activate Transformers
  python_bin='/usr/local/miniconda3/envs/Transformers/bin/python3'
elif [ "${system_version}" == "ubuntu" ]; then
  python_bin='/usr/local/miniconda3/envs/Transformers/bin/python3'
elif [ "${system_version}" == "macos" ]; then
  python_bin='/Users/honey/PycharmProjects/virtualenv/TrainLLM/bin/python'
fi

# A function is used instead of an alias: bash does not expand aliases in
# non-interactive scripts unless expand_aliases is set, so the alias was
# ignored when the script was run with bash. "command" bypasses this
# function, which avoids recursion when python_bin is plain "python3".
function python3() {
  command "${python_bin}" "$@"
}
|
85 |
+
|
86 |
+
|
87 |
+
# Print the name of the newest "checkpoint-<step>" directory in
# ${serialization_dir} whose step is at most (latest step - patience).
# Prints an empty line when there is no such checkpoint.
#   $1: patience, subtracted from the latest step number.
# Side effect: changes the working directory to ${serialization_dir}.
function search_best_ckpt() {
  patience="$1";

  cd "${serialization_dir}" || exit 1
  # Only names of the exact form checkpoint-<digits> are considered; the
  # original pattern "checkpoint-*" matched any name containing "checkpoint".
  last_epoch=$(ls . | \
    grep -E "^checkpoint-[0-9]+$" | \
    awk -F'[-]' '{print$2}' | \
    sort -n | \
    awk 'END {print}')

  target_dir=
  if [ -n "${last_epoch}" ]; then
    target_epoch=$((last_epoch - patience))

    # Walk the steps from newest to oldest. The original had a stray ":"
    # after the command substitution, which was glued onto the last step
    # number and broke the integer comparison for it.
    for epoch_idx in $(ls . | grep -E "^checkpoint-[0-9]+$" | awk -F'[-]' '{print$2}' | sort -nr)
    do
      if [ "${epoch_idx}" -le "${target_epoch}" ]; then
        target_dir="checkpoint-${epoch_idx}";
        break;
      fi
    done
  fi

  echo "${target_dir}"
}
|
112 |
+
|
113 |
+
|
114 |
+
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
  $verbose && echo "stage 0: download pretrained model"
  cd "${pretrained_models_dir}" || exit 1;

  if [ ! -d "${pretrained_model_name}" ]; then
    git clone "https://huggingface.co/${pretrained_model_supplier}/${pretrained_model_name}/"

    # Clean up inside the fresh clone. The original ran these in the parent
    # directory, where they did not touch the cloned repository.
    rm -rf "${pretrained_model_name}/.git"
    rm -rf "${pretrained_model_name}/.gitattributes"
    rm -rf "${pretrained_model_name}/flax_model.msgpack"
    rm -rf "${pretrained_model_name}/model.safetensors"
  fi

  cd "${pretrained_models_dir}/${pretrained_model_name}" || exit 1;

  # pytorch_model.bin
  # A file of exactly 135 bytes is deleted and downloaded again
  # (presumably that is the size of a Git LFS pointer stub - confirm).
  if [ -e "pytorch_model.bin" ]; then
    data_size=$(ls -l pytorch_model.bin | awk '{print $5}')
    if [ "${data_size}" == "135" ]; then
      rm -rf pytorch_model.bin;
    fi
  fi
  if [ ! -e "pytorch_model.bin" ]; then
    wget -c "https://huggingface.co/${pretrained_model_supplier}/${pretrained_model_name}/resolve/main/pytorch_model.bin"
  fi

  # tokenizer.json (same 135-byte check as above)
  if [ -e "tokenizer.json" ]; then
    data_size=$(ls -l tokenizer.json | awk '{print $5}')
    if [ "${data_size}" == "135" ]; then
      rm -rf tokenizer.json;
    fi
  fi
  if [ ! -e "tokenizer.json" ]; then
    wget -c "https://huggingface.co/${pretrained_model_supplier}/${pretrained_model_name}/resolve/main/tokenizer.json"
  fi

fi


if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
  $verbose && echo "stage 1: prepare data"
  cd "${work_dir}" || exit 1;

  python3 1.prepare_data.py

fi


if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
  $verbose && echo "stage 2: train model"
  cd "${work_dir}" || exit 1;

  python3 2.train_model.py \
  --pretrained_model_name_or_path "${pretrained_models_dir}/${pretrained_model_name}" \
  --cache_dir "${cache_dir}" \
  --output_dir "${serialization_dir}"

fi


if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
  $verbose && echo "stage 3: merge lora"
  cd "${work_dir}" || exit 1;

  python3 3.merge_lora.py \
  --pretrained_model_name_or_path "${pretrained_models_dir}/${pretrained_model_name}" \
  --adapter_name_or_path "${serialization_dir}/${final_checkpoint_dir}" \
  --save_directory "${final_model_dir}"

fi


if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
  $verbose && echo "stage 4: collect files"
  cd "${work_dir}" || exit 1;

  # Copy the model's custom Python modules next to the merged weights.
  cp "${pretrained_models_dir}/${pretrained_model_name}/configuration_qwen.py" "${final_model_dir}/configuration_qwen.py"
  cp "${pretrained_models_dir}/${pretrained_model_name}/modeling_qwen.py" "${final_model_dir}/modeling_qwen.py"
  cp "${pretrained_models_dir}/${pretrained_model_name}/qwen_generation_utils.py" "${final_model_dir}/qwen_generation_utils.py"
  cp "${pretrained_models_dir}/${pretrained_model_name}/tokenization_qwen.py" "${final_model_dir}/tokenization_qwen.py"

fi
|
examples/exercises/chinese_modern_poetry/stop.sh
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
#!/usr/bin/env bash
|
main.py
ADDED
@@ -0,0 +1,211 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
import argparse
|
4 |
+
import os
|
5 |
+
|
6 |
+
import gradio as gr
|
7 |
+
from transformers import AutoModel, AutoTokenizer
|
8 |
+
from transformers.models.auto import AutoModelForCausalLM, AutoTokenizer
|
9 |
+
# from transformers.utils.quantization_config import BitsAndBytesConfig
|
10 |
+
import torch
|
11 |
+
|
12 |
+
from project_settings import project_path
|
13 |
+
|
14 |
+
|
15 |
+
def get_args():
    """Parse the command-line options of the demo and return the namespace.

    Every option has a default, so the script can be started without
    arguments. The default model path points inside the project directory and
    the default device is "cuda" when torch reports CUDA as available,
    otherwise "cpu".
    """
    # Alternative hub checkpoint: "YeungNLP/firefly-chatglm2-6b".
    default_model_path = (project_path / "trained_models/firefly_chatglm2_6b_intent").as_posix()
    default_device = "cuda" if torch.cuda.is_available() else "cpu"

    parser = argparse.ArgumentParser()

    # Dataset / model / output locations.
    parser.add_argument("--train_subset", type=str, default="train.jsonl")
    parser.add_argument("--valid_subset", type=str, default="valid.jsonl")
    parser.add_argument(
        "--pretrained_model_name_or_path",
        type=str,
        default=default_model_path,
    )
    parser.add_argument("--output_file", type=str, default="result.xlsx")

    # Generation settings.
    parser.add_argument("--max_new_tokens", type=int, default=512)
    parser.add_argument("--top_p", type=float, default=0.9)
    parser.add_argument("--temperature", type=float, default=0.35)
    parser.add_argument("--repetition_penalty", type=float, default=1.0)
    parser.add_argument("--device", type=str, default=default_device)

    return parser.parse_args()
|
35 |
+
|
36 |
+
|
37 |
+
description = """
|
38 |
+
## ChatGLM-6B
|
39 |
+
|
40 |
+
基于 [firefly-chatglm2-6b](https://huggingface.co/YeungNLP/firefly-chatglm2-6b) 模型, 在 [telemarketing_intent](https://huggingface.co/datasets/qgyd2021/telemarketing_intent/tree/main/data/prompt) 的 prompt 数据集上训练, 目的是实现 `电话营销` 场景的 1-shot 意图识别.
|
41 |
+
|
42 |
+
该分类任务有一百多个类别, 但标注数据总是只有 3 万, 并且有一半是 "无关领域", 实现思路是:
|
43 |
+
1. 首先采用传统算法做硬分类, 然后提取概率 top 10 的标签.
|
44 |
+
2. 将 top 10 的标签作为候选标签, 并为每个标签提供一个句子示例.
|
45 |
+
3. 要求 LLM 输出目标句子的类别.
|
46 |
+
|
47 |
+
Gradio 布署代码参考了: https://huggingface.co/spaces/aodianyun/ChatGLM-6B
|
48 |
+
|
49 |
+
"""
|
50 |
+
|
51 |
+
|
52 |
+
examples = [
|
53 |
+
"""我们在做电话营销场景的意图识别任务, 可选的意图如下:
|
54 |
+
否定(不是); 礼貌用语; 否定答复; 肯定(需要); 用户正忙; 否定(不需要); 无关领域; 否定(没有); 否定(不用了); 价格太高
|
55 |
+
|
56 |
+
如果你认为给定的句子不属于这些意图中的任务一个, 你可以回答: 不知道.
|
57 |
+
|
58 |
+
Tips:
|
59 |
+
1. 如果候选意图中有 "无关领域", 当你不知道时, 则它有可能属于无关领域.
|
60 |
+
|
61 |
+
|
62 |
+
Examples:
|
63 |
+
|
64 |
+
---------
|
65 |
+
|
66 |
+
ExampleSentence: 其实不是
|
67 |
+
ExampleIntent: 否定(不是)
|
68 |
+
|
69 |
+
ExampleSentence: 嗯!嘿嘿!早点休息,晚安咯
|
70 |
+
ExampleIntent: 礼貌用语
|
71 |
+
|
72 |
+
ExampleSentence: 没问诶
|
73 |
+
ExampleIntent: 否定答复
|
74 |
+
|
75 |
+
ExampleSentence: 不好意思都需要谢谢
|
76 |
+
ExampleIntent: 肯定(需要)
|
77 |
+
|
78 |
+
ExampleSentence: 对呀我在忙
|
79 |
+
ExampleIntent: 用户正忙
|
80 |
+
|
81 |
+
ExampleSentence: 。嗯也也不需要吧唉呀现在不需要那个啊嗯
|
82 |
+
ExampleIntent: 否定(不需要)
|
83 |
+
|
84 |
+
ExampleSentence: 我的处理器需要很少的电源。
|
85 |
+
ExampleIntent: 无关领域
|
86 |
+
|
87 |
+
ExampleSentence: 。呃我好像没有在太平洋买过保险,吧拜拜
|
88 |
+
ExampleIntent: 否定(没有)
|
89 |
+
|
90 |
+
ExampleSentence: 嗯不用谢谢
|
91 |
+
ExampleIntent: 否定(不用了)
|
92 |
+
|
93 |
+
ExampleSentence: 费用贵。
|
94 |
+
ExampleIntent: 价格太高
|
95 |
+
|
96 |
+
---------
|
97 |
+
|
98 |
+
Sentence: 。嗯各位不需要,啊谢谢
|
99 |
+
Intent:""",
|
100 |
+
"""我们在做电话营销场景的意图识别任务, 可选的意图如下:
|
101 |
+
语音信箱; 无关领域; 查物品信息; 污言秽语; 疑问(时间); 疑问(数值); 答时间; 查收费方式; 价格太高; 答数值
|
102 |
+
|
103 |
+
如果你认为给定的句子不属于这些意图中的任务一个, 你可以回答: 不知道.
|
104 |
+
|
105 |
+
Tips:
|
106 |
+
1. 如果候选意图中有 "无关领域", 当你不知道时, 则它有可能属于无关领域.
|
107 |
+
|
108 |
+
|
109 |
+
Examples:
|
110 |
+
---------
|
111 |
+
|
112 |
+
ExampleSentence: 我们留言。
|
113 |
+
ExampleIntent: 语音信箱
|
114 |
+
|
115 |
+
ExampleSentence: 很刚刚打
|
116 |
+
ExampleIntent: 无关领域
|
117 |
+
|
118 |
+
ExampleSentence: 什么东西我听
|
119 |
+
ExampleIntent: 查物品信息
|
120 |
+
|
121 |
+
ExampleSentence: 知道!AV女优!日本人的骄傲!
|
122 |
+
ExampleIntent: 污言秽语
|
123 |
+
|
124 |
+
ExampleSentence: 最后期限
|
125 |
+
ExampleIntent: 疑问(时间)
|
126 |
+
|
127 |
+
ExampleSentence: 一共借了多少钱
|
128 |
+
ExampleIntent: 疑问(数值)
|
129 |
+
|
130 |
+
ExampleSentence: 22号
|
131 |
+
ExampleIntent: 答时间
|
132 |
+
|
133 |
+
ExampleSentence: 运费
|
134 |
+
ExampleIntent: 查收费方式
|
135 |
+
|
136 |
+
ExampleSentence: 利息高
|
137 |
+
ExampleIntent: 价格太高
|
138 |
+
|
139 |
+
ExampleSentence: 20。
|
140 |
+
ExampleIntent: 答数值
|
141 |
+
|
142 |
+
---------
|
143 |
+
|
144 |
+
Sentence: 。对啊什么东西啊我6月份出来的
|
145 |
+
Intent:"""
|
146 |
+
]
|
147 |
+
|
148 |
+
|
149 |
+
def main():
    """Load the tokenizer and model, then serve a Gradio chat demo.

    The loading strategy is selected with the ``USE_CPU`` environment
    variable. When it is unset (default ``"all"``) or set to any value other
    than an explicit "off" value, the model is loaded through
    ``AutoModelForCausalLM`` with ``device_map="auto"`` and an offload
    folder. When it is set to ``""``, ``0``, ``false``, ``no`` or ``off``
    (case-insensitive), the model is loaded with ``AutoModel`` and moved to
    the GPU in half precision.
    """
    args = get_args()

    # Environment variables are strings, so "0" or "false" would be truthy if
    # tested directly; parse the flag explicitly instead.
    use_cpu_flag = os.environ.get("USE_CPU", "all").strip().lower()
    use_cpu = use_cpu_flag not in ("", "0", "false", "no", "off")

    tokenizer = AutoTokenizer.from_pretrained(args.pretrained_model_name_or_path, trust_remote_code=True)
    # QWenTokenizer is special: pad_token_id, bos_token_id and eos_token_id are
    # all None, and eod_id corresponds to the <|endoftext|> token, so reuse it
    # for all three.
    if tokenizer.__class__.__name__ == "QWenTokenizer":
        tokenizer.pad_token_id = tokenizer.eod_id
        tokenizer.bos_token_id = tokenizer.eod_id
        tokenizer.eos_token_id = tokenizer.eod_id

    if not use_cpu:
        model = AutoModel.from_pretrained(
            args.pretrained_model_name_or_path,
            trust_remote_code=True
        ).half().cuda()
    else:
        model = AutoModelForCausalLM.from_pretrained(
            args.pretrained_model_name_or_path,
            trust_remote_code=True,
            low_cpu_mem_usage=True,
            torch_dtype=torch.bfloat16,
            device_map="auto",
            offload_folder="./offload",
            offload_state_dict=True,
            # load_in_4bit=True,
        )
    model = model.eval()

    def fn(inputs, history=None):
        """Run one chat turn; return the history for the chatbot and the state."""
        if history is None:
            history = list()

        with torch.no_grad():
            # Only the updated history is displayed; the reply text is unused.
            _, history = model.chat(tokenizer, inputs, history)

        return history, history

    with gr.Blocks() as blocks:
        gr.Markdown(value=description)

        state = gr.State([])

        chatbot = gr.Chatbot([], elem_id="chatbot").style(height=400)
        with gr.Row():
            with gr.Column(scale=4):
                text = gr.Textbox(show_label=False, placeholder="Enter text and press enter").style(container=False)
            with gr.Column(scale=1):
                button = gr.Button("Generate")

        gr.Examples(examples, text)

        # Pressing Enter and clicking the button trigger the same handler.
        text.submit(fn, [text, state], [chatbot, state])
        button.click(fn, [text, state], [chatbot, state])

    blocks.queue().launch()

    return
|
208 |
+
|
209 |
+
|
210 |
+
if __name__ == '__main__':
|
211 |
+
main()
|
project_settings.py
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
import os
|
4 |
+
from pathlib import Path
|
5 |
+
|
6 |
+
from toolbox.os.environment import EnvironmentManager
|
7 |
+
|
8 |
+
|
9 |
+
# Absolute path of the directory that contains this settings module.
project_path = Path(os.path.abspath(os.path.dirname(__file__)))


# Environment manager pointed at the "dotenv" directory under the project
# path; the environment name is read from the "environment" variable and
# falls back to "dev".
environment = EnvironmentManager(
    path=os.path.join(project_path, "dotenv"),
    env=os.environ.get("environment", "dev"),
)


if __name__ == "__main__":
    pass
|
requirements.txt
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
gradio==3.20.1
|
2 |
+
pydantic==1.10.12
|
3 |
+
thinc==7.4.6
|
4 |
+
spacy==2.3.9
|
5 |
+
accelerate==0.21.0
|
6 |
+
transformers==4.30.2
|
7 |
+
peft==0.4.0
|
8 |
+
bitsandbytes==0.39.0
|
9 |
+
numpy==1.21.4
|
10 |
+
pandas==1.2.5
|
11 |
+
tqdm==4.62.3
|
12 |
+
torch==1.13.0
|
13 |
+
datasets
|
14 |
+
python-dotenv==1.0.0
|
15 |
+
sentencepiece==0.1.99
|
16 |
+
scipy==1.10.1
|
script/install_bitsandbytes.sh
ADDED
@@ -0,0 +1,232 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env bash
|
2 |
+
|
3 |
+
#bitsandbytes
|
4 |
+
#https://github.com/TimDettmers/bitsandbytes
|
5 |
+
#
|
6 |
+
### 安装bitsandbytes
|
7 |
+
#
|
8 |
+
#bitsandbytes 是 CUDA 自定义函数的轻量级包装器, 特别是 8 位优化器, 矩阵乘法 (LLM.int8()) 和量化函数.
|
9 |
+
#
|
10 |
+
#### 安装
|
11 |
+
#
|
12 |
+
#通过 `pip3 install bitsandbytes` 来安装.
|
13 |
+
#
|
14 |
+
#安装之后通过 `python -m bitsandbytes` 来验证安装是否成功.
|
15 |
+
#
|
16 |
+
#在某些情况下可能需要从源代码进行编译.
|
17 |
+
#
|
18 |
+
#```text
|
19 |
+
#git clone https://github.com/timdettmers/bitsandbytes.git
|
20 |
+
#cd bitsandbytes
|
21 |
+
#
|
22 |
+
## CUDA_VERSIONS in {110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 120}
|
23 |
+
## make argument in {cuda110, cuda11x, cuda12x}
|
24 |
+
## if you do not know what CUDA you have, try looking at the output of: python -m bitsandbytes
|
25 |
+
#CUDA_VERSION=117 make cuda11x
|
26 |
+
#python setup.py install
|
27 |
+
#```
|
28 |
+
#
|
29 |
+
#### 备注
|
30 |
+
#
|
31 |
+
##### 必须安装与 GPU 版本相匹配的 CUDA
|
32 |
+
#
|
33 |
+
#我的情况如下:
|
34 |
+
#
|
35 |
+
#**GPU 和 CUDA 版本. **
|
36 |
+
#
|
37 |
+
#```text
|
38 |
+
## nvidia-smi
|
39 |
+
#Mon Aug 28 14:38:32 2023
|
40 |
+
#+-----------------------------------------------------------------------------+
|
41 |
+
#| NVIDIA-SMI 515.105.01 Driver Version: 515.105.01 CUDA Version: 11.7 |
|
42 |
+
#|-------------------------------+----------------------+----------------------+
|
43 |
+
#| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |
|
44 |
+
#| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |
|
45 |
+
#| | | MIG M. |
|
46 |
+
#|===============================+======================+======================|
|
47 |
+
#| 0 Tesla V100S-PCI... Off | 00000000:0B:00.0 Off | 0 |
|
48 |
+
#| N/A 48C P0 40W / 250W | 14154MiB / 32768MiB | 0% Default |
|
49 |
+
#| | | N/A |
|
50 |
+
#+-------------------------------+----------------------+----------------------+
|
51 |
+
#
|
52 |
+
#+-----------------------------------------------------------------------------+
|
53 |
+
#| Processes: |
|
54 |
+
#| GPU GI CI PID Type Process name GPU Memory |
|
55 |
+
#| ID ID Usage |
|
56 |
+
#|=============================================================================|
|
57 |
+
#| 0 N/A N/A 11127 C python3 12973MiB |
|
58 |
+
#| 0 N/A N/A 25921 C python3 1177MiB |
|
59 |
+
#+-----------------------------------------------------------------------------+
|
60 |
+
#```
|
61 |
+
#
|
62 |
+
#**CUDA 版本. **
|
63 |
+
#
|
64 |
+
#我的经历, 安装 nvidia driver 驱动后 PyTorch 就可以使用 GPU 了, 同时 `nvidia-smi` 命令中也会显示 `CUDA Version: 11.7`.
|
65 |
+
#
|
66 |
+
#但是 `/usr/local/cuda:/usr/local/cuda-11.7` 是不存在的. 这个需要单独安装 (即: 安装 CUDA).
|
67 |
+
#
|
68 |
+
#```text
|
69 |
+
## ll /usr/local/ | grep cuda
|
70 |
+
#lrwxrwxrwx 1 root root 20 Aug 15 19:12 cuda -> /usr/local/cuda-11.7
|
71 |
+
#drwxr-xr-x 14 root root 268 Aug 15 18:31 cuda-11.7
|
72 |
+
#```
|
73 |
+
#
|
74 |
+
##### 从编译安装
|
75 |
+
#
|
76 |
+
#从编译安装使用的命令如下:
|
77 |
+
#
|
78 |
+
#```
|
79 |
+
#CUDA SETUP: Something unexpected happened. Please compile from source:
|
80 |
+
#git clone [email protected]:TimDettmers/bitsandbytes.git
|
81 |
+
#cd bitsandbytes
|
82 |
+
#CUDA_VERSION=117 make cuda11x_nomatmul
|
83 |
+
#python setup.py install
|
84 |
+
#```
|
85 |
+
#
|
86 |
+
#我的情况是没有使用容器, 在宿主机上安装的.
|
87 |
+
#
|
88 |
+
#1. 之前机器上安装的是 `/usr/local/cuda-10.4` 编译不通过. 因为 `/usr/local/cuda-10.4/bin` 下的 `nvcc` 编译器与 GPU 所需的 `CUDA Version: 11.7` 是不匹配的.
|
89 |
+
#2. 后来安装了 `/usr/local/cuda-11.7` 并删除 `/usr/local/cuda-10.4`, 但还是安装不成功.
|
90 |
+
#
|
91 |
+
#偶然的一次尝试:
|
92 |
+
#
|
93 |
+
#**需要使用conda虚拟环境**, 在 python 的 virtualenv 中安装失败了.
|
94 |
+
#
|
95 |
+
#**执行 `CUDA_VERSION=117 make cuda11x_nomatmul` 命令时, 确保以下几项正确**. 即:
|
96 |
+
#
|
97 |
+
#* `NVCC path`: 指向了 cuda 中的 nvcc 编译器. (`nvcc` 是 cuda 提供的一款编译器).
|
98 |
+
#
|
99 |
+
#* `CUDA_HOME`: cuda 安装的目录, 一般安装时会自动确定在 `/usr/local/cuda`,
|
100 |
+
#
|
101 |
+
#* `CONDA_PREFIX`: 是 `conda` 下创建的虚拟环境.
|
102 |
+
#
|
103 |
+
#* `PATH`: 应包含 cuda 的 bin 目录.
|
104 |
+
#
|
105 |
+
#* `LD_LIBRARY_PATH`: 应包含 cuda 的 lib 目录.
|
106 |
+
#
|
107 |
+
#```
|
108 |
+
#(Transformers) [root@nlp bitsandbytes-0.39.1]# CUDA_VERSION=117 make cuda11x_nomatmul
|
109 |
+
#ENVIRONMENT
|
110 |
+
#============================
|
111 |
+
#CUDA_VERSION: 117
|
112 |
+
#============================
|
113 |
+
#NVCC path: /usr/local/cuda/bin/nvcc
|
114 |
+
#GPP path: /usr/bin/g++ VERSION: g++ (GCC) 11.1.0
|
115 |
+
#CUDA_HOME: /usr/local/cuda
|
116 |
+
#CONDA_PREFIX: /usr/local/miniconda3/envs/Transformers
|
117 |
+
#PATH: /usr/local/miniconda3/envs/Transformers/bin:/usr/local/miniconda3/condabin:/usr/local/sbin:/sbin:/bin:/usr/sbin:/usr/bin:/root/bin:/usr/local/cuda/bin:/root/bin
|
118 |
+
#LD_LIBRARY_PATH: /usr/local/cuda/lib64
|
119 |
+
#============================
|
120 |
+
#```
|
121 |
+
#
|
122 |
+
#我的情况是, 本应该是 `LD_LIBRARY_PATH: /usr/local/cuda/lib64` 的项变成了 `LD_LIBRARY_PATH:`.
|
123 |
+
#
|
124 |
+
#检查 `cat ~/.bashrc` 中包含:
|
125 |
+
#
|
126 |
+
#```text
|
127 |
+
#CUDA_HOME="/usr/local/cuda"
|
128 |
+
#PATH=/usr/local/sbin:/sbin:/bin:/usr/sbin:/usr/bin:/root/bin:/usr/local/miniconda3/bin:/usr/local/cuda/bin
|
129 |
+
#LD_LIBRARY_PATH=/usr/local/cuda/lib64
|
130 |
+
#```
|
131 |
+
#
|
132 |
+
#同时再执行
|
133 |
+
#
|
134 |
+
#```text
|
135 |
+
#export LD_LIBRARY_PATH="/usr/local/cuda/lib64${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
|
136 |
+
#```
|
137 |
+
#
|
138 |
+
#之后就编译成功了.
|
139 |
+
#
|
140 |
+
#**检查是否安装成功**
|
141 |
+
#
|
142 |
+
#重新连接 Terminal 之后, 在执行 `python -m bitsandbytes` 之前先执行以下命令.
|
143 |
+
#
|
144 |
+
#```text
|
145 |
+
#export LD_LIBRARY_PATH="/usr/local/cuda/lib64${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
|
146 |
+
#```
|
147 |
+
#
|
148 |
+
#这一步非常奇怪, 因为 `echo $LD_LIBRARY_PATH`, 可以看到 `/usr/local/cuda/lib64` 路径在其中. `echo $PATH` 都可以看到 `/usr/local/cuda` 在其中.
|
149 |
+
#
|
150 |
+
#但是执行 `CUDA_VERSION=117 make cuda11x_nomatmul` 时会发现原本应该是 `LD_LIBRARY_PATH: /usr/local/cuda/lib64` 的项变成了 `LD_LIBRARY_PATH:`.
|
151 |
+
#
|
152 |
+
#只要再执行一次以上命令, 再执行以下命令, 就可以成功.
|
153 |
+
#
|
154 |
+
#安装后执行以下命令, 检查是否安装成功.
|
155 |
+
#
|
156 |
+
#```
|
157 |
+
#python -m bitsandbytes
|
158 |
+
#```
|
159 |
+
#
|
160 |
+
#如果出现以下内容, 说明安装成功了.
|
161 |
+
#
|
162 |
+
#```text
|
163 |
+
#...
|
164 |
+
#...
|
165 |
+
#...
|
166 |
+
#Running a quick check that:
|
167 |
+
# + library is importable
|
168 |
+
# + CUDA function is callable
|
169 |
+
#
|
170 |
+
#
|
171 |
+
#WARNING: Please be sure to sanitize sensible info from any such env vars!
|
172 |
+
#
|
173 |
+
#SUCCESS!
|
174 |
+
#Installation was successful!
|
175 |
+
#```
|
176 |
+
|
177 |
+
|
178 |
+
# sh install_bitsandbytes.sh --stage 0 --stop_stage 0
|
179 |
+
|
180 |
+
|
181 |
+
# Defaults; each may be overridden on the command line as "--name value".
verbose=true;
stage=0 # start from 0 if you need to start from data preparation
stop_stage=5


# Parse "--name value" options. Only variables that already have a default
# above may be overridden.
while true; do
  [ -z "${1:-}" ] && break;  # break if there are no arguments
  case "$1" in
    --*) name=$(echo "$1" | sed s/^--// | sed s/-/_/g);
      # Reject options whose variable was not given a default above.
      eval '[ -z "${'"$name"'+xxx}" ]' && echo "$0: invalid option $1" 1>&2 && exit 1;

      # Every option takes a value; without this check "shift 2" fails on a
      # trailing option and the loop never terminates.
      if [ $# -lt 2 ]; then
        echo "$0: option $1 requires an argument" 1>&2
        exit 1;
      fi

      # Read the current (default) value so Boolean options can be validated.
      old_value="$(eval echo \$$name)";
      if [ "${old_value}" == "true" ] || [ "${old_value}" == "false" ]; then
        was_bool=true;
      else
        was_bool=false;
      fi

      # Set the variable to the right value-- the escaped quotes make it work if
      # the option had spaces, like --cmd "queue.pl -sync y"
      eval "${name}=\"$2\"";

      # Check that Boolean-valued arguments are really Boolean.
      if $was_bool && [[ "$2" != "true" && "$2" != "false" ]]; then
        echo "$0: expected \"true\" or \"false\": $1 $2" 1>&2
        exit 1;
      fi
      shift 2;
      ;;

    *) break;
  esac
done


work_dir="$(pwd)"
thirdparty_dir="${work_dir}/thirdparty"

mkdir -p "${thirdparty_dir}"


# Stage 0: download and unpack the bitsandbytes 0.39.1 source archive.
if [ "${stage}" -le 0 ] && [ "${stop_stage}" -ge 0 ]; then
  $verbose && echo "stage 0: download bitsandbytes"
  cd "${thirdparty_dir}" || exit 1;

  # Stop on a failed download or extraction instead of continuing silently.
  wget https://github.com/TimDettmers/bitsandbytes/archive/refs/tags/0.39.1.zip || exit 1;
  unzip 0.39.1.zip || exit 1;
  rm -rf 0.39.1.zip

fi
|
231 |
+
|
232 |
+
|
script/install_conda.sh
ADDED
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env bash
|
2 |
+
|
3 |
+
# https://www.5axxw.com/questions/simple/umiecs
|
4 |
+
|
5 |
+
# params:
|
6 |
+
# Default; may be overridden on the command line as "--system_version value".
system_version="centos";


# Parse "--name value" options. Only variables that already have a default
# above may be overridden.
while true; do
  [ -z "${1:-}" ] && break;  # break if there are no arguments
  case "$1" in
    --*) name=$(echo "$1" | sed s/^--// | sed s/-/_/g);
      # Reject options whose variable was not given a default above.
      eval '[ -z "${'"$name"'+xxx}" ]' && echo "$0: invalid option $1" 1>&2 && exit 1;

      # Every option takes a value; without this check "shift 2" fails on a
      # trailing option and the loop never terminates.
      if [ $# -lt 2 ]; then
        echo "$0: option $1 requires an argument" 1>&2
        exit 1;
      fi

      # Read the current (default) value so Boolean options can be validated.
      old_value="$(eval echo \$$name)";
      if [ "${old_value}" == "true" ] || [ "${old_value}" == "false" ]; then
        was_bool=true;
      else
        was_bool=false;
      fi

      # Set the variable to the right value-- the escaped quotes make it work if
      # the option had spaces, like --cmd "queue.pl -sync y"
      eval "${name}=\"$2\"";

      # Check that Boolean-valued arguments are really Boolean.
      if $was_bool && [[ "$2" != "true" && "$2" != "false" ]]; then
        echo "$0: expected \"true\" or \"false\": $1 $2" 1>&2
        exit 1;
      fi
      shift 2;
      ;;

    *) break;
  esac
done


echo "system_version: ${system_version}";


if [ "${system_version}" = "centos" ]; then
  wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh || exit 1;

  # Interactive installer; the commands below expect the install prefix
  # /usr/local/miniconda3.
  bash Miniconda3-latest-Linux-x86_64.sh

  /usr/local/miniconda3/bin/conda --version

  cat ~/.bashrc
  # Single quotes keep $PATH unexpanded so it is evaluated at login time
  # rather than frozen to the value it has while this script runs. The line
  # goes to the same rc file that is printed above and sourced below.
  echo 'PATH="$PATH:/usr/local/miniconda3/bin"' >> ~/.bashrc
  source ~/.bashrc

  conda --version

fi
|
56 |
+
|
57 |
+
|
58 |
+
|
script/install_cuda.sh
ADDED
@@ -0,0 +1,103 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env bash
|
2 |
+
|
3 |
+
# 查看系统架构 Architecture
|
4 |
+
# >>> uname -a
|
5 |
+
# Linux nlp 3.10.0-1160.66.1.el7.x86_64 #1 SMP Wed May 18 16:02:34 UTC 2022 x86_64 x86_64 x86_64 GNU/Linux
|
6 |
+
# >>> uname -m
|
7 |
+
# x86_64
|
8 |
+
|
9 |
+
|
10 |
+
#cuda驱动就像普通的软件一样, 可以安装多个.
|
11 |
+
|
12 |
+
|
13 |
+
#在以下路径找到对应版本, 获得安装命令.
|
14 |
+
#https://developer.nvidia.com/cuda-toolkit-archive
|
15 |
+
#
|
16 |
+
#参考链接:
|
17 |
+
#https://www.cnblogs.com/yuezc/p/12937239.html
|
18 |
+
#https://blog.csdn.net/pursuit_zhangyu/article/details/117073126
|
19 |
+
#
|
20 |
+
#[root@nlp dep]# sh cuda_10.2.89_440.33.01_linux.run --override
|
21 |
+
#(执行以上命令后, 按提示操作, 以下是安装完成后的信息).
|
22 |
+
#===========
|
23 |
+
#= Summary =
|
24 |
+
#===========
|
25 |
+
#
|
26 |
+
#Driver: Installed
|
27 |
+
#Toolkit: Installed in /usr/local/cuda-10.2/
|
28 |
+
#Samples: Installed in /home/admin/, but missing recommended libraries
|
29 |
+
#
|
30 |
+
#Please make sure that
|
31 |
+
# - PATH includes /usr/local/cuda-10.2/bin
|
32 |
+
# - LD_LIBRARY_PATH includes /usr/local/cuda-10.2/lib64, or, add /usr/local/cuda-10.2/lib64 to /etc/ld.so.conf and run ldconfig as root
|
33 |
+
#
|
34 |
+
#To uninstall the CUDA Toolkit, run cuda-uninstaller in /usr/local/cuda-10.2/bin
|
35 |
+
#To uninstall the NVIDIA Driver, run nvidia-uninstall
|
36 |
+
#
|
37 |
+
#Please see CUDA_Installation_Guide_Linux.pdf in /usr/local/cuda-10.2/doc/pdf for detailed information on setting up CUDA.
|
38 |
+
#Logfile is /var/log/cuda-installer.log
|
39 |
+
|
40 |
+
|
41 |
+
# params:
|
42 |
+
# Default; may be overridden on the command line as "--system_version value".
system_version="centos";


# Parse "--name value" options. Only variables that already have a default
# above may be overridden.
while true; do
  [ -z "${1:-}" ] && break;  # break if there are no arguments
  case "$1" in
    --*) name=$(echo "$1" | sed s/^--// | sed s/-/_/g);
      # Reject options whose variable was not given a default above.
      eval '[ -z "${'"$name"'+xxx}" ]' && echo "$0: invalid option $1" 1>&2 && exit 1;

      # Every option takes a value; without this check "shift 2" fails on a
      # trailing option and the loop never terminates.
      if [ $# -lt 2 ]; then
        echo "$0: option $1 requires an argument" 1>&2
        exit 1;
      fi

      # Read the current (default) value so Boolean options can be validated.
      old_value="$(eval echo \$$name)";
      if [ "${old_value}" == "true" ] || [ "${old_value}" == "false" ]; then
        was_bool=true;
      else
        was_bool=false;
      fi

      # Set the variable to the right value-- the escaped quotes make it work if
      # the option had spaces, like --cmd "queue.pl -sync y"
      eval "${name}=\"$2\"";

      # Check that Boolean-valued arguments are really Boolean.
      if $was_bool && [[ "$2" != "true" && "$2" != "false" ]]; then
        echo "$0: expected \"true\" or \"false\": $1 $2" 1>&2
        exit 1;
      fi
      shift 2;
      ;;

    *) break;
  esac
done


echo "system_version: ${system_version}";


if [ "${system_version}" = "centos" ]; then
  cuda_installer="cuda_11.7.0_515.43.04_linux.run"

  # runfile (local) installer
  wget "https://developer.download.nvidia.com/compute/cuda/11.7.0/local_installers/${cuda_installer}" || exit 1;
  # In the interactive installer, select only "CUDA Toolkit 11.7" and
  # deselect everything else.
  sudo sh "${cuda_installer}" --override

  # Only repoint /usr/local/cuda once the new toolkit directory exists, so a
  # failed or cancelled install does not remove a working CUDA setup.
  if [ ! -d /usr/local/cuda-11.7 ]; then
    echo "$0: /usr/local/cuda-11.7 not found; CUDA Toolkit 11.7 was not installed" 1>&2
    exit 1;
  fi
  rm -rf /usr/local/cuda
  ln -snf /usr/local/cuda-11.7 /usr/local/cuda

  cat ~/.bashrc
  # Single quotes keep the variables unexpanded so they are evaluated at
  # login time. LD_LIBRARY_PATH is exported explicitly; otherwise child
  # processes such as make do not see it when it was not already exported.
  echo 'export PATH="$PATH:/usr/local/cuda/bin"' >> ~/.bashrc
  echo 'export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+$LD_LIBRARY_PATH:}/usr/local/cuda/lib64"' >> ~/.bashrc
  source ~/.bashrc

  # Print the CUDA compiler version to confirm the install.
  nvcc -V

fi
|
script/install_nvidia_driver.sh
ADDED
@@ -0,0 +1,184 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env bash
|
2 |
+
#GPU驱动安装需要先将原有的显示关闭, 重启机器, 再进行安装.
|
3 |
+
#参考链接:
|
4 |
+
#https://blog.csdn.net/kingschan/article/details/19033595
|
5 |
+
#https://blog.csdn.net/HaixWang/article/details/90408538
|
6 |
+
#
|
7 |
+
#>>> yum install -y pciutils
|
8 |
+
#查看 linux 机器上是否有 GPU
|
9 |
+
#lspci |grep -i nvidia
|
10 |
+
#
|
11 |
+
#>>> lspci |grep -i nvidia
|
12 |
+
#00:08.0 3D controller: NVIDIA Corporation TU104GL [Tesla T4] (rev a1)
|
13 |
+
#
|
14 |
+
#
|
15 |
+
#NVIDIA 驱动程序下载
|
16 |
+
#先在 pytorch 上查看应该用什么 cuda 版本, 再安装对应的 cuda-toolkit cuda.
|
17 |
+
#再根据 gpu 版本下载安装对应的 nvidia 驱动
|
18 |
+
#
|
19 |
+
## pytorch 版本
|
20 |
+
#https://pytorch.org/get-started/locally/
|
21 |
+
#
|
22 |
+
## CUDA 下载 (好像不需要这个)
|
23 |
+
#https://developer.nvidia.com/cuda-toolkit-archive
|
24 |
+
#
|
25 |
+
## nvidia 驱动
|
26 |
+
#https://www.nvidia.cn/Download/index.aspx?lang=cn
|
27 |
+
#http://www.nvidia.com/Download/index.aspx
|
28 |
+
#
|
29 |
+
#在下方的下拉列表中进行选择,针对您的 NVIDIA 产品确定合适的驱动。
|
30 |
+
#产品类型:
|
31 |
+
#Data Center / Tesla
|
32 |
+
#产品系列:
|
33 |
+
#T-Series
|
34 |
+
#产品家族:
|
35 |
+
#Tesla T4
|
36 |
+
#操作系统:
|
37 |
+
#Linux 64-bit
|
38 |
+
#CUDA Toolkit:
|
39 |
+
#10.2
|
40 |
+
#语言:
|
41 |
+
#Chinese (Simplified)
|
42 |
+
#
|
43 |
+
#
|
44 |
+
#>>> mkdir -p /data/tianxing
|
45 |
+
#>>> cd /data/tianxing
|
46 |
+
#>>> wget https://cn.download.nvidia.com/tesla/440.118.02/NVIDIA-Linux-x86_64-440.118.02.run
|
47 |
+
#>>> sh NVIDIA-Linux-x86_64-440.118.02.run
|
48 |
+
#
|
49 |
+
## 异常:
|
50 |
+
#ERROR: The Nouveau kernel driver is currently in use by your system. This driver is incompatible with the NVIDIA driver, and must be disabled before proceeding. Please consult the NVIDIA driver README and your
|
51 |
+
#Linux distribution's documentation for details on how to correctly disable the Nouveau kernel driver.
|
52 |
+
#[OK]
|
53 |
+
#
|
54 |
+
#For some distributions, Nouveau can be disabled by adding a file in the modprobe configuration directory. Would you like nvidia-installer to attempt to create this modprobe file for you?
|
55 |
+
#[NO]
|
56 |
+
#
|
57 |
+
#ERROR: Installation has failed. Please see the file '/var/log/nvidia-installer.log' for details. You may find suggestions on fixing installation problems in the README available on the Linux driver download
|
58 |
+
#page at www.nvidia.com.
|
59 |
+
#[OK]
|
60 |
+
#
|
61 |
+
## 参考链接:
|
62 |
+
#https://blog.csdn.net/kingschan/article/details/19033595
|
63 |
+
#
|
64 |
+
## 禁用原有的显卡驱动 nouveau
|
65 |
+
#>>> echo -e "blacklist nouveau\noptions nouveau modeset=0\n" > /etc/modprobe.d/blacklist-nouveau.conf
|
66 |
+
#>>> sudo dracut --force
|
67 |
+
## 重启
|
68 |
+
#>>> reboot
|
69 |
+
#
|
70 |
+
#>>> init 3
|
71 |
+
#>>> sh NVIDIA-Linux-x86_64-440.118.02.run
|
72 |
+
#
|
73 |
+
## 异常
|
74 |
+
#ERROR: Unable to find the kernel source tree for the currently running kernel. Please make sure you have installed the kernel source files for your kernel and that they are properly configured; on Red Hat Linux systems, for example, be sure you have the 'kernel-source' or 'kernel-devel' RPM installed. If you know the correct kernel source files are installed, you may specify the kernel source path with the '--kernel-source-path' command line option.
|
75 |
+
#[OK]
|
76 |
+
#ERROR: Installation has failed. Please see the file '/var/log/nvidia-installer.log' for details. You may find suggestions on fixing installation problems in the README available on the Linux driver download
|
77 |
+
#page at www.nvidia.com.
|
78 |
+
#[OK]
|
79 |
+
#
|
80 |
+
## 参考链接
|
81 |
+
## https://blog.csdn.net/HaixWang/article/details/90408538
|
82 |
+
#
|
83 |
+
#>>> uname -r
|
84 |
+
#3.10.0-1160.49.1.el7.x86_64
|
85 |
+
#>>> yum install kernel-devel kernel-headers -y
|
86 |
+
#>>> yum info kernel-devel kernel-headers
|
87 |
+
#>>> yum install -y "kernel-devel-uname-r == $(uname -r)"
|
88 |
+
#>>> yum -y distro-sync
|
89 |
+
#
|
90 |
+
#>>> sh NVIDIA-Linux-x86_64-440.118.02.run
|
91 |
+
#
|
92 |
+
## 安装成功
|
93 |
+
#WARNING: nvidia-installer was forced to guess the X library path '/usr/lib64' and X module path '/usr/lib64/xorg/modules'; these paths were not queryable from the system. If X fails to find the NVIDIA X driver
|
94 |
+
#module, please install the `pkg-config` utility and the X.Org SDK/development package for your distribution and reinstall the driver.
|
95 |
+
#[OK]
|
96 |
+
#Install NVIDIA's 32-bit compatibility libraries?
|
97 |
+
#[YES]
|
98 |
+
#Installation of the kernel module for the NVIDIA Accelerated Graphics Driver for Linux-x86_64 (version 440.118.02) is now complete.
|
99 |
+
#[OK]
|
100 |
+
#
|
101 |
+
#
|
102 |
+
## 查看 GPU 使用情况; watch -n 1 -d nvidia-smi 每1秒刷新一次.
|
103 |
+
#>>> nvidia-smi
|
104 |
+
#Thu Mar 9 12:00:37 2023
|
105 |
+
#+-----------------------------------------------------------------------------+
|
106 |
+
#| NVIDIA-SMI 440.118.02 Driver Version: 440.118.02 CUDA Version: 10.2 |
|
107 |
+
#|-------------------------------+----------------------+----------------------+
|
108 |
+
#| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |
|
109 |
+
#| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |
|
110 |
+
#|===============================+======================+======================|
|
111 |
+
#| 0 Tesla T4 Off | 00000000:00:08.0 Off | Off |
|
112 |
+
#| N/A 54C P0 22W / 70W | 0MiB / 16127MiB | 0% Default |
|
113 |
+
#+-------------------------------+----------------------+----------------------+
|
114 |
+
#
|
115 |
+
#+-----------------------------------------------------------------------------+
|
116 |
+
#| Processes: GPU Memory |
|
117 |
+
#| GPU PID Type Process name Usage |
|
118 |
+
#|=============================================================================|
|
119 |
+
#| No running processes found |
|
120 |
+
#+-----------------------------------------------------------------------------+
|
121 |
+
#
|
122 |
+
#
|
123 |
+
|
124 |
+
# params
|
125 |
+
# Defaults; each may be overridden on the command line as "--name value".
stage=1
nvidia_driver_filename=https://cn.download.nvidia.com/tesla/440.118.02/NVIDIA-Linux-x86_64-440.118.02.run

# Parse "--name value" options. Only variables that already have a default
# above may be overridden.
while true; do
  [ -z "${1:-}" ] && break;  # break if there are no arguments
  case "$1" in
    --*) name=$(echo "$1" | sed s/^--// | sed s/-/_/g);
      # Reject options whose variable was not given a default above.
      eval '[ -z "${'"$name"'+xxx}" ]' && echo "$0: invalid option $1" 1>&2 && exit 1;

      # Every option takes a value; without this check "shift 2" fails on a
      # trailing option and the loop never terminates.
      if [ $# -lt 2 ]; then
        echo "$0: option $1 requires an argument" 1>&2
        exit 1;
      fi

      # Read the current (default) value so Boolean options can be validated.
      old_value="$(eval echo \$$name)";
      if [ "${old_value}" == "true" ] || [ "${old_value}" == "false" ]; then
        was_bool=true;
      else
        was_bool=false;
      fi

      # Set the variable to the right value-- the escaped quotes make it work if
      # the option had spaces, like --cmd "queue.pl -sync y"
      eval "${name}=\"$2\"";

      # Check that Boolean-valued arguments are really Boolean.
      if $was_bool && [[ "$2" != "true" && "$2" != "false" ]]; then
        echo "$0: expected \"true\" or \"false\": $1 $2" 1>&2
        exit 1;
      fi
      shift 2;
      ;;

    *) break;
  esac
done

echo "stage: ${stage}";

yum -y install wget
yum -y install sudo

# Local file name of the installer, derived from the download URL so that a
# custom --nvidia_driver_filename is honoured by both stages.
nvidia_driver_installer="$(basename "${nvidia_driver_filename}")"

if [ "${stage}" -eq 0 ]; then
  # Stage 0: download the driver, blacklist nouveau, then reboot.
  mkdir -p /data/dep
  cd /data/dep || exit 1;
  # Do not blacklist nouveau and reboot if the installer could not be fetched.
  wget -P /data/dep "${nvidia_driver_filename}" || exit 1;

  echo -e "blacklist nouveau\noptions nouveau modeset=0\n" > /etc/modprobe.d/blacklist-nouveau.conf
  sudo dracut --force
  # Reboot; run stage 1 afterwards.
  reboot
elif [ "${stage}" -eq 1 ]; then
  # Stage 1: install matching kernel headers, then run the driver installer.
  init 3

  yum install -y kernel-devel kernel-headers
  yum info kernel-devel kernel-headers
  yum install -y "kernel-devel-uname-r == $(uname -r)"
  yum -y distro-sync

  cd /data/dep || exit 1;

  # The installer is interactive; press Enter three times when prompted.
  sh "${nvidia_driver_installer}"
  nvidia-smi
fi
|
script/install_openssl.sh
ADDED
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env bash
# Build OpenSSL 1.1.1n from source under /data/dep and install it into
# /usr/local/openssl.
#
# Usage:
#   bash ./script/install_openssl.sh --system_version "centos"
#
# Only the "centos" branch is implemented; any other value does nothing.

# params:
system_version="centos";


# parse options
# Each "--some-name value" pair overrides the already-declared shell variable
# "some_name"; an option that names an undeclared variable is rejected.
while true; do
  [ -z "${1:-}" ] && break;  # break if there are no arguments
  case "$1" in
    --*) name=$(echo "$1" | sed s/^--// | sed s/-/_/g);
      eval '[ -z "${'"$name"'+xxx}" ]' && echo "$0: invalid option $1" 1>&2 && exit 1;

      # Every option takes a value. Without this guard "shift 2" below fails
      # without shifting anything and the loop never terminates.
      if [ $# -lt 2 ]; then
        echo "$0: option $1 requires a value" 1>&2
        exit 1;
      fi

      # Current value of the target variable; used to detect Boolean options.
      old_value="$(eval echo "\$${name}")";
      if [ "${old_value}" == "true" ] || [ "${old_value}" == "false" ]; then
        was_bool=true;
      else
        was_bool=false;
      fi

      # Set the variable to the right value-- the escaped quotes make it work if
      # the option had spaces, like --cmd "queue.pl -sync y"
      eval "${name}=\"$2\"";

      # Check that Boolean-valued arguments are really Boolean.
      if $was_bool && [[ "$2" != "true" && "$2" != "false" ]]; then
        echo "$0: expected \"true\" or \"false\": $1 $2" 1>&2
        exit 1;
      fi
      shift 2;
      ;;

    *) break;
  esac
done

echo "system_version: ${system_version}";


if [ "${system_version}" = "centos" ]; then
  mkdir -p /data/dep
  cd /data/dep || exit 1;

  if [ ! -e openssl-1.1.1n.tar.gz ]; then
    # NOTE(review): --no-check-certificate disables TLS verification for a
    # tarball that is then compiled and installed system-wide. Consider
    # verifying the published checksum of the download.
    wget https://www.openssl.org/source/openssl-1.1.1n.tar.gz --no-check-certificate
  fi

  if [ ! -d openssl-1.1.1n ]; then
    tar -zxvf openssl-1.1.1n.tar.gz
  fi

  # Enter the source tree unconditionally: on a re-run the directory already
  # exists and the build must still happen inside it, not in /data/dep.
  cd /data/dep/openssl-1.1.1n || exit 1;

  # "config" auto-detects the platform target; "Configure" in the 1.1.1
  # series expects an explicit os/compiler argument.
  ./config --prefix=/usr/local/openssl

  # Bound the parallelism to the number of CPUs instead of an unlimited -j.
  make -j "$(nproc)" && make install

fi
|
script/install_python.sh
ADDED
@@ -0,0 +1,126 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env bash
# Build CPython from source and link it as /usr/local/bin/python3 and pip3.
#
# Usage (the option parser relies on bash features, so run it with bash):
#   bash ./script/install_python.sh --system_version "centos" --python_version "3.10.11"
#
# Supported values for --system_version: "centos", "ubuntu".

# params:
python_version="3.8.10";
system_version="centos";


# parse options
# Each "--some-name value" pair overrides the already-declared shell variable
# "some_name"; an option that names an undeclared variable is rejected.
while true; do
  [ -z "${1:-}" ] && break;  # break if there are no arguments
  case "$1" in
    --*) name=$(echo "$1" | sed s/^--// | sed s/-/_/g);
      eval '[ -z "${'"$name"'+xxx}" ]' && echo "$0: invalid option $1" 1>&2 && exit 1;

      # Every option takes a value. Without this guard "shift 2" below fails
      # without shifting anything and the loop never terminates.
      if [ $# -lt 2 ]; then
        echo "$0: option $1 requires a value" 1>&2
        exit 1;
      fi

      # Current value of the target variable; used to detect Boolean options.
      old_value="$(eval echo "\$${name}")";
      if [ "${old_value}" == "true" ] || [ "${old_value}" == "false" ]; then
        was_bool=true;
      else
        was_bool=false;
      fi

      # Set the variable to the right value-- the escaped quotes make it work if
      # the option had spaces, like --cmd "queue.pl -sync y"
      eval "${name}=\"$2\"";

      # Check that Boolean-valued arguments are really Boolean.
      if $was_bool && [[ "$2" != "true" && "$2" != "false" ]]; then
        echo "$0: expected \"true\" or \"false\": $1 $2" 1>&2
        exit 1;
      fi
      shift 2;
      ;;

    *) break;
  esac
done

echo "python_version: ${python_version}";
echo "system_version: ${system_version}";


if [ "${system_version}" = "centos" ]; then
  # Install the toolchain and headers needed to compile Python.
  yum -y groupinstall "Development tools"
  yum -y install zlib-devel bzip2-devel openssl-devel ncurses-devel sqlite-devel readline-devel tk-devel gdbm-devel db4-devel libpcap-devel xz-devel
  yum install libffi-devel -y
  yum install -y wget
  yum install -y make

  # -p: do not fail when the directory is left over from a previous run.
  mkdir -p /data/dep
  wget -P /data/dep "https://www.python.org/ftp/python/${python_version}/Python-${python_version}.tgz"

  cd /data/dep || exit 1;
  tar -zxvf "Python-${python_version}.tgz"
  cd "/data/dep/Python-${python_version}" || exit 1;

  mkdir -p "/usr/local/python-${python_version}"
  ./configure --prefix="/usr/local/python-${python_version}"
  make && make install

  # Sanity-check the freshly installed interpreter and pip.
  "/usr/local/python-${python_version}/bin/python3" -V
  "/usr/local/python-${python_version}/bin/pip3" -V

  # Repoint the global python3/pip3 links at this build.
  rm -rf /usr/local/bin/python3
  rm -rf /usr/local/bin/pip3
  ln -s "/usr/local/python-${python_version}/bin/python3" /usr/local/bin/python3
  ln -s "/usr/local/python-${python_version}/bin/pip3" /usr/local/bin/pip3

  python3 -V
  pip3 -V

elif [ "${system_version}" = "ubuntu" ]; then
  # Install the toolchain and headers needed to compile Python.
  # https://zhuanlan.zhihu.com/p/506491209

  # Refresh the package index.
  sudo apt update
  # List the currently available upgrades.
  sudo apt list --upgradable
  # Apply them.
  sudo apt -y upgrade
  # Install the GCC compiler (-y so the script does not stop at a prompt).
  sudo apt install -y gcc
  # Check that the compiler is usable.
  gcc -v

  # Build dependencies.
  sudo apt install -y build-essential zlib1g-dev libncurses5-dev libgdbm-dev libnss3-dev libssl-dev libreadline-dev libffi-dev libbz2-dev liblzma-dev sqlite3 libsqlite3-dev tk-dev uuid-dev libgdbm-compat-dev

  # -p: do not fail when the directory is left over from a previous run.
  mkdir -p /data/dep

  sudo wget -P /data/dep "https://www.python.org/ftp/python/${python_version}/Python-${python_version}.tgz"

  cd /data/dep || exit 1;
  tar -zxvf "Python-${python_version}.tgz"
  cd "/data/dep/Python-${python_version}" || exit 1;
  mkdir -p "/usr/local/python-${python_version}"

  # Check dependencies and configure the build.
  sudo ./configure --prefix="/usr/local/python-${python_version}" --enable-optimizations --with-lto --enable-shared
  cpu_count=$(nproc)
  sudo make -j "${cpu_count}"
  # The build alone leaves nothing under the prefix; install it so the
  # checks and symlinks below point at real files.
  sudo make install

  # --enable-shared builds libpython as a shared object under the prefix;
  # register that directory so the dynamic loader can find it.
  echo "/usr/local/python-${python_version}/lib" | sudo tee "/etc/ld.so.conf.d/python-${python_version}.conf" > /dev/null
  sudo ldconfig

  # Sanity-check the freshly installed interpreter and pip.
  "/usr/local/python-${python_version}/bin/python3" -V
  "/usr/local/python-${python_version}/bin/pip3" -V

  # Repoint the global python3/pip3 links at this build.
  rm -rf /usr/local/bin/python3
  rm -rf /usr/local/bin/pip3
  ln -s "/usr/local/python-${python_version}/bin/python3" /usr/local/bin/python3
  ln -s "/usr/local/python-${python_version}/bin/pip3" /usr/local/bin/pip3

  python3 -V
  pip3 -V
fi
|
toolbox/__init__.py
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
|
4 |
+
|
5 |
+
if __name__ == '__main__':
|
6 |
+
pass
|
toolbox/json/__init__.py
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
|
4 |
+
|
5 |
+
if __name__ == '__main__':
|
6 |
+
pass
|
toolbox/json/misc.py
ADDED
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
from typing import Callable
|
4 |
+
|
5 |
+
|
6 |
+
def traverse(js, callback: Callable, *args, **kwargs):
    """Rebuild a JSON-like structure, passing every int/str leaf to `callback`.

    Lists, tuples and dicts are walked recursively and rebuilt as new
    list / tuple / dict objects; dict keys are traversed as well as values.
    Any other value (float, None, ...) is returned unchanged.

    :param js: the structure (or leaf value) to walk.
    :param callback: called as ``callback(leaf, *args, **kwargs)`` for each
        int or str leaf; its return value replaces the leaf.
    :return: the rebuilt structure.
    """
    def descend(node):
        return traverse(node, callback, *args, **kwargs)

    if isinstance(js, list):
        return [descend(item) for item in js]
    if isinstance(js, tuple):
        return tuple([descend(item) for item in js])
    if isinstance(js, dict):
        rebuilt = dict()
        for key, value in js.items():
            new_key = descend(key)
            rebuilt[new_key] = descend(value)
        return rebuilt
    if isinstance(js, (int, str)):
        return callback(js, *args, **kwargs)
    return js
|
32 |
+
|
33 |
+
|
34 |
+
def demo1():
    """Demonstrate `traverse`: strip the leading ``$`` from string leaves.

    Prints the transformed config and returns None.
    """
    # Placeholder values only. Demo data must never carry real hosts,
    # user names or passwords, because this file is committed to the repo.
    d = {
        "env": "ppe",
        "mysql_connect": {
            "host": "$mysql_connect_host",
            "port": 3306,
            "user": "$mysql_connect_user",
            "password": "$mysql_connect_password",
            "database": "example_db",
            "charset": "utf8"
        },
        "es_connect": {
            "hosts": ["127.0.0.1"],
            "http_auth": ["elastic", "$es_connect_password"],
            "port": 9200
        }
    }

    def callback(s):
        # Ints reach the callback too, so guard the string operation.
        if isinstance(s, str) and s.startswith('$'):
            return s[1:]
        return s

    result = traverse(d, callback=callback)
    print(result)
    return
|
60 |
+
|
61 |
+
|
62 |
+
if __name__ == '__main__':
|
63 |
+
demo1()
|
toolbox/os/__init__.py
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
|
4 |
+
|
5 |
+
if __name__ == '__main__':
|
6 |
+
pass
|
toolbox/os/environment.py
ADDED
@@ -0,0 +1,114 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
import json
|
4 |
+
import os
|
5 |
+
|
6 |
+
from dotenv import load_dotenv
|
7 |
+
from dotenv.main import DotEnv
|
8 |
+
|
9 |
+
from toolbox.json.misc import traverse
|
10 |
+
|
11 |
+
|
12 |
+
class EnvironmentManager(object):
    """Load ``<path>/<env>.env`` into the process environment and read from it.

    Every value returned by `get` is also remembered in ``self._environ``.
    """

    def __init__(self, path, env, override=False):
        """Load the dotenv file for `env` found under `path`.

        :param path: directory containing the ``*.env`` files.
        :param env: environment name; the file read is ``<env>.env``.
        :param override: forwarded to ``load_dotenv``.
        """
        self.filename = os.path.join(path, '{}.env'.format(env))
        load_dotenv(dotenv_path=self.filename, override=override)

        # key -> value as last returned by `get`
        self._environ = dict()

    def open_dotenv(self, filename: str = None):
        """Parse a dotenv file (default: the one given at construction) into a dict."""
        parser = DotEnv(
            dotenv_path=filename or self.filename,
            stream=None,
            verbose=False,
            interpolate=False,
            override=False,
            encoding="utf-8",
        )
        return parser.dict()

    def get(self, key, default=None, dtype=str):
        """Return environment variable `key` converted with `dtype`.

        When the variable is not set, `default` is returned as-is (it is not
        passed through `dtype`). The returned value is recorded in
        ``self._environ``.
        """
        raw = os.environ.get(key)
        value = default if raw is None else dtype(raw)
        self._environ[key] = value
        return value
|
48 |
+
|
49 |
+
|
50 |
+
# Converter name (as written in the config placeholder) -> callable applied
# to the raw environment value.
_DEFAULT_DTYPE_MAP = {
    'int': int,
    'float': float,
    'str': str,
    'json.loads': json.loads
}


class JsonConfig(object):
    """
    Resolve placeholders of the form `$float:threshold` inside JSON data:
    look up `threshold` in the environment, then convert it with the
    converter registered under `float`.
    """
    def __init__(self, dtype_map: dict = None, environment: "EnvironmentManager" = None):
        """
        :param dtype_map: converter name -> callable; defaults to `_DEFAULT_DTYPE_MAP`.
        :param environment: object with a ``get(key)`` method used for lookups;
            defaults to ``os.environ``.
        """
        self.dtype_map = dtype_map or _DEFAULT_DTYPE_MAP
        self.environment = environment or os.environ

    def sanitize_by_filename(self, filename: str):
        """Load the JSON file `filename` and resolve every placeholder in it."""
        with open(filename, 'r', encoding='utf-8') as f:
            js = json.load(f)

        return self.sanitize_by_json(js)

    def sanitize_by_json(self, js):
        """Return a copy of `js` with every placeholder resolved."""
        js = traverse(
            js,
            callback=self.sanitize,
            environment=self.environment
        )
        return js

    def sanitize(self, string, environment):
        """Resolve one value; supports environment placeholders starting with `$`.

        :param string: candidate value; anything that is not a str starting
            with `$` is returned unchanged.
        :param environment: object with a ``get(key)`` method.
        :return: the converted environment value, or `string` itself.
        :raises ValueError: the placeholder has no ``:`` separator.
        :raises KeyError: the converter name is not in ``self.dtype_map``.
        :raises AssertionError: the environment has no value for the key.
        """
        if not (isinstance(string, str) and string.startswith('$')):
            return string

        # Split on the FIRST colon only, so keys that themselves contain ':'
        # are still accepted.
        dtype_name, separator, key = string[1:].partition(':')
        if not separator:
            raise ValueError(
                'invalid placeholder, expected `$dtype:key`. value: {}'.format(string)
            )
        dtype = self.dtype_map[dtype_name]

        value = environment.get(key)
        if value is None:
            raise AssertionError('environment not exist. key: {}'.format(key))

        return dtype(value)
|
96 |
+
|
97 |
+
|
98 |
+
def demo1():
    """Load the `dev` dotenv file and print the parsed `init_scenes` value.

    Also prints the values the manager has recorded so far. Returns None.
    """
    import json

    from project_settings import project_path

    dotenv_dir = os.path.join(project_path, 'server/callbot_server/dotenv')
    manager = EnvironmentManager(path=dotenv_dir, env='dev')

    scenes = manager.get(key='init_scenes', dtype=json.loads)
    print(scenes)
    print(manager._environ)
|
111 |
+
|
112 |
+
|
113 |
+
if __name__ == '__main__':
|
114 |
+
demo1()
|
toolbox/os/other.py
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import inspect
|
3 |
+
|
4 |
+
|
5 |
+
def pwd():
    """Return the directory of the file from which this function is called."""
    caller = inspect.stack()[1]
    try:
        module = inspect.getmodule(caller[0])
        # getmodule() returns None when the calling code does not belong to
        # an importable module; fall back to the filename recorded on the
        # caller's frame instead of failing on `None.__file__`.
        filename = getattr(module, '__file__', None) or caller[1]
    finally:
        # Drop the frame reference promptly to avoid a reference cycle.
        del caller
    return os.path.dirname(os.path.abspath(filename))
|
toolbox/transformers/__init__.py
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
|
4 |
+
|
5 |
+
if __name__ == '__main__':
|
6 |
+
pass
|
toolbox/transformers/data/__init__.py
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
|
4 |
+
|
5 |
+
if __name__ == '__main__':
|
6 |
+
pass
|
toolbox/transformers/data/data_collator.py
ADDED
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
from typing import Any, Dict, List
|
4 |
+
import torch
|
5 |
+
|
6 |
+
|
7 |
+
class SFTDataCollator(object):
    """Pad / truncate a batch of SFT examples and convert it to long tensors."""

    def __init__(self, tokenizer, max_seq_length):
        self.tokenizer = tokenizer
        self.max_seq_length = max_seq_length
        self.pad_token_id = tokenizer.pad_token_id

    def __call__(self, batch: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Collate `batch` into `input_ids`, `attention_mask` and `target_mask` tensors.

        Each example provides the three keys as lists of equal length. Rows
        are padded up to the longest example in the batch, capped at
        ``max_seq_length``, and longer rows are cut to ``max_seq_length``.
        """
        # Longest example in this batch, capped at max_seq_length.
        lengths = [len(example['input_ids']) for example in batch]
        batch_max_len = min(max(lengths), self.max_seq_length)

        # Column name -> value used to pad that column.
        fill_values = {
            "input_ids": self.pad_token_id,
            "attention_mask": 0,
            "target_mask": 0,
        }
        columns = {name: [] for name in fill_values}

        for example in batch:
            rows = {name: example[name] for name in fill_values}
            # A negative count yields no padding; truncation handles the rest.
            padding_len = batch_max_len - len(rows["input_ids"])
            for name, fill in fill_values.items():
                padded = rows[name] + [fill] * padding_len
                columns[name].append(padded[:self.max_seq_length])

        # Convert the nested lists to tensors: the final model inputs.
        return {
            name: torch.tensor(values, dtype=torch.long)
            for name, values in columns.items()
        }
|
51 |
+
|
52 |
+
|
53 |
+
if __name__ == '__main__':
|
54 |
+
pass
|
toolbox/transformers/data/dataset/__init__.py
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
|
4 |
+
|
5 |
+
if __name__ == '__main__':
|
6 |
+
pass
|
toolbox/transformers/data/dataset/dataset.py
ADDED
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
import json
|
4 |
+
from typing import List
|
5 |
+
|
6 |
+
from torch.utils.data import Dataset
|
7 |
+
|
8 |
+
|
9 |
+
class SFTDataset(Dataset):
    """In-memory dataset of samples read from a JSON-lines file."""

    def __init__(self):
        # One dict per JSON line, in file order.
        self.samples: List[dict] = list()

    def read(self, filename: str):
        """Load `filename` (one JSON object per line), replacing current samples.

        Blank lines, such as a trailing newline at the end of the file, are
        skipped instead of failing JSON parsing.

        :param filename: path of a UTF-8 encoded JSON-lines file.
        :return: self, so calls can be chained.
        """
        samples = list()
        with open(filename, "r", encoding="utf-8") as f:
            for row in f:
                row = row.strip()
                if not row:
                    continue
                samples.append(json.loads(row))
        self.samples = samples
        return self

    def __getitem__(self, index):
        sample = self.samples[index]
        return sample

    def __len__(self):
        return len(self.samples)
|
29 |
+
|
30 |
+
|
31 |
+
class ChatGLM2SFTDataset(SFTDataset):
    """SFT dataset that renders multi-round conversations into token ids.

    Each sample is read as ``sample["conversation"]``, an iterable of dicts
    with "human" and "assistant" entries.
    """

    def __init__(self, tokenizer, max_seq_length: int):
        super(ChatGLM2SFTDataset, self).__init__()
        self.tokenizer = tokenizer
        self.max_seq_length = max_seq_length

        # Templates for the human prompt of round N and the assistant reply.
        self.input_format = '[Round {}]\n\n问:{}\n\n答:'
        self.target_format = "{}"

    def __getitem__(self, index):
        """Return `input_ids`, `attention_mask` and `target_mask` lists for one sample.

        `target_mask` is 1 on assistant tokens (including the EOS appended
        after each reply) and 0 on prompt tokens. All three lists are cut to
        ``max_seq_length``.
        """
        conversation = self.samples[index]["conversation"]

        # Alternating [prompt, reply, prompt, reply, ...]; rounds count from 1.
        utterances = list()
        for round_no, turn in enumerate(conversation, start=1):
            prompt = self.input_format.format(round_no, turn["human"])
            reply = self.target_format.format(turn["assistant"])
            utterances.extend([prompt, reply])
        utterances_ids = self.tokenizer(utterances, add_special_tokens=False).input_ids

        input_ids = list()
        target_mask = list()
        for position, token_ids in enumerate(utterances_ids):
            input_ids.extend(token_ids)
            if position % 2 == 1:
                # Odd positions are assistant replies: close with EOS and
                # mark reply tokens plus the EOS as targets.
                input_ids.append(self.tokenizer.eos_token_id)
                target_mask.extend([1] * (len(token_ids) + 1))
            else:
                target_mask.extend([0] * (len(token_ids)))

        assert len(input_ids) == len(target_mask)

        limit = self.max_seq_length
        input_ids = input_ids[:limit]
        target_mask = target_mask[:limit]
        attention_mask = [1] * len(input_ids)

        assert len(input_ids) == len(target_mask) == len(attention_mask)

        return {
            "input_ids": input_ids,
            "attention_mask": attention_mask,
            "target_mask": target_mask
        }
|
76 |
+
|
77 |
+
|
78 |
+
if __name__ == '__main__':
|
79 |
+
pass
|
toolbox/transformers/modules/__init__.py
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
|
4 |
+
|
5 |
+
if __name__ == '__main__':
|
6 |
+
pass
|
toolbox/transformers/modules/loss.py
ADDED
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
import torch
|
4 |
+
import torch.nn as nn
|
5 |
+
|
6 |
+
|
7 |
+
class Loss(object):
    """
    Base class of all loss callables.
    """
    def __call__(self, model, inputs, training_args, return_outputs=False):
        """
        Compute the loss.

        NOTE(review): the original note here said return_outputs=True is used
        for training and False for eval/predict. Upstream `transformers.Trainer`
        appears to do the opposite (`training_step` leaves it False,
        `prediction_step` passes True) -- confirm against the pinned version.

        :param model: the model
        :param inputs: model inputs, dict
        :param training_args: training configuration arguments
        :param return_outputs: whether to also return the model outputs
        :return: the loss, or (loss, outputs) when `return_outputs` is true
        :raises NotImplementedError: always; subclasses must override this.
        """
        # `NotImplemented` is a comparison sentinel, not an exception;
        # raising it fails with TypeError instead of the intended error.
        raise NotImplementedError
|
22 |
+
|
23 |
+
|
24 |
+
class TargetLMLoss(Loss):
    """Next-token cross-entropy computed only where `target_mask` is 1."""

    def __init__(self, ignore_index):
        super().__init__()
        self.ignore_index = ignore_index
        self.loss_fn = nn.CrossEntropyLoss(ignore_index=ignore_index)

    def __call__(self, model, inputs, training_args, return_outputs=False):
        """Run `model` on the batch and return the masked language-model loss.

        :param model: callable accepting `input_ids`, `attention_mask` and
            `return_dict`; its result holds the logits under "logits" (dict)
            or at index 0.
        :param inputs: dict with "input_ids", "attention_mask", "target_mask".
        :param training_args: unused here.
        :param return_outputs: when true, return ``(loss, outputs)``.
        """
        input_ids = inputs["input_ids"]
        attention_mask = inputs["attention_mask"]
        target_mask = inputs["target_mask"]

        # Forward pass.
        outputs = model(input_ids=input_ids, attention_mask=attention_mask, return_dict=True)
        if isinstance(outputs, dict):
            logits = outputs["logits"]
        else:
            logits = outputs[0]

        # Positions outside the target get ignore_index, so only target
        # tokens contribute to the loss.
        labels = torch.where(target_mask == 1, input_ids, self.ignore_index)

        # Position t predicts token t + 1: drop the last logit and first label.
        predictions = logits[..., :-1, :].contiguous()
        expected = labels[..., 1:].contiguous()

        # Flatten the tokens
        vocab_size = predictions.size(-1)
        loss = self.loss_fn(predictions.view(-1, vocab_size), expected.view(-1))

        if return_outputs:
            return (loss, outputs)
        return loss
|
48 |
+
|
49 |
+
|
50 |
+
if __name__ == '__main__':
|
51 |
+
pass
|
toolbox/transformers/trainer.py
ADDED
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
import transformers
|
4 |
+
from transformers import (
|
5 |
+
PreTrainedModel,
|
6 |
+
TrainingArguments,
|
7 |
+
DataCollator,
|
8 |
+
PreTrainedTokenizerBase,
|
9 |
+
EvalPrediction,
|
10 |
+
TrainerCallback,
|
11 |
+
)
|
12 |
+
from typing import Callable, Dict, List, Optional, Tuple, Union, Any
|
13 |
+
from torch import nn
|
14 |
+
from torch.utils.data import Dataset, DataLoader
|
15 |
+
from transformers.utils import (
|
16 |
+
logging,
|
17 |
+
)
|
18 |
+
from typing import Optional
|
19 |
+
import os
|
20 |
+
import torch
|
21 |
+
|
22 |
+
|
23 |
+
logger = logging.get_logger(__name__)
|
24 |
+
|
25 |
+
# Name of the files used for checkpointing
|
26 |
+
TRAINING_ARGS_NAME = "training_args.bin"
|
27 |
+
TRAINER_STATE_NAME = "trainer_state.json"
|
28 |
+
OPTIMIZER_NAME = "optimizer.pt"
|
29 |
+
SCHEDULER_NAME = "scheduler.pt"
|
30 |
+
SCALER_NAME = "scaler.pt"
|
31 |
+
|
32 |
+
|
33 |
+
class Trainer(transformers.Trainer):
    """
    Main change: a custom loss computation can be plugged in through the
    `compute_loss` constructor argument.
    """
    def __init__(
            self,
            model: Union[PreTrainedModel, nn.Module] = None,
            args: TrainingArguments = None,
            data_collator: Optional[DataCollator] = None,
            train_dataset: Optional[Dataset] = None,
            eval_dataset: Optional[Dataset] = None,
            tokenizer: Optional[PreTrainedTokenizerBase] = None,
            model_init: Callable[[], PreTrainedModel] = None,
            compute_metrics: Optional[Callable[[EvalPrediction], Dict]] = None,
            callbacks: Optional[List[TrainerCallback]] = None,
            optimizers: Tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR] = (None, None),
            preprocess_logits_for_metrics: Callable[[torch.Tensor, torch.Tensor], torch.Tensor] = None,
            compute_loss=None,
    ):
        """
        All arguments except `compute_loss` are forwarded unchanged to
        `transformers.Trainer`.

        :param compute_loss: optional callable invoked as
            ``compute_loss(model, inputs, training_args, return_outputs)``.
        """
        super(Trainer, self).__init__(
            model=model,
            args=args,
            data_collator=data_collator,
            train_dataset=train_dataset,
            eval_dataset=eval_dataset,
            tokenizer=tokenizer,
            model_init=model_init,
            compute_metrics=compute_metrics,
            callbacks=callbacks,
            optimizers=optimizers,
            preprocess_logits_for_metrics=preprocess_logits_for_metrics,
        )
        self.loss_func = compute_loss

    def compute_loss(self, model, inputs, return_outputs=False, **kwargs):
        """
        Override of the loss computation: delegate to the callable given at
        construction time.

        :param model: the model being trained or evaluated.
        :param inputs: the batch produced by the data collator.
        :param return_outputs: forwarded to the loss callable.
        :param kwargs: extra keyword arguments the base Trainer may pass
            (newer transformers releases add `num_items_in_batch`); accepted
            so the override works across versions. They are not forwarded to
            the custom loss callable.
        """
        if self.loss_func is None:
            # No custom loss supplied: use the stock behaviour instead of
            # failing on a call to None.
            return super().compute_loss(model, inputs, return_outputs=return_outputs, **kwargs)
        return self.loss_func(model, inputs, self.args, return_outputs)
|
75 |
+
|
76 |
+
|
77 |
+
class LoRATrainer(Trainer):
    """
    Changes the checkpoint saving logic so that only what
    ``self.model.save_pretrained`` writes is stored (the LoRA weights and config).
    """
    def _save(self, output_dir: Optional[str] = None, state_dict=None):
        """Write the model, the tokenizer (if any) and the training args to `output_dir`.

        :param output_dir: target directory; defaults to ``self.args.output_dir``.
        :param state_dict: forwarded to ``self.model.save_pretrained``.
        """
        # If we are executing this function, we are the process zero, so we don't check for that.
        if output_dir is None:
            output_dir = self.args.output_dir
        os.makedirs(output_dir, exist_ok=True)
        logger.info(f"Saving model checkpoint to {output_dir}")

        # Save the LoRA weights and config.
        self.model.save_pretrained(
            output_dir,
            state_dict=state_dict,
            safe_serialization=self.args.save_safetensors,
        )

        if self.tokenizer is not None:
            self.tokenizer.save_pretrained(output_dir)

        # Good practice: save your training arguments together with the trained model
        args_path = os.path.join(output_dir, TRAINING_ARGS_NAME)
        torch.save(self.args, args_path)
|
96 |
+
|
97 |
+
|
98 |
+
if __name__ == '__main__':
|
99 |
+
pass
|