Spaces:
Sleeping
Sleeping
Upload 6 files
Browse files- adapter_utils.py +14 -0
- config.py +53 -0
- data_utils.py +35 -0
- model_utils.py +48 -0
- quantization_utils.py +13 -0
- requirements.txt +10 -0
adapter_utils.py
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from peft import LoraConfig
|
2 |
+
|
3 |
+
import config
|
4 |
+
|
5 |
+
def load_adapter(target_modules):
    """Build the LoRA adapter configuration from the project settings.

    Args:
        target_modules: Forwarded unchanged as ``LoraConfig``'s
            ``target_modules`` argument.

    Returns:
        A ``peft.LoraConfig`` whose alpha, dropout, rank and task type are
        read from the ``config`` module; bias is fixed to ``"none"``.
    """
    lora_settings = {
        "lora_alpha": config.LORA_ALPHA,
        "lora_dropout": config.LORA_DROPOUT,
        "r": config.LORA_RANK,
        "bias": "none",
        "task_type": config.TASK_TYPE,
        "target_modules": target_modules,
    }
    return LoraConfig(**lora_settings)
|
config.py
ADDED
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Project-wide settings for the fine-tuning pipeline.

Plain module-level constants, grouped by the stage that reads them.
"""

# ---------------------------------------------------------------------------
# Data
# ---------------------------------------------------------------------------
DATASET = "OpenAssistant/oasst1"
# Name of the column that holds the combined prompt/response text.
DATASET_TEXT_FIELD = "prompt_response"

# ---------------------------------------------------------------------------
# Model
# ---------------------------------------------------------------------------
MODEL_NAME = "microsoft/phi-2"
TRUST_REMOTE_CODE = True
# Value assigned to ``model.config.use_cache`` after loading.
ENABLE_MODEL_CONFIG_CACHE = False

# ---------------------------------------------------------------------------
# Quantization
# ---------------------------------------------------------------------------
ENABLE_4BIT = True
QUANTIZATION_TYPE = "nf4"

# ---------------------------------------------------------------------------
# Adapter (LoRA)
# ---------------------------------------------------------------------------
LORA_ALPHA = 16
LORA_DROPOUT = 0.1
LORA_RANK = 64
TASK_TYPE = "CAUSAL_LM"

# ---------------------------------------------------------------------------
# Training arguments
# ---------------------------------------------------------------------------
MODEL_OUTPUT_DIR = "results/"
PER_DEVICE_TRAIN_BATCH_SIZE = 4
GRADIENT_ACCUMULATION_STEPS = 4
OPTIM = "paged_adamw_32bit"
SAVE_STEPS = 100
LOGGING_STEPS = 10
LEARNING_RATE = 2e-4
MAX_GRAD_NORM = 0.3
MAX_STEPS = 700
WARMUP_RATIO = 0.05
LR_SCHEDULER_TYPE = "constant"
ENABLE_FP_16 = True
ENABLE_GRADIENT_CHECKPOINTING = False

# ---------------------------------------------------------------------------
# Trainer
# ---------------------------------------------------------------------------
MAX_SEQ_LENGTH = 512

# ---------------------------------------------------------------------------
# Inference
# ---------------------------------------------------------------------------
TASK = "text-generation"
|
data_utils.py
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import datasets
|
2 |
+
from datasets import load_dataset
|
3 |
+
|
4 |
+
import config
|
5 |
+
|
6 |
+
def download(mode):
    """Fetch one split of the configured dataset.

    Args:
        mode: Split name, forwarded as ``split=`` to ``load_dataset``.

    Returns:
        Whatever ``load_dataset`` returns for ``config.DATASET`` and the
        requested split.
    """
    dataset_name = config.DATASET
    print("Downloading Dataset - ", dataset_name, "...")
    return load_dataset(dataset_name, split=mode)
|
10 |
+
|
11 |
+
def prepare_prompts_responses(dataset):
    """Pair each top-ranked assistant reply with the prompt it answers.

    The dataset is converted to a pandas DataFrame. Rows with
    ``role == "assistant"`` and ``rank == 0.0`` are kept, and for each one the
    text of the ``prompter`` row whose ``message_id`` equals the reply's
    ``parent_id`` is looked up. Both texts are joined into a single
    ``"### Human: ... ### Assistant: ..."`` string.

    Args:
        dataset: Object exposing ``to_pandas()`` that yields the columns
            ``role``, ``rank``, ``message_id``, ``parent_id`` and ``text``.

    Returns:
        A DataFrame of the selected assistant rows with one extra column,
        named by ``config.DATASET_TEXT_FIELD``, holding the combined text.

    Raises:
        KeyError: If a selected reply's ``parent_id`` has no prompter row.
    """
    print("Preparing Prompt and Assistant....")
    dataset_df = dataset.to_pandas()

    # Prompt text indexed by message id, for direct lookup by parent_id.
    prompter_rows = dataset_df[dataset_df.role == "prompter"]
    prompt_text_by_id = prompter_rows.set_index("message_id")["text"]

    # Explicit copy: a column is added below, and assigning onto a filtered
    # slice of dataset_df is chained assignment (SettingWithCopyWarning).
    is_top_reply = (dataset_df.role == "assistant") & (dataset_df["rank"] == 0.0)
    assistants = dataset_df[is_top_reply].copy()

    assistants[config.DATASET_TEXT_FIELD] = [
        "### Human: " + prompt_text_by_id.loc[parent_id] + " ### Assistant: " + reply_text
        for parent_id, reply_text in zip(assistants["parent_id"], assistants["text"])
    ]

    return assistants
|
26 |
+
|
27 |
+
def preparedata(mode):
    """Download a split and turn it into a prompt/response ``Dataset``.

    Args:
        mode: Split name handed to ``download``.

    Returns:
        A ``datasets.Dataset`` built from the DataFrame produced by
        ``prepare_prompts_responses``.
    """
    print("Preparing data for - ", mode, "...")
    raw_split = download(mode=mode)
    paired_frame = prepare_prompts_responses(raw_split)
    return datasets.Dataset.from_pandas(paired_frame)
|
33 |
+
|
34 |
+
|
35 |
+
|
model_utils.py
ADDED
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from trl import SFTTrainer
|
2 |
+
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments
|
3 |
+
|
4 |
+
import config
|
5 |
+
|
6 |
+
def load_model(quantization_config):
    """Load the configured causal language model.

    Args:
        quantization_config: Forwarded as ``quantization_config`` to
            ``AutoModelForCausalLM.from_pretrained``.

    Returns:
        The loaded model, with ``model.config.use_cache`` set to
        ``config.ENABLE_MODEL_CONFIG_CACHE``.
    """
    load_kwargs = {
        "quantization_config": quantization_config,
        "trust_remote_code": config.TRUST_REMOTE_CODE,
    }
    model = AutoModelForCausalLM.from_pretrained(config.MODEL_NAME, **load_kwargs)
    model.config.use_cache = config.ENABLE_MODEL_CONFIG_CACHE
    return model
|
14 |
+
|
15 |
+
def load_tokenizers():
    """Load the tokenizer that belongs to ``config.MODEL_NAME``.

    Returns:
        The object returned by ``AutoTokenizer.from_pretrained``, called with
        the configured ``trust_remote_code`` flag.
    """
    return AutoTokenizer.from_pretrained(
        config.MODEL_NAME,
        trust_remote_code=config.TRUST_REMOTE_CODE,
    )
|
20 |
+
|
21 |
+
def load_training_arguments():
    """Build the ``TrainingArguments`` from the training section of ``config``.

    Every training setting declared in ``config`` is forwarded, including
    ``LR_SCHEDULER_TYPE``.

    Returns:
        A ``transformers.TrainingArguments`` instance.
    """
    training_arguments = TrainingArguments(
        output_dir=config.MODEL_OUTPUT_DIR,
        per_device_train_batch_size=config.PER_DEVICE_TRAIN_BATCH_SIZE,
        gradient_accumulation_steps=config.GRADIENT_ACCUMULATION_STEPS,
        optim=config.OPTIM,
        save_steps=config.SAVE_STEPS,
        logging_steps=config.LOGGING_STEPS,
        learning_rate=config.LEARNING_RATE,
        fp16=config.ENABLE_FP_16,
        max_grad_norm=config.MAX_GRAD_NORM,
        max_steps=config.MAX_STEPS,
        warmup_ratio=config.WARMUP_RATIO,
        # Without this the configured schedule is ignored and the library
        # default scheduler is used instead.
        lr_scheduler_type=config.LR_SCHEDULER_TYPE,
        gradient_checkpointing=config.ENABLE_GRADIENT_CHECKPOINTING,
    )
    return training_arguments
|
37 |
+
|
38 |
+
def load_trainer(model, training_dataset, peft_config, tokenizer, training_arguments):
    """Assemble the ``SFTTrainer`` for supervised fine-tuning.

    Args:
        model: Model to fine-tune.
        training_dataset: Passed as ``train_dataset``.
        peft_config: Adapter configuration passed as ``peft_config``.
        tokenizer: Tokenizer passed as ``tokenizer``.
        training_arguments: Passed as ``args``.

    Returns:
        An ``SFTTrainer`` that reads text from the column named by
        ``config.DATASET_TEXT_FIELD`` with ``max_seq_length`` set to
        ``config.MAX_SEQ_LENGTH``.
    """
    trainer_kwargs = dict(
        model=model,
        train_dataset=training_dataset,
        peft_config=peft_config,
        dataset_text_field=config.DATASET_TEXT_FIELD,
        max_seq_length=config.MAX_SEQ_LENGTH,
        tokenizer=tokenizer,
        args=training_arguments,
    )
    return SFTTrainer(**trainer_kwargs)
|
quantization_utils.py
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
from transformers import BitsAndBytesConfig
|
3 |
+
|
4 |
+
import config
|
5 |
+
|
6 |
+
def load_bits_and_bytes_config():
    """Build the bitsandbytes quantization configuration.

    Returns:
        A ``BitsAndBytesConfig`` with ``load_in_4bit`` and the 4-bit quant
        type read from ``config``, and ``torch.float16`` as the 4-bit
        compute dtype.
    """
    return BitsAndBytesConfig(
        load_in_4bit=config.ENABLE_4BIT,
        bnb_4bit_quant_type=config.QUANTIZATION_TYPE,
        bnb_4bit_compute_dtype=torch.float16,
    )
|
requirements.txt
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
torch
|
2 |
+
torchvision
|
3 |
+
trl
|
4 |
+
transformers
|
5 |
+
accelerate
|
6 |
+
peft
|
7 |
+
einops
|
8 |
+
datasets
|
9 |
+
bitsandbytes
|
10 |
+
scipy
|