cloudyu committed on
Commit
3a01bc4
1 Parent(s): 0b08d70

Upload folder using huggingface_hub

Files changed (1)
  1. dpo.py +99 -0
dpo.py ADDED
@@ -0,0 +1,99 @@
+ import torch
+ from datasets import load_dataset
+ from peft import LoraConfig, prepare_model_for_kbit_training
+ from transformers import (
+     AutoModelForCausalLM,
+     AutoTokenizer,
+     BitsAndBytesConfig,
+     TrainingArguments,
+ )
+ from trl import DPOTrainer
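+
+ # NF4 4-bit quantization with bfloat16 compute (QLoRA-style), so the 8B
+ # model fits in a single GPU's memory.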
+ nf4_config = BitsAndBytesConfig(
+     load_in_4bit=True,
+     bnb_4bit_quant_type="nf4",
+     bnb_4bit_use_double_quant=True,
+     bnb_4bit_compute_dtype=torch.bfloat16,
+ )
+
+ # Load the model and tokenizer onto the GPU; device_map="auto" lets
+ # accelerate place the quantized weights.
+ model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
+ model = AutoModelForCausalLM.from_pretrained(
+     model_id,
+     quantization_config=nf4_config,
+     device_map="auto",
+     local_files_only=False,
+     trust_remote_code=True,
+ )
+ tokenizer = AutoTokenizer.from_pretrained(model_id, use_default_system_prompt=False)
+ if tokenizer.pad_token is None:
+     tokenizer.pad_token = tokenizer.eos_token  # Llama 3 ships without a pad token
+ print(model)
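+
+ # Reshape each row of jondurbin/truthy-dpo-v0.1 into the
+ # prompt / chosen / rejected fields that DPOTrainer expects.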
+ def return_prompt_and_responses(samples):
+     return {
+         "prompt": [
+             "Question: " + question + "\n\nAnswer: "
+             for question in samples["prompt"]
+         ],
+         "chosen": samples["chosen"],      # preferred response
+         "rejected": samples["rejected"],  # dispreferred response
+     }
+
+ dataset = load_dataset(
+     "jondurbin/truthy-dpo-v0.1",
+     split="train",
+     # data_dir="data/rl"
+ )
+ original_columns = dataset.column_names
+
+ # datasets.map() is not in-place: keep the returned, reformatted dataset
+ dataset = dataset.map(
+     return_prompt_and_responses,
+     batched=True,
+     remove_columns=original_columns,
+ )
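+
+ # Prepare the quantized model for k-bit training (casts norms, enables
+ # input grads) and attach LoRA adapters to every projection layer.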
+ model = prepare_model_for_kbit_training(model)
+
+ peft_config = LoraConfig(
+     r=128,
+     lora_alpha=16,
+     target_modules=[
+         "q_proj", "k_proj", "v_proj", "o_proj",
+         "up_proj", "gate_proj", "down_proj", "lm_head",
+     ],
+     lora_dropout=0.05,
+     bias="none",
+     task_type="CAUSAL_LM",
+ )
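+
+ # Memory-lean training setup: batch size 1 with gradient checkpointing
+ # and Adafactor; bf16 matches the compute dtype chosen above.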
+ output_dir = "./odp"
+ training_args = TrainingArguments(
+     output_dir=output_dir,
+     overwrite_output_dir=True,
+     per_device_train_batch_size=1,
+     gradient_accumulation_steps=1,
+     gradient_checkpointing=True,
+     max_grad_norm=0.3,
+     optim="adafactor",
+     num_train_epochs=1,
+     learning_rate=2e-4,
+     lr_scheduler_type="cosine",
+     warmup_ratio=0.05,
+     bf16=True,
+     save_steps=100,
+     save_total_limit=3,
+     logging_steps=10,
+ )
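+
+ # With peft_config set and no explicit ref_model, TRL's DPOTrainer uses
+ # the same base model with adapters disabled as the frozen reference policy.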
+ dpo_trainer = DPOTrainer(
+     model,
+     # ref_model=None (see note above)
+     args=training_args,
+     peft_config=peft_config,
+     beta=0.1,
+     train_dataset=dataset,
+     tokenizer=tokenizer,
+     max_prompt_length=1024,
+     max_length=2048,
+ )
+
+ dpo_trainer.train()
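+
+ # Optional final save (an assumed step, not required: save_steps above
+ # already writes checkpoints). Persists the trained LoRA adapter.
+ dpo_trainer.save_model(output_dir)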